# Semantic Gravity Experiment Pipeline

This notebook runs the complete Semantic Gravity experiment pipeline on Google Colab, with the repository and model weights stored in Google Drive:
1. Setup and dependencies
2. Inference (greedy + sampling)
3. Behavior analysis
4. Mechanistic metrics
5. Activation patching
6. Bootstrap confidence intervals (CIs)
7. Visualization

In [None]:
# Install dependencies
!pip install -q torch transformers accelerate tokenizers numpy pandas scipy matplotlib tqdm requests wordfreq

In [None]:
# Mount Google Drive
# The mount point is /content/drive; the default REPO_DIR and MODEL_DIR paths
# configured in this notebook both live underneath it.
# google.colab is provided by the Colab runtime, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Set paths - UPDATE THESE FOR YOUR SETUP
# Both defaults point inside the Drive mount at /content/drive, so Drive must
# be mounted before this cell runs.
REPO_DIR = '/content/drive/MyDrive/Semantic_Gravity'  # Path to repo in Drive
# NOTE(review): MODEL_DIR is not referenced by any other cell visible in this
# notebook — confirm how the pipeline modules locate the model weights.
MODEL_DIR = '/content/drive/MyDrive/models/your_model'  # Path to model weights

# Work from the repository root. os.chdir raises if REPO_DIR does not exist
# (e.g. the placeholder path above was not updated).
os.chdir(REPO_DIR)
print(f'Working directory: {os.getcwd()}')

In [None]:
# Check GPU
!nvidia-smi

import torch
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
else:
    print('No GPU available')

In [None]:
import sys

# Make the repository importable (the later cells import from `src.*`).
# Rebuild sys.path in place so REPO_DIR sits at the front exactly once:
# a plain insert(0, ...) would prepend another duplicate entry every time
# this cell is re-run.
sys.path[:] = [REPO_DIR] + [entry for entry in sys.path if entry != REPO_DIR]

# Set environment variables if needed
# Do not paste a real key into the notebook (it would be saved with the file);
# prompt for it instead:
# from getpass import getpass
# os.environ['DEEPSEEK_API_KEY'] = getpass('DeepSeek API key: ')  # For dataset generation

print('Repository added to path')

In [None]:
# OPTIONAL: Run dataset pipeline (uncomment if needed)
# from src.dataset_pipeline import build_dataset
# build_dataset()

In [None]:
# Run inference
from src.config import setup_directories

# The result of setup_directories() is indexed by the 'run_root' key; that
# value is reused as output_root by every later pipeline cell in this notebook.
# NOTE(review): presumably this also creates the output directory tree —
# confirm in src/config.
RUN_ROOT = setup_directories()['run_root']
print(f'Run root: {RUN_ROOT}')

# NOTE(review): limit=None presumably means "process everything"; a small
# integer would likely give a quick smoke test — confirm against
# run_experiment's signature.
from src.runner import run_experiment
run_experiment(output_root=RUN_ROOT, limit=None)

In [None]:
# Run behavior analysis
# Requires RUN_ROOT, which is defined in the inference cell; run that cell
# first in a fresh kernel.
# NOTE(review): presumably consumes the inference outputs stored under
# RUN_ROOT — confirm in src/behavior_analysis.
from src.behavior_analysis import run_behavior_analysis_pipeline
run_behavior_analysis_pipeline(output_root=RUN_ROOT)

In [None]:
# Run mechanistic metrics
# Requires RUN_ROOT, which is defined in the inference cell; run that cell
# first in a fresh kernel.
# NOTE(review): the module name suggests attention-based metrics, and this
# step likely needs the model loaded on a GPU — confirm in src/metrics_attn.
from src.metrics_attn import run_mechanistic_metrics_pipeline
run_mechanistic_metrics_pipeline(output_root=RUN_ROOT)

In [None]:
# Run activation patching
# Requires RUN_ROOT, which is defined in the inference cell; run that cell
# first in a fresh kernel.
# NOTE(review): whether this step depends on outputs of the earlier analysis
# cells is not visible here — confirm in src/patching before reordering.
from src.patching import run_activation_patching_pipeline
run_activation_patching_pipeline(output_root=RUN_ROOT)

In [None]:
# Run bootstrap CIs and visualization
# Requires RUN_ROOT, which is defined in the inference cell.
from src.bootstrap import run_bootstrap_pipeline
from src.visualize import run_visualization_pipeline

# Bootstrap runs before visualization.
# NOTE(review): presumably the figures consume the bootstrap outputs, so keep
# this order — confirm in src/visualize.
run_bootstrap_pipeline(output_root=RUN_ROOT)
# The return value is iterated with .items() below, so it must be a dict-like
# mapping of output name -> path.
result_paths = run_visualization_pipeline(output_root=RUN_ROOT)

# Summarize what was written, one "name: path" line per generated output.
print('\nGenerated outputs:')
for name, path in result_paths.items():
    print(f'  {name}: {path}')