# CGT COMPLETE EXPERIMENT LAUNCHER
## Execute cells in order: 1 → 2 → 3 → ...

In [None]:
# @title 1. Setup Environment
!pip install -q sentence-transformers datasets scipy POT scikit-learn
import torch
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')
if torch.cuda.is_available(): print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# @title 2. Upload and Extract cgt_project_FINAL.zip
from google.colab import files
import zipfile, os
!rm -rf /content/cgt_project /content/checkpoints
print('Cleaned. Upload cgt_project_FINAL.zip:')
uploaded = files.upload()
for f in uploaded:
    if f.endswith('.zip'):
        with zipfile.ZipFile(f,'r') as z: z.extractall('/content')
        print(f'Extracted: {f}')
        os.remove(f)
# Verify
import os
if os.path.exists('/content/cgt_project/src/cgt/__init__.py'):
    print('✅ Structure OK: /content/cgt_project/src/cgt/')
else:
    print('❌ ERROR: Structure invalid')
    !find /content -name 'cgt_hardened.py' 2>/dev/null

In [None]:
# @title 3. Add Project to Path and Import
import sys

# Clear any stale imports so a freshly extracted project is re-imported
# instead of being served from the module cache.
# Only the top-level package name is compared: a substring test such as
# `'analysis' in mod` would also evict unrelated third-party modules whose
# dotted name merely contains one of these words.
project_packages = {'cgt', 'unified', 'ablations', 'benchmarks', 'analysis'}
for mod in list(sys.modules):
    if mod.split('.', 1)[0] in project_packages:
        del sys.modules[mod]

# Add paths (only when missing, so re-running the cell does not duplicate them).
if '/content/cgt_project/src' not in sys.path:
    sys.path.insert(0, '/content/cgt_project/src')
if '/content/cgt_project/experiments' not in sys.path:
    sys.path.insert(1, '/content/cgt_project/experiments')
print(f'sys.path[0]: {sys.path[0]}')
print(f'sys.path[1]: {sys.path[1]}')

# Test import: each group below fails fast with an ImportError if the
# corresponding part of the project is missing or broken.
from cgt.models.cgt_hardened import CGTStudentHardened
from cgt.geometry.lorentz_hardened import LorentzSubstrateHardened
print('✅ Core imported')

from unified import run_all_replications, train_hybrid, load_stsb_data, load_hybrid_data
from unified.final_executor import run_final_execution
print('✅ Unified imported')

from benchmarks.cascade_compression import run_cascade_compression
from benchmarks.latency_benchmark import run_latency_benchmark, LatencyConfig
print('✅ Benchmarks imported')

from ablations.euclidean_ablation import run_euclidean_ablation, AblationConfig
from ablations.dimensional_ablation import run_dimensional_ablation, DimensionalAblationConfig
from ablations.geometric_capacity import run_geometric_capacity_analysis, GeometricCapacityConfig
from ablations.mrl_comparison import run_mrl_comparison, MRLConfig
from ablations.bq_comparison import run_bq_comparison, BQComparisonConfig
print('✅ Ablations imported')

from analysis.statistical_robustness import run_statistical_robustness, RobustnessConfig
from analysis.storage_efficiency import run_storage_analysis
print('✅ Analysis imported')

print('\n✅ ALL IMPORTS SUCCESSFUL')

In [None]:
# @title 4. Configuration
from pathlib import Path

# Root directory under which the later cells write their results.
OUTPUT_BASE = Path('/content/experiment_outputs')
OUTPUT_BASE.mkdir(exist_ok=True)

# Create one subdirectory per result category up front.
for subdir in ('outputs', 'tables', 'checkpoints', 'benchmarks', 'ablations', 'analysis'):
    (OUTPUT_BASE / subdir).mkdir(exist_ok=True)

# Passed as skip_psi_slm= to run_all_replications and run_final_execution.
SKIP_PSI_SLM = True
# Gates the separate PSI_SLM_FULL training cell (6b).
INCLUDE_PSI_SLM_FULL = True  # Enable Ψ-SLM Full architecture

print(f'Output: {OUTPUT_BASE}')

In [None]:
# @title 5. Run Replications (3 models)
from unified import run_all_replications, load_stsb_data

# `data` is kept in the notebook namespace: later cells index it with keys
# such as 'train_emb1', 'validation_scores' and 'test_emb2'.
print('Loading STS-B...')
data = load_stsb_data()

print('Running replications...')
replication_results = run_all_replications(
    output_base=OUTPUT_BASE / 'outputs',
    data=data,
    skip_psi_slm=SKIP_PSI_SLM,
)
print('✅ Replications complete')

In [None]:
# @title 6. Train Hybrid Model
from unified import train_hybrid, load_hybrid_data

# The hybrid model is trained on its own dataset, loaded separately from
# the STS-B `data` used by the other cells.
print('Loading hybrid data...')
hybrid_data = load_hybrid_data()

print('Training hybrid...')
hybrid_results = train_hybrid(
    output_dir=OUTPUT_BASE / 'outputs' / 'hybrid',
    data=hybrid_data,
)
print('✅ Hybrid complete')

In [None]:
# @title 6b. Train PSI_SLM_FULL (if enabled)
# Optional step controlled by INCLUDE_PSI_SLM_FULL; reads the STS-B `data`
# dict that the replication cell left in the notebook namespace.
if not INCLUDE_PSI_SLM_FULL:
    print('⏭️ PSI_SLM_FULL skipped (INCLUDE_PSI_SLM_FULL=False)')
else:
    print('Training PSI_SLM_FULL...')
    from unified.psi_slm_trainer import PsiSlmFullTrainer
    from unified.config import ModelType

    trainer = PsiSlmFullTrainer(
        model_type=ModelType.PSI_SLM_FULL,
        output_dir=OUTPUT_BASE / 'outputs',
    )

    # Train on the 'train' split and validate on the 'validation' split.
    psi_slm_results = trainer.train(
        train_emb1=data['train_emb1'],
        train_emb2=data['train_emb2'],
        train_scores=data['train_scores'],
        val_emb1=data['validation_emb1'],
        val_emb2=data['validation_emb2'],
        val_scores=data['validation_scores'],
    )
    print(f'✅ PSI_SLM_FULL complete: ρ = {psi_slm_results["best_val_rho"]:.4f}')


In [None]:
# @title 7. Final Evaluation (F1-F3)
from unified.final_executor import run_final_execution

# Evaluate everything found under OUTPUT_BASE; the display cell (8) later
# reads tables/final_results.txt from the same root.
print('Running final evaluation...')
final_results = run_final_execution(
    output_base=OUTPUT_BASE,
    skip_psi_slm=SKIP_PSI_SLM,
)
print('✅ Evaluation complete')

In [None]:
# @title 8. Display Results
# Print the final results table, or a hint when it has not been generated yet.
p = OUTPUT_BASE / 'tables' / 'final_results.txt'
if p.exists():
    # Path.read_text() opens, reads and closes the file; the previous
    # `open(p).read()` left the file handle open.
    print(p.read_text())
else:
    print('Run evaluation first')

In [None]:
# @title 9. Cascade Compression (I.19)
import torch, json
from benchmarks.cascade_compression import run_cascade_compression
from cgt.models.cgt_hardened import CGTStudentHardened
from cgt.geometry.lorentz_hardened import LorentzSubstrateHardened, LorentzConfig
from unified import load_stsb_data

# Checkpoint expected under the replication outputs directory.
cp = OUTPUT_BASE / 'outputs' / 'k_light_numerical_parity' / 'model_checkpoint.pth'
if not cp.exists():
    print(f'⚠️ {cp} not found')
else:
    # NOTE(review): weights_only=False performs a full unpickle; only load
    # checkpoints produced by this project.
    ckpt = torch.load(cp, map_location='cuda', weights_only=False)

    # Rebuild the student with the dimensions hard-coded below, restore its
    # weights, then run it on the GPU in float64 evaluation mode.
    model = CGTStudentHardened(teacher_dim=384, student_dim=32, hidden_dim=256)
    model.load_state_dict(ckpt['model_state_dict'])
    model = model.cuda().double().eval()

    # Reloads STS-B and rebinds the notebook-level `data`.
    data = load_stsb_data()
    with torch.no_grad():
        e1 = model(data['test_emb1'].cuda().double())
        e2 = model(data['test_emb2'].cuda().double())

    # NOTE(review): 0.76 and 0.8203 are presumably reference scores —
    # confirm their order against run_cascade_compression's signature.
    run_cascade_compression(
        e1,
        e2,
        data['test_scores'],
        0.76,
        0.8203,
        OUTPUT_BASE / 'benchmarks' / 'cascade',
    )
    print('✅ Cascade complete')

In [None]:
# @title 10. Euclidean Ablation (IV.1)
from ablations.euclidean_ablation import run_euclidean_ablation, AblationConfig

cfg = AblationConfig(student_dim=32, hidden_dim=256, num_epochs=25, seed=42)

# Arguments are positional: train, validation and test triples
# (emb1, emb2, scores), then a reference score, the config and the output dir.
# NOTE(review): 0.8203 is presumably the teacher baseline — confirm.
run_euclidean_ablation(
    data['train_emb1'],
    data['train_emb2'],
    data['train_scores'],
    data['validation_emb1'],
    data['validation_emb2'],
    data['validation_scores'],
    data['test_emb1'],
    data['test_emb2'],
    data['test_scores'],
    0.8203,
    cfg,
    OUTPUT_BASE / 'ablations' / 'euclidean',
)
print('✅ Euclidean ablation complete')

In [None]:
# @title 11. Dimensional Ablation (IV.1b)
from ablations.dimensional_ablation import run_dimensional_ablation, DimensionalAblationConfig

# Sweep over the student dimensions listed in test_dimensions.
cfg = DimensionalAblationConfig(
    test_dimensions=[8, 16, 32, 64, 128],
    num_epochs=25,
    seed=42,
)

# Arguments are positional: train, validation and test triples
# (emb1, emb2, scores), then a reference score, the config and the output dir.
# NOTE(review): 0.8203 is presumably the teacher baseline — confirm.
run_dimensional_ablation(
    data['train_emb1'],
    data['train_emb2'],
    data['train_scores'],
    data['validation_emb1'],
    data['validation_emb2'],
    data['validation_scores'],
    data['test_emb1'],
    data['test_emb2'],
    data['test_scores'],
    0.8203,
    cfg,
    OUTPUT_BASE / 'ablations' / 'dimensional',
)
print('✅ Dimensional ablation complete')

In [None]:
# @title 12. Geometric Capacity (IV.1c)
from ablations.geometric_capacity import run_geometric_capacity_analysis, GeometricCapacityConfig

cfg = GeometricCapacityConfig(
    test_dimensions=[8, 16, 32, 64],
    num_epochs=25,
    seed=42,
)

# Unlike the other ablation cells, no validation split is passed here:
# only the train and test triples, then the reference score, config and
# output directory.
run_geometric_capacity_analysis(
    data['train_emb1'],
    data['train_emb2'],
    data['train_scores'],
    data['test_emb1'],
    data['test_emb2'],
    data['test_scores'],
    0.8203,
    cfg,
    OUTPUT_BASE / 'ablations' / 'capacity',
)
print('✅ Capacity analysis complete')

In [None]:
# @title 13. MRL Comparison (IV.2)
from ablations.mrl_comparison import run_mrl_comparison, MRLConfig

cfg = MRLConfig(target_dims=[8, 16, 32, 64, 128, 256], seed=42)

# Only the test split is used by this comparison.
# NOTE(review): 0.8203 and 0.76 are presumably reference scores — confirm
# their meaning and order against run_mrl_comparison's signature.
run_mrl_comparison(
    data['test_emb1'],
    data['test_emb2'],
    data['test_scores'],
    0.8203,
    0.76,
    cfg,
    OUTPUT_BASE / 'ablations' / 'mrl',
)
print('✅ MRL comparison complete')

In [None]:
# @title 14. BQ-768 Comparison (IV.3)
import torch
from ablations.bq_comparison import run_bq_comparison, BQComparisonConfig
from cgt.models.cgt_hardened import CGTStudentHardened
from cgt.geometry.lorentz_hardened import LorentzSubstrateHardened, LorentzConfig

# Same checkpoint that the cascade and latency cells load.
cp = OUTPUT_BASE / 'outputs' / 'k_light_numerical_parity' / 'model_checkpoint.pth'
if not cp.exists():
    print(f'⚠️ {cp} not found')
else:
    # NOTE(review): weights_only=False performs a full unpickle; only load
    # checkpoints produced by this project.
    ckpt = torch.load(cp, map_location='cuda', weights_only=False)

    # NOTE(review): the substrate is left on its default device here, while
    # the latency cell (15) calls .cuda() on it — confirm this is intended.
    cfg_l = LorentzConfig(intrinsic_dim=32)
    substrate = LorentzSubstrateHardened(cfg_l)

    # Rebuild the student, restore its weights, run on GPU in float64.
    model = CGTStudentHardened(teacher_dim=384, student_dim=32, hidden_dim=256)
    model.load_state_dict(ckpt['model_state_dict'])
    model = model.cuda().double().eval()

    with torch.no_grad():
        e1 = model(data['test_emb1'].cuda().double())
        e2 = model(data['test_emb2'].cuda().double())

    cfg = BQComparisonConfig(bq_dimensions=[64, 128, 256, 384, 512, 768])

    # The raw test embeddings are passed as stored in `data`; the student
    # outputs e1/e2 are CUDA float64 tensors.
    run_bq_comparison(
        data['test_emb1'],
        data['test_emb2'],
        data['test_scores'],
        e1,
        e2,
        substrate,
        0.8203,
        0.76,
        cfg,
        OUTPUT_BASE / 'ablations' / 'bq',
    )
    print('✅ BQ comparison complete')

In [None]:
# @title 15. Latency Benchmark (IV.4)
import torch
from benchmarks.latency_benchmark import run_latency_benchmark, LatencyConfig
from cgt.models.cgt_hardened import CGTStudentHardened
from cgt.geometry.lorentz_hardened import LorentzSubstrateHardened, LorentzConfig

# Same checkpoint that the cascade and BQ cells load.
cp = OUTPUT_BASE / 'outputs' / 'k_light_numerical_parity' / 'model_checkpoint.pth'
if not cp.exists():
    print(f'⚠️ {cp} not found')
else:
    # NOTE(review): weights_only=False performs a full unpickle; only load
    # checkpoints produced by this project.
    ckpt = torch.load(cp, map_location='cuda', weights_only=False)

    # The substrate is moved to the GPU for this benchmark.
    cfg_l = LorentzConfig(intrinsic_dim=32)
    substrate = LorentzSubstrateHardened(cfg_l).cuda()

    # Rebuild the student, restore its weights, run on GPU in float64.
    model = CGTStudentHardened(teacher_dim=384, student_dim=32, hidden_dim=256)
    model.load_state_dict(ckpt['model_state_dict'])
    model = model.cuda().double().eval()

    # Only the first side of the test pairs is embedded for timing.
    with torch.no_grad():
        cgt_emb = model(data['test_emb1'].cuda().double())

    cfg = LatencyConfig(warmup_iterations=10, n_iterations=100)
    run_latency_benchmark(
        data['test_emb1'].cuda().double(),
        cgt_emb,
        substrate,
        cfg,
        OUTPUT_BASE / 'benchmarks' / 'latency',
    )
    print('✅ Latency benchmark complete')

In [None]:
# @title 16. Statistical Robustness (VI)
from analysis.statistical_robustness import run_statistical_robustness, RobustnessConfig

# Five seeds, same student/hidden sizes and epoch count as the ablation cells.
cfg = RobustnessConfig(
    seeds=[42, 123, 456, 789, 1011],
    student_dim=32,
    hidden_dim=256,
    num_epochs=25,
)

# Arguments are positional: train, validation and test triples
# (emb1, emb2, scores), then a reference score, the config and the output dir.
# NOTE(review): 0.8203 is presumably the teacher baseline — confirm.
run_statistical_robustness(
    data['train_emb1'],
    data['train_emb2'],
    data['train_scores'],
    data['validation_emb1'],
    data['validation_emb2'],
    data['validation_scores'],
    data['test_emb1'],
    data['test_emb2'],
    data['test_scores'],
    0.8203,
    cfg,
    OUTPUT_BASE / 'analysis' / 'robustness',
)
print('✅ Robustness analysis complete')

In [None]:
# @title 17. Storage Efficiency (VIII)
from analysis.storage_efficiency import run_storage_analysis

# NOTE(review): the four leading floats are hard-coded scores whose meaning
# is defined by run_storage_analysis's positional parameters — confirm there
# before editing any of them.
run_storage_analysis(
    0.8203,
    0.76,
    0.68,
    0.78,
    OUTPUT_BASE / 'analysis' / 'storage',
)
print('✅ Storage analysis complete')

In [None]:
# @title 18. Create Final Delivery ZIP
import shutil
from pathlib import Path
D = Path('/content/FINAL_DELIVERY')
if D.exists(): shutil.rmtree(D)
D.mkdir()
shutil.copytree(OUTPUT_BASE, D/'experiment_outputs', dirs_exist_ok=True)
shutil.make_archive('/content/FINAL_DELIVERY', 'zip', D)
print('✅ FINAL_DELIVERY.zip created')
!ls -lh /content/FINAL_DELIVERY.zip

In [None]:
# @title 19. Download
from google.colab import files

# Download the archive written by the delivery-ZIP cell (18).
delivery_zip = '/content/FINAL_DELIVERY.zip'
files.download(delivery_zip)
print('✅ Download started')