# L* Cross-Heritage Validation (v2 - Fixed)

**Paper #3: Thermodynamic Constraints in Transformer Architectures**

**Author:** Davide D'Elia

**Date:** 2026-01-06

---

## Purpose

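Validate the L* transition-point formula across models from different **training heritages** (labs). In the formula below, `L` is the model's number of transformer layers and `H` its number of attention heads.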

## The Formula

```
L* = L × (0.11 + 0.012×L + 4.9/H)
```

## v2 Fixes

- Fixed attention output extraction (use `attn_implementation='eager'`)
- Fixed BLOOM architecture path (`self_attention` not `attn`)
- Added model config debugging

---

In [None]:
# Cell 1: Setup
!pip install -q transformers accelerate scipy seaborn pandas huggingface_hub

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from tqdm.auto import tqdm
import json
import gc
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Configure visualization
# Apply the 'seaborn-v0_8-whitegrid' matplotlib style first, then seaborn's
# "paper" context with fonts scaled by 1.2. Both calls mutate global plotting
# state, so their order is kept as-is.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("paper", font_scale=1.2)

# Global timestamp
# Taken once when this cell runs (naive local time); later cells embed it in
# the PNG and JSON output file names so one session's artifacts share a suffix.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
print(f"Session timestamp: {TIMESTAMP}")

# HF TOKEN
# Resolve the Hugging Face access token. Preference order:
#   1. the Colab secret named 'HF_TOKEN' (only when running inside Colab),
#   2. the HF_TOKEN environment variable.
# HF_TOKEN stays None when neither source provides a value.
HF_TOKEN = None
try:
    from google.colab import userdata
    HF_TOKEN = userdata.get('HF_TOKEN')
except Exception:
    # Not running in Colab (import fails) or the secret lookup itself failed.
    # `Exception` rather than a bare `except` so Ctrl-C / SystemExit still
    # propagate.
    HF_TOKEN = None

# Fall back to the environment whenever Colab gave us nothing, including the
# case where the import worked but the secret is empty or unset.
if not HF_TOKEN:
    HF_TOKEN = os.environ.get('HF_TOKEN')

if HF_TOKEN:
    print("HF_TOKEN loaded")

# Report the PyTorch build and, when a CUDA device is visible, its name.
print(f"PyTorch: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA: {cuda_available}")
if cuda_available:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Model Configuration

In [None]:
# Models to test
# One row per checkpoint: (hub id, lab, L, H, expected label). The rows are
# expanded into the {"lab", "L", "H", "expected"} dicts the later cells read.
MODELS_TO_TEST = {
    hub_id: {"lab": lab, "L": n_l, "H": n_h, "expected": expected}
    for hub_id, lab, n_l, n_h, expected in (
        ("EleutherAI/pythia-160m", "EleutherAI", 12, 12, "DAMPEN"),
        ("EleutherAI/pythia-410m", "EleutherAI", 24, 16, "DAMPEN"),
        ("facebook/opt-125m", "Meta", 12, 12, "EXPAND"),
        ("facebook/opt-350m", "Meta", 24, 16, "EXPAND"),
        ("bigscience/bloom-560m", "BigScience", 24, 16, "EXPAND"),
        ("openai-community/gpt2", "OpenAI", 12, 12, "EXPAND"),
    )
}

# Short prompts; every model is evaluated on each of them.
TEST_PROMPTS = [
    "The capital of France is Paris, which is known for the Eiffel Tower.",
    "In mathematics, the derivative of x squared equals two times x.",
    "Climate change affects global temperatures and weather patterns significantly.",
    "The quick brown fox jumps over the lazy dog near the riverbank.",
    "Once upon a time in a land far away, there lived a wise old king.",
]

def predict_l_star_v3(L, H):
    """Evaluate the closed-form prediction L* = L × (0.11 + 0.012×L + 4.9/H).

    Args:
        L: The model's ``L`` value (the "L" entry of its configuration row).
        H: The model's ``H`` value (the "H" entry of its configuration row).

    Returns:
        The predicted L* as a float.

    Raises:
        ZeroDivisionError: If ``H`` is zero.
    """
    scale = 0.11 + 0.012 * L + 4.9 / H
    return L * scale

# Sanity check: show how many models and prompts this run will cover.
print(f"Models: {len(MODELS_TO_TEST)}, Prompts: {len(TEST_PROMPTS)}")

## 3. Architecture-Aware W_V Extraction

In [None]:
def get_architecture_info(model):
    """Classify a model by probing for family-specific attributes.

    The checks run in a fixed order and the first match wins.

    Returns:
        One of 'pythia', 'opt', 'bloom', 'gpt2', 'llama', or 'unknown' when
        no probe matches.
    """
    if hasattr(model, 'gpt_neox'):
        return 'pythia'

    if hasattr(model, 'model') and hasattr(model.model, 'decoder'):
        return 'opt'

    if hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
        # BLOOM and GPT-2 share the `transformer.h` container; the name of the
        # attention sub-module on the first block tells them apart.
        first_block = model.transformer.h[0]
        for attn_attr, family in (('self_attention', 'bloom'), ('attn', 'gpt2')):
            if hasattr(first_block, attn_attr):
                return family
        return 'unknown'

    if hasattr(model, 'model') and hasattr(model.model, 'layers'):
        return 'llama'

    return 'unknown'


def get_layers(model, arch):
    """Return the container of transformer blocks for the given architecture.

    Args:
        model: The loaded model object.
        arch: Architecture tag as produced by ``get_architecture_info``.

    Returns:
        The attribute holding the model's blocks, or an empty list when
        ``arch`` is not one of the known tags.
    """
    # (architecture tags, attribute path from the model to its block list)
    layer_paths = (
        (('pythia',), ('gpt_neox', 'layers')),
        (('opt',), ('model', 'decoder', 'layers')),
        (('bloom', 'gpt2'), ('transformer', 'h')),
        (('llama',), ('model', 'layers')),
    )
    for arch_tags, attr_path in layer_paths:
        if arch in arch_tags:
            node = model
            for attr in attr_path:
                node = getattr(node, attr)
            return node
    return []


def _model_num_heads(model):
    """Read the attention-head count from ``model.config``.

    Tries ``num_attention_heads`` first and ``n_head`` second; raises
    ``ValueError`` when neither yields a positive value.
    """
    cfg = getattr(model, 'config', None)
    n_heads = getattr(cfg, 'num_attention_heads', None) or getattr(cfg, 'n_head', None)
    if not n_heads:
        raise ValueError("cannot determine number of attention heads from model.config")
    return int(n_heads)


def _fused_qkv_value_rows(fused_weight, n_heads):
    """Select the value-projection rows from a head-interleaved fused QKV weight.

    ``fused_weight`` has shape (3 * n_heads * head_dim, in_features) with rows
    ordered head by head as [q_h, k_h, v_h]. The result has shape
    (n_heads * head_dim, in_features).
    """
    out_features, in_features = fused_weight.shape
    if out_features % (3 * n_heads) != 0:
        raise ValueError(
            f"fused QKV rows ({out_features}) not divisible by 3 * n_heads ({3 * n_heads})"
        )
    head_dim = out_features // (3 * n_heads)
    per_head = fused_weight.reshape(n_heads, 3, head_dim, in_features)
    return per_head[:, 2].reshape(n_heads * head_dim, in_features)


def get_W_V(model, arch, layer_idx):
    """Extract the value-projection weight W_V for one layer.

    Args:
        model: The loaded model.
        arch: Architecture tag ('pythia', 'opt', 'bloom', 'gpt2', 'llama').
        layer_idx: Index into the model's block list.

    Returns:
        A float32 CPU tensor holding W_V, or ``None`` when the architecture
        is not handled or extraction fails (the error is printed, not raised).
    """
    try:
        layer = get_layers(model, arch)[layer_idx]

        if arch == 'pythia':
            # Hugging Face GPT-NeoX views the fused projection output as
            # (heads, 3 * head_size), so Q/K/V rows are interleaved per head.
            # Taking the last third of the rows would return the Q, K and V
            # of the last third of the heads instead of W_V.
            qkv = layer.attention.query_key_value.weight.data.float()
            return _fused_qkv_value_rows(qkv, _model_num_heads(model)).cpu()

        if arch == 'opt':
            # OPT: separate v_proj
            return layer.self_attn.v_proj.weight.data.float().cpu()

        if arch == 'bloom':
            # BLOOM splits its fused projection as (heads, 3, head_dim): the
            # same per-head interleaving as GPT-NeoX, under self_attention.
            qkv = layer.self_attention.query_key_value.weight.data.float()
            return _fused_qkv_value_rows(qkv, _model_num_heads(model)).cpu()

        if arch == 'gpt2':
            # GPT-2: c_attn holds Q, K, V concatenated along the second axis,
            # so V is the last third of the columns; transpose so the result
            # has the same orientation as the other branches.
            c_attn = layer.attn.c_attn.weight.data.float()
            d = c_attn.shape[1] // 3
            return c_attn[:, 2*d:].T.cpu()

        if arch == 'llama':
            return layer.self_attn.v_proj.weight.data.float().cpu()

    except Exception as e:
        # Best effort: callers treat None as "no W_V available".
        print(f"    W_V extraction error (layer {layer_idx}): {e}")

    return None

print("Architecture-aware functions defined.")

## 4. Trace Computation with Attention Fix

In [None]:
def compute_traces_and_l_star(model, tokenizer, prompt, arch, device='cuda'):
    """
    Compute the per-layer trace proxy and the empirical L* for one prompt.

    Uses the notebook's formula: Tr(L_F) = (sum(A) - n) * ||W_V||_F^2, where A
    is the head-averaged attention matrix of the first batch item and n the
    number of prompt tokens. When W_V cannot be extracted for a layer, the
    trace falls back to |sum(A) - n| alone.

    Args:
        model: Model called with ``output_attentions=True``.
        tokenizer: Tokenizer used to encode ``prompt`` as PyTorch tensors.
        prompt: Text to run through the model.
        arch: Architecture tag passed on to ``get_W_V``.
        device: Device for the inputs. If a CUDA device is requested but CUDA
            is unavailable, the device of the model's parameters is used.

    Returns:
        ``(traces, L_star)``: ``traces`` is a list with one float per layer,
        and ``L_star`` is the index of the layer just after the largest
        absolute jump between consecutive traces (0 with fewer than two
        layers). Returns ``(None, None)`` when the model gives no attentions.
    """
    model.eval()

    # With the default 'cuda', a CPU-only runtime would fail on `.to(device)`;
    # follow the model's own placement in that case.
    if str(device).startswith('cuda') and not torch.cuda.is_available():
        device = next(model.parameters()).device

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    n_tokens = inputs["input_ids"].shape[1]

    # Forward pass with attention output (hidden states are never read here,
    # so they are not requested).
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    attentions = outputs.attentions

    # Debug: check if attentions are available
    if attentions is None:
        print(f"    WARNING: attentions is None!")
        return None, None

    traces = []
    for layer_idx, attn in enumerate(attentions):
        if attn is None:
            traces.append(0.0)
            continue

        # Average over heads for the first batch item -> (seq, seq).
        A = attn[0].float().mean(dim=0).cpu()

        # NOTE(review): if these attention rows are softmax-normalised, A.sum()
        # should equal n_tokens up to rounding, which would make this term ~0
        # and the trace dominated by numerical noise. Confirm whether another
        # quantity (e.g. the off-diagonal mass) was intended.
        attn_excess = A.sum().item() - n_tokens

        W_V = get_W_V(model, arch, layer_idx)
        if W_V is not None:
            trace = abs(attn_excess * (W_V ** 2).sum().item())
        else:
            # Fallback: just attention sum
            trace = abs(attn_excess)

        traces.append(trace)

    # Find L* = layer of maximum trace gradient
    traces_arr = np.array(traces)
    if len(traces_arr) > 1:
        gradients = np.abs(np.diff(traces_arr))
        # gradients[i] is the jump between layers i and i+1; +1 selects the
        # later layer of that pair.
        L_star = int(np.argmax(gradients)) + 1
    else:
        L_star = 0

    return traces, L_star

print("Trace computation function defined.")

## 5. Run Validation

In [None]:
# Run every configured model over every prompt and collect one summary row per
# model in `results`. A failure on one model is reported and skipped; it does
# not abort the remaining models.
results = []

print("=" * 80)
print("L* CROSS-HERITAGE VALIDATION (v2)")
print("=" * 80)

for model_name, config in tqdm(MODELS_TO_TEST.items(), desc="Models"):
    print(f"\n{'='*70}")
    print(f"Model: {model_name}")
    print(f"Lab: {config['lab']} | L={config['L']} | H={config['H']}")

    # Pre-bind so the cleanup in `finally` is valid even when loading fails.
    model = tokenizer = None
    try:
        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            token=HF_TOKEN or None
        )
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model with attention output enabled
        # KEY FIX: Use attn_implementation='eager' for attention output
        # NOTE(review): trust_remote_code=True lets a checkpoint repository
        # run its own Python code; confirm it is actually needed for the
        # models listed in MODELS_TO_TEST.
        print("  Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            token=HF_TOKEN or None,
            trust_remote_code=True,
            attn_implementation="eager",  # KEY FIX!
            output_attentions=True,
            output_hidden_states=True
        )
        model.eval()

        # Detect architecture
        arch = get_architecture_info(model)
        print(f"  Architecture: {arch}")

        # Config debugging: the prediction and the error normalisation both
        # use the hand-entered L, so flag a mismatch with the loaded model.
        loaded_layers = getattr(model.config, "num_hidden_layers", None)
        if loaded_layers is not None and loaded_layers != config["L"]:
            print(f"  WARNING: configured L={config['L']} but model reports "
                  f"{loaded_layers} layers")

        # Test prompts
        all_l_stars = []
        all_traces = []

        for i, prompt in enumerate(TEST_PROMPTS):
            traces, l_star = compute_traces_and_l_star(model, tokenizer, prompt, arch)
            if traces is not None:
                all_traces.append(traces)
                all_l_stars.append(l_star)
                print(f"    Prompt {i+1}: L* = {l_star}")
            else:
                print(f"    Prompt {i+1}: FAILED")

        if all_l_stars:
            L_star_empirical = np.mean(all_l_stars)
            L_star_std = np.std(all_l_stars)
            L_star_predicted = predict_l_star_v3(config["L"], config["H"])
            # Absolute error expressed as a percentage of the model's L.
            error = abs(L_star_predicted - L_star_empirical) / config["L"] * 100

            result = {
                "model": model_name,
                "lab": config["lab"],
                "L": config["L"],
                "H": config["H"],
                "arch": arch,
                "L_star_predicted": float(L_star_predicted),
                "L_star_empirical": float(L_star_empirical),
                "L_star_std": float(L_star_std),
                "error_pct": float(error),
            }
            results.append(result)

            print(f"\n  RESULTS:")
            print(f"    L* predicted:  {L_star_predicted:.1f}")
            print(f"    L* empirical:  {L_star_empirical:.1f} +/- {L_star_std:.1f}")
            print(f"    Error:         {error:.1f}%")

    except Exception as e:
        print(f"  ERROR: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Cleanup on success AND failure, so a model that errored out does not
        # stay resident while the next one loads.
        model = tokenizer = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

print(f"\n{'='*80}")
print(f"COMPLETE: {len(results)}/{len(MODELS_TO_TEST)} models")
print("="*80)

## 6. Results Analysis

In [None]:
# Tabulate the per-model results, then print the mean error per lab, overall,
# and split into EleutherAI (Pythia) versus all other labs.
if not results:
    print("No results!")
else:
    df = pd.DataFrame(results)

    print(f"\n{'=' * 80}")
    print("CROSS-HERITAGE L* VALIDATION RESULTS")
    print("=" * 80)
    print(df.to_string(index=False))

    # Summary by lab
    print(f"\n{'=' * 80}")
    print("SUMMARY BY LAB")
    print("=" * 80)

    for lab in sorted(df['lab'].unique()):
        lab_rows = df.loc[df['lab'] == lab]
        lab_errors = lab_rows['error_pct'].values
        print(f"\n{lab}: n={len(lab_rows)}, MAPE={np.mean(lab_errors):.1f}%")

    # Overall
    overall_mape = df['error_pct'].mean()
    print(f"\n{'='*80}")
    print(f"OVERALL MAPE: {overall_mape:.1f}%")
    print("=" * 80)

    # In-sample vs out-of-sample
    is_eleuther = df['lab'] == 'EleutherAI'
    pythia = df[is_eleuther]
    others = df[~is_eleuther]

    # The comparison is only printed when both groups have at least one model.
    if len(pythia) and len(others):
        print(f"\nPythia (in-sample):      MAPE = {pythia['error_pct'].mean():.1f}%")
        print(f"Non-Pythia (out-of-sample): MAPE = {others['error_pct'].mean():.1f}%")

## 7. Visualization

In [None]:
# Two-panel figure: predicted vs empirical L* per model (left) and the error
# per lab (right). The figure is saved as a timestamped PNG.
if results:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax1, ax2 = axes

    lab_colors = {
        "EleutherAI": "#E74C3C",
        "Meta": "#3498DB",
        "BigScience": "#27AE60",
        "OpenAI": "#9B59B6"
    }

    # Plot 1: Predicted vs Empirical
    for r in results:
        point = (r['L_star_predicted'], r['L_star_empirical'])
        ax1.scatter(point[0], point[1],
                    c=lab_colors.get(r['lab'], 'gray'),
                    s=150, alpha=0.8, edgecolors='white', linewidths=2)
        # Label each point with the model name without its organisation prefix.
        ax1.annotate(r['model'].split('/')[-1], point,
                     fontsize=8, xytext=(5, 5), textcoords='offset points')

    # Identity line (perfect prediction), drawn 10% past the largest value.
    max_val = max(max(r['L_star_predicted'], r['L_star_empirical']) for r in results)
    diagonal = [0, max_val*1.1]
    ax1.plot(diagonal, diagonal, 'k--', alpha=0.5)
    ax1.set_xlabel('L* Predicted')
    ax1.set_ylabel('L* Empirical')
    ax1.set_title('Cross-Heritage L* Validation')
    ax1.grid(True, alpha=0.3)

    # Plot 2: Error by Lab
    labs = sorted({r['lab'] for r in results})
    for i, lab in enumerate(labs):
        errors = [r['error_pct'] for r in results if r['lab'] == lab]
        # Bar = mean error of the lab; black dots = the individual models.
        ax2.bar(i, np.mean(errors), color=lab_colors.get(lab, 'gray'), alpha=0.7)
        ax2.scatter([i]*len(errors), errors, c='black', s=50, zorder=5)

    ax2.axhline(y=10, color='orange', linestyle='--', label='10% threshold')
    ax2.set_xticks(range(len(labs)))
    ax2.set_xticklabels(labs, rotation=45, ha='right')
    ax2.set_ylabel('Error (%)')
    ax2.set_title('L* Formula Error by Lab')
    ax2.legend()
    ax2.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    PNG_FILE = f"l_star_cross_heritage_{TIMESTAMP}.png"
    plt.savefig(PNG_FILE, dpi=150, bbox_inches='tight')
    plt.show()
    print(f"Saved: {PNG_FILE}")

## 8. Final Verdict

In [None]:
# Turn the mean error into a verdict, print the summary, and save everything
# to a timestamped JSON file.
if results:
    overall_mape = np.mean([r['error_pct'] for r in results])

    # Verdict bands: below 10, below 15, otherwise.
    verdict = (
        "FORMULA GENERALIZES" if overall_mape < 10
        else "PARTIAL GENERALIZATION" if overall_mape < 15
        else "CALIBRATION NEEDED"
    )

    print(f"\n{'=' * 80}")
    print("FINAL VERDICT")
    print("=" * 80)
    print(f"""
    Formula: L* = L × (0.11 + 0.012×L + 4.9/H)
    
    Models tested: {len(results)}
    Labs tested:   {len(set(r['lab'] for r in results))}
    Overall MAPE:  {overall_mape:.1f}%
    
    VERDICT: {verdict}
    """)

    # Save
    output = dict(
        experiment="L* Cross-Heritage Validation v2",
        timestamp=TIMESTAMP,
        formula="L* = L × (0.11 + 0.012×L + 4.9/H)",
        n_models=len(results),
        overall_mape=float(overall_mape),
        verdict=verdict,
        results=results,
    )

    JSON_FILE = f"l_star_cross_heritage_{TIMESTAMP}.json"
    with open(JSON_FILE, 'w') as f:
        f.write(json.dumps(output, indent=2))
    print(f"Saved: {JSON_FILE}")

In [None]:
# Download
# Inside Colab, offer the JSON/PNG artifacts for download; elsewhere just note
# that they are already on disk. Only files whose name variable was defined by
# an earlier cell are offered.
try:
    from google.colab import files
except ImportError:
    # Not running in Colab.
    print("Files saved locally.")
else:
    try:
        if 'JSON_FILE' in globals():
            files.download(JSON_FILE)
        if 'PNG_FILE' in globals():
            files.download(PNG_FILE)
        print("Downloads started!")
    except Exception as e:
        # Report the failure instead of hiding it; `Exception` (not a bare
        # `except`) lets Ctrl-C / SystemExit propagate.
        print(f"Download failed: {e}")
        print("Files saved locally.")