# H4 v2 Extended: Full-Scale Sheaf Laplacian on Larger Models

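**Extension of H4 v2** to larger models (the set actually run is defined in `MODELS_EXTENDED` below):
- Pythia-1B, 2.8B, 6.9B
- Mistral-7B
- Gemma-2B
- Llama-3.1-8B and Gemma-7B are listed as targets but are not included in `MODELS_EXTENDED` in this notebook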

## Key Findings from H4 v2 (4 small models):
- GPT-2 shows **26x higher multi-head trace** than OPT-125m
- Block-diagonal structure validated: Tr(Δ_F^total) = Σ_h Tr(Δ_F^(h))
- Layer-wise dynamics differ: GPT-2 monotonic, Pythia two-phase

## Goal:
Validate these findings on larger, production-scale models.

---
*Paper #3: Thermodynamic Constraints in Transformer Architectures*
*Author: Davide D'Elia*
*Date: 2026-01-06*

In [None]:
!pip install transformers accelerate torch numpy scipy matplotlib seaborn pandas bitsandbytes -q

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy import stats
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from tqdm.auto import tqdm
import json
from datetime import datetime
import warnings
import gc
# NOTE(review): this silences every warning category for the whole session,
# including warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Global plotting defaults for every figure produced by this notebook.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("paper", font_scale=1.2)

# Report the runtime environment so saved outputs can be tied to a setup.
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only device index 0 is reported.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; dividing by 1e9 prints decimal gigabytes.
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

## 1. Extended Model Configuration

Testing larger models with 4-bit quantization for memory efficiency.

In [None]:
# Extended model set - larger models
# Keys are Hugging Face model identifiers passed to `from_pretrained`.
# Per-model fields, as used later in this notebook:
#   'lab'      - used to pick a plot colour from COLORS and shown in the summary
#   'behavior' - expected-behaviour label; printed and copied into the results
#   'gain'     - copied into the results as 'known_gain'
#                (NOTE(review): origin/definition of this value is not shown here)
#   'layers'   - only printed after loading; the analysed layer count is taken
#                from the number of attention tensors the model returns
MODELS_EXTENDED = {
    # EleutherAI - DAMPENERS (larger)
    # NOTE(review): gain is > 1 here while the label is 'DAMPEN' — confirm.
    'EleutherAI/pythia-1b': {
        'lab': 'EleutherAI', 'behavior': 'DAMPEN', 'gain': 1.216, 'layers': 16
    },
    'EleutherAI/pythia-2.8b': {
        'lab': 'EleutherAI', 'behavior': 'DAMPEN', 'gain': 0.927, 'layers': 32
    },
    'EleutherAI/pythia-6.9b': {
        'lab': 'EleutherAI', 'behavior': 'DAMPEN', 'gain': 0.994, 'layers': 32
    },
    # Mistral - Expected EXPANDER
    'mistralai/Mistral-7B-v0.1': {
        'lab': 'Mistral', 'behavior': 'EXPAND', 'gain': 1.05, 'layers': 32
    },
    # Google - RMSNorm model
    'google/gemma-2b': {
        'lab': 'Google', 'behavior': 'UNKNOWN', 'gain': 1.0, 'layers': 18
    },
}

# Reference results from H4 v2 (small models)
# Only printed next to the new results and stored in the output JSON; they are
# not recomputed in this notebook.
REFERENCE_RESULTS = {
    'gpt2': {'trace_mh': 62696, 'behavior': 'EXPAND'},
    'pythia-160m': {'trace_mh': 18887, 'behavior': 'DAMPEN'},
    'pythia-410m': {'trace_mh': 11326, 'behavior': 'DAMPEN'},
    'opt-125m': {'trace_mh': 2368, 'behavior': 'EXPAND'},
}

# Prompts fed to every model; per-layer traces are averaged over this list.
TEST_PROMPTS = [
    "The capital of France is Paris.",
    "The sky is made of chocolate.",
    "Once upon a time in a land far away",
    "def fibonacci(n): return n if n < 2 else",
]

# Plot colour per lab; plotting code falls back to 'gray' for unknown labs.
COLORS = {
    'EleutherAI': '#E74C3C', 'Meta': '#3498DB', 'OpenAI': '#8E44AD',
    'Mistral': '#27AE60', 'Google': '#F39C12', 'BigScience': '#1ABC9C'
}

print(f"Testing {len(MODELS_EXTENDED)} extended models")

## 2. Efficient Full-Scale Trace Computation

O(n² + d²) algorithm - no subsampling needed!

In [None]:
def compute_trace_efficient(attention, W_V):
    """
    Compute Tr(Δ_F) for one head in closed form - NO SUBSAMPLING.

    Tr(Δ_F) = (Σ_ij A_ij - Σ_i A_ii) · ||W_V||²_F

    i.e. the off-diagonal attention mass multiplied by the squared Frobenius
    norm of the value projection.

    Complexity: O(n² + d²) instead of O(n³d³)

    Args:
        attention: square 2-D tensor of attention weights for a single head.
        W_V: tensor with that head's value-projection weights (any shape;
            only the sum of squared entries is used).

    Returns:
        float: off-diagonal attention sum times ||W_V||²_F.
    """
    # Reduce in float32: models loaded in half precision return fp16
    # attention, and summing n² entries in fp16 loses precision (and
    # `trace` is not available for every half-precision backend).
    attention = attention.float()

    # Frobenius norm squared of W_V
    W_V_frobenius_sq = W_V.float().pow(2).sum().item()

    # Off-diagonal attention sum = total mass minus the diagonal (self-attention)
    attention_sum = attention.sum().item()
    attention_trace = attention.trace().item()
    off_diag_sum = attention_sum - attention_trace

    return off_diag_sum * W_V_frobenius_sq


def compute_multihead_trace(attentions, W_V_list):
    """
    Sum the per-head traces of one multi-head attention layer.

    Relies on the block-diagonal identity Tr(Δ_F^total) = Σ_h Tr(Δ_F^(h)).

    `attentions` is indexed by head along its first axis. `W_V_list` is either
    a list holding one W_V per head, or a single W_V that is reused for every
    head when it is not a list.
    """
    shared_W_V = not isinstance(W_V_list, list)
    head_count = attentions.shape[0]

    # Start from 0.0 so an input with zero heads still yields a float.
    return sum(
        (
            compute_trace_efficient(
                attentions[head],
                W_V_list if shared_W_V else W_V_list[head],
            )
            for head in range(head_count)
        ),
        0.0,
    )

print("Efficient trace functions defined.")

In [None]:
def _dense_projection_weight(module, conv1d=False):
    """Return a projection's weight as float32 on CPU, shaped (out_features, in_features).

    Plain floating-point weights are read directly (`conv1d=True` transposes
    GPT-2's Conv1D layout, which stores (in, out)).

    Quantized layers (e.g. bitsandbytes 4-bit) keep `weight.data` as packed
    integer storage that is NOT the weight matrix, so slicing it is
    meaningless. For those the effective weight is recovered by pushing an
    identity matrix through the layer: layer(I) = Wᵀ + b.
    """
    weight = module.weight
    is_plain = getattr(weight, 'quant_state', None) is None and weight.dtype.is_floating_point
    if is_plain:
        dense = weight.data.float().cpu()
        return dense.T if conv1d else dense

    probe = torch.eye(module.in_features, dtype=torch.float16, device=weight.device)
    with torch.no_grad():
        response = module(probe).float()
    bias = getattr(module, 'bias', None)
    if bias is not None:
        response = response - bias.data.float().to(response.device)
    return response.T.contiguous().cpu()


def _split_value_heads(W_V_full, n_heads, n_kv_heads=None):
    """Split stacked value weights into one block per *query* head.

    With grouped-query / multi-query attention there are fewer value heads
    than query heads; query head h then uses value head h // (n_heads // n_kv_heads).
    """
    n_kv_heads = n_kv_heads or n_heads
    head_dim = W_V_full.shape[0] // n_kv_heads
    queries_per_kv = max(n_heads // n_kv_heads, 1)
    blocks = []
    for h in range(n_heads):
        kv = min(h // queries_per_kv, n_kv_heads - 1)
        blocks.append(W_V_full[kv * head_dim:(kv + 1) * head_dim, :])
    return blocks


def extract_multihead_attention_W_V(model, model_name, tokenizer, prompt, device='cuda'):
    """
    Extract attention weights and W_V matrices for ALL heads.
    Supports: Pythia, OPT, GPT-2, Mistral, Gemma, LLaMA

    Args:
        model: causal LM; the architecture is detected from its attributes.
        model_name: unused here; kept so existing callers keep working.
        tokenizer: callable producing the model inputs for `prompt`.
        prompt: text to run through the model.
        device: device the tokenized inputs are moved to.

    Returns:
        (attentions, W_V_per_layer, seq_len, n_heads) where `attentions` is
        the model's per-layer attention output, `W_V_per_layer[l][h]` is the
        value-projection block used by query head h in layer l (float32, CPU),
        `seq_len` is the tokenized prompt length and `n_heads` the number of
        query heads.

    Raises:
        ValueError: if the model matches none of the supported layouts.
    """
    inputs = tokenizer(prompt, return_tensors='pt').to(device)

    # Hidden states are not used, so they are not requested.
    with torch.no_grad():
        outputs = model(**inputs, output_attentions=True)

    attentions = outputs.attentions
    W_V_per_layer = []
    n_heads = None

    # Pythia (GPT-NeoX)
    if hasattr(model, 'gpt_neox'):
        n_heads = model.config.num_attention_heads

        for layer in model.gpt_neox.layers:
            qkv = _dense_projection_weight(layer.attention.query_key_value)
            # The fused projection is laid out per head as [q_h; k_h; v_h],
            # not as three contiguous Q / K / V blocks.
            head_dim = qkv.shape[0] // (3 * n_heads)
            per_head = qkv.reshape(n_heads, 3, head_dim, qkv.shape[1])
            W_V_per_layer.append([per_head[h, 2] for h in range(n_heads)])

    # OPT
    elif hasattr(model, 'model') and hasattr(model.model, 'decoder'):
        n_heads = model.config.num_attention_heads

        for layer in model.model.decoder.layers:
            W_V_full = _dense_projection_weight(layer.self_attn.v_proj)
            W_V_per_layer.append(_split_value_heads(W_V_full, n_heads))

    # GPT-2
    elif hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
        n_heads = model.config.n_head

        for layer in model.transformer.h:
            # c_attn output is [Q | K | V]; in (out, in) layout V is the last third of rows.
            c_attn = _dense_projection_weight(layer.attn.c_attn, conv1d=True)
            hidden_size = c_attn.shape[0] // 3
            W_V_full = c_attn[2 * hidden_size:, :]
            W_V_per_layer.append(_split_value_heads(W_V_full, n_heads))

    # Mistral / LLaMA / Gemma (similar structure)
    elif hasattr(model, 'model') and hasattr(model.model, 'layers'):
        n_heads = model.config.num_attention_heads
        n_kv_heads = getattr(model.config, 'num_key_value_heads', None) or n_heads
        layers_without_v_proj = 0

        for layer in model.model.layers:
            if hasattr(layer.self_attn, 'v_proj'):
                W_V_full = _dense_projection_weight(layer.self_attn.v_proj)
            else:
                # Best-effort fallback kept from the original: zero weights,
                # which make this layer's traces zero.
                layers_without_v_proj += 1
                W_V_full = torch.zeros(model.config.hidden_size, model.config.hidden_size)
            W_V_per_layer.append(_split_value_heads(W_V_full, n_heads, n_kv_heads))

        if layers_without_v_proj:
            print(f"  Warning: {layers_without_v_proj} layer(s) have no v_proj; their traces will be 0")

    else:
        raise ValueError(
            f"Unsupported architecture for W_V extraction: {type(model).__name__}"
        )

    return attentions, W_V_per_layer, inputs.input_ids.shape[1], n_heads

print("Multi-head extraction function defined.")

## 3. Run Extended Validation

In [None]:
def analyze_model_extended(model_name, config, test_prompts, device='cuda', use_4bit=True):
    """Full H4 v2 analysis for extended models with optional 4-bit quantization.

    Loads the model, computes per-layer traces for every prompt, averages them
    over prompts and locates L*, the layer with the largest absolute gradient
    of the multi-head trace.

    Args:
        model_name: Hugging Face model identifier to load.
        config: dict with at least 'lab', 'behavior', 'gain' and 'layers'.
        test_prompts: iterable of prompt strings.
        device: device the tokenized prompts are moved to.
        use_4bit: load with NF4 4-bit quantization when True, fp16 otherwise.

    Returns:
        dict with the per-layer traces and summary statistics, or None when
        no prompt could be processed.
    """
    print(f"\n{'='*60}")
    print(f"Analyzing: {model_name}")
    print(f"Lab: {config['lab']}, Expected: {config['behavior']}")
    print(f"{'='*60}")

    # Load model with optional quantization
    print("  Loading model...")
    # NOTE(security): trust_remote_code=True allows the model repository to run
    # its own Python code at load time; only use it with trusted repositories.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    load_kwargs = {
        'device_map': 'auto',
        'trust_remote_code': True,
        'output_attentions': True,
        # Fused SDPA / flash kernels do not return attention probabilities;
        # the eager implementation does, and this analysis needs them.
        'attn_implementation': 'eager',
    }
    if use_4bit:
        load_kwargs['quantization_config'] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16
        )
    else:
        load_kwargs['torch_dtype'] = torch.float16

    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    model.eval()
    print(f"  Model loaded. Layers: {config['layers']}")

    all_traces_full = []
    all_traces_mh = []
    n_heads = None

    try:
        for prompt in tqdm(test_prompts, desc="  Prompts"):
            try:
                attentions, W_V_per_layer, seq_len, n_heads = extract_multihead_attention_W_V(
                    model, model_name, tokenizer, prompt, device
                )

                layer_traces_full = []
                layer_traces_mh = []

                for layer_idx, attn in enumerate(attentions):
                    if attn is None:
                        raise RuntimeError(
                            f"layer {layer_idx} returned no attention weights"
                        )
                    # Reuse the last available W_V if fewer weight sets than
                    # attention tensors were extracted.
                    W_V_heads = W_V_per_layer[layer_idx] if layer_idx < len(W_V_per_layer) else W_V_per_layer[-1]
                    head_attn = attn[0]  # first (only) batch element: (heads, n, n)

                    # Average attention trace
                    # NOTE(review): pairs the head-averaged attention with head 0's
                    # W_V only — confirm this is the intended "full" quantity.
                    trace_full = compute_trace_efficient(head_attn.mean(dim=0), W_V_heads[0])
                    layer_traces_full.append(trace_full)

                    # Multi-head trace
                    trace_mh = compute_multihead_trace(head_attn, W_V_heads)
                    layer_traces_mh.append(trace_mh)

                all_traces_full.append(layer_traces_full)
                all_traces_mh.append(layer_traces_mh)

            except Exception as e:
                # Best effort: a failing prompt is reported and skipped.
                print(f"  Error: {e}")
                continue
    finally:
        # Cleanup - always release the model so the next one fits in memory,
        # even if something unexpected escapes the loop above.
        del model, tokenizer
        gc.collect()
        torch.cuda.empty_cache()

    if not all_traces_full:
        return None

    # Average each layer's trace over the prompts that succeeded.
    mean_traces_full = np.mean(all_traces_full, axis=0)
    mean_traces_mh = np.mean(all_traces_mh, axis=0)

    # L* from trace gradient
    gradient = np.gradient(mean_traces_mh)
    L_star = int(np.argmax(np.abs(gradient)))

    print(f"\n  Results:")
    print(f"    Mean trace (MH): {np.mean(mean_traces_mh):.0f}")
    print(f"    L* (max gradient): {L_star}")

    return {
        'model': model_name,
        'lab': config['lab'],
        'behavior': config['behavior'],
        'known_gain': config['gain'],
        'n_layers': len(mean_traces_mh),
        'n_heads': n_heads,
        'traces_full': mean_traces_full.tolist(),
        'traces_mh': mean_traces_mh.tolist(),
        'mean_trace_mh': float(np.mean(mean_traces_mh)),
        'L_star': L_star,
        'L_star_ratio': L_star / len(mean_traces_mh),
    }

print("Extended analysis function defined.")

In [None]:
# Analyse every configured model; a failure on one model is reported and
# does not stop the remaining ones.
results_extended = []

for model_name, config in MODELS_EXTENDED.items():
    try:
        result = analyze_model_extended(model_name, config, TEST_PROMPTS, use_4bit=True)
    except Exception as e:
        print(f"Failed on {model_name}: {e}")
    else:
        # analyze_model_extended returns None when no prompt succeeded.
        if result:
            results_extended.append(result)

print(
    f"\n\n{'='*60}",
    f"Successfully analyzed {len(results_extended)} / {len(MODELS_EXTENDED)} models",
    f"{'='*60}",
    sep="\n",
)

## 4. Analysis & Comparison with Small Models

In [None]:
if results_extended:
    # Summary table: one row per successfully analysed model.
    summary_rows = []
    for r in results_extended:
        summary_rows.append({
            'Model': r['model'].split('/')[-1],
            'Lab': r['lab'],
            'Behavior': r['behavior'],
            'Layers': r['n_layers'],
            'Heads': r['n_heads'],
            'Mean Trace (MH)': f"{r['mean_trace_mh']:.0f}",
            'L*': r['L_star'],
            'L*/L': f"{r['L_star_ratio']:.2f}"
        })
    summary = pd.DataFrame(summary_rows)

    banner = "="*80

    print("\n" + banner)
    print("H4 v2 EXTENDED RESULTS")
    print(banner)
    print(summary.to_string(index=False))

    # Side-by-side listing: stored small-model reference values, then the
    # values just computed.
    print("\n" + banner)
    print("COMPARISON WITH SMALL MODELS (Reference)")
    print(banner)

    print("\nReference (H4 v2 small models):")
    for name, data in REFERENCE_RESULTS.items():
        print(f"  {name}: {data['trace_mh']:.0f} ({data['behavior']})")

    print("\nExtended models:")
    for r in results_extended:
        print(f"  {r['model'].split('/')[-1]}: {r['mean_trace_mh']:.0f} ({r['behavior']})")

In [None]:
# Four-panel figure summarising the extended-model results.
if results_extended:
    fig, axes = plt.subplots(2, 2, figsize=(14, 12))
    (ax1, ax2), (ax3, ax4) = axes

    # Panel 1: multi-head trace per layer, x-axis rescaled by model depth
    for r in results_extended:
        depth = len(r['traces_mh'])
        norm_layers = np.arange(depth) / depth
        ax1.plot(
            norm_layers,
            r['traces_mh'],
            label=f"{r['model'].split('/')[-1]}",
            color=COLORS.get(r['lab'], 'gray'),
            linewidth=2,
        )
    ax1.set(
        xlabel='Normalized Layer (l/L)',
        ylabel='Σ_h Tr(Δ_F^(h))',
        title='Multi-Head Sheaf Laplacian Trace',
    )
    ax1.legend(fontsize=9)
    ax1.grid(True, alpha=0.3)

    # Panel 2: mean trace per model, bars coloured by lab
    models = [r['model'].split('/')[-1] for r in results_extended]
    traces = [r['mean_trace_mh'] for r in results_extended]
    labs = [r['lab'] for r in results_extended]
    colors_list = [COLORS.get(lab, 'gray') for lab in labs]
    positions = range(len(models))

    bars = ax2.bar(positions, traces, color=colors_list)
    ax2.set_xticks(positions)
    ax2.set_xticklabels(models, rotation=45, ha='right')
    ax2.set(ylabel='Mean Trace (MH)', title='Mean Multi-Head Trace by Model')
    ax2.grid(True, alpha=0.3, axis='y')

    # Panel 3: relative position of L* against model depth
    for r in results_extended:
        ax3.scatter(
            r['n_layers'],
            r['L_star_ratio'],
            color=COLORS.get(r['lab'], 'gray'),
            s=150,
            label=r['model'].split('/')[-1],
        )
    ax3.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
    ax3.set(
        xlabel='Number of Layers',
        ylabel='L* / L',
        title='Transition Point vs Model Depth',
    )
    ax3.legend(fontsize=9)
    ax3.grid(True, alpha=0.3)

    # Panel 4: mean trace against layers × heads (12 heads assumed when unknown)
    for r in results_extended:
        size_proxy = r['n_layers'] * (r['n_heads'] or 12)
        ax4.scatter(
            size_proxy,
            r['mean_trace_mh'],
            color=COLORS.get(r['lab'], 'gray'),
            s=150,
            label=r['model'].split('/')[-1],
        )
    ax4.set(
        xlabel='Model Size Proxy (Layers × Heads)',
        ylabel='Mean Trace (MH)',
        title='Trace Scaling with Model Size',
    )
    ax4.legend(fontsize=9)
    ax4.grid(True, alpha=0.3)

    plt.suptitle('H4 v2 Extended: Large Model Validation', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig('H4_v2_extended_validation.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("\n>>> Figure saved: H4_v2_extended_validation.png")

## 5. Save Results

In [None]:
if results_extended:
    # Take the clock reading once so the filename and the recorded date agree.
    now = datetime.now()
    timestamp = now.strftime('%Y%m%d_%H%M%S')

    output = {
        'experiment': 'H4 v2 Extended - Large Model Validation',
        'date': now.isoformat(),
        'models_tested': len(results_extended),
        'reference_results': REFERENCE_RESULTS,
        'extended_results': results_extended,
    }

    filename = f'H4_v2_extended_{timestamp}.json'
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)
    print(f"\nResults saved: {filename}")

    # Download (only possible inside Google Colab)
    try:
        from google.colab import files
    except ImportError:
        # Not running in Colab: the files simply stay on local disk.
        print("Files saved locally.")
    else:
        try:
            files.download(filename)
            files.download('H4_v2_extended_validation.png')
        except Exception as e:
            # Best effort: a failed download must not lose the saved results,
            # but Ctrl-C / SystemExit are no longer swallowed.
            print(f"Files saved locally (download failed: {e}).")