# OPT Anomaly Investigation

## The Anomaly

From H4 v2 validation:
- **OPT-125m**: Mean Trace (MH) = **2,368** despite G > 1 (EXPAND)
- **GPT-2**: Mean Trace (MH) = **62,696** (also EXPAND)

**≈26× difference** in mean trace (62,696 / 2,368) between two models that are both classified as EXPAND!

## Hypotheses

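1. **Tied Embeddings**: OPT uses tied input/output embeddings (note: GPT-2 also ties its embeddings, so tying alone cannot separate the two EXPAND models; Pythia is the untied reference)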
2. **W_V Architecture**: Different W_V initialization/structure
3. **Attention Pattern**: Different attention distribution
4. **Third Thermodynamic Category**: OPT represents a distinct category

---
*Paper #3: Thermodynamic Constraints in Transformer Architectures*
*Author: Davide D'Elia*
*Date: 2026-01-06*

In [None]:
!pip install transformers torch numpy scipy matplotlib seaborn pandas -q

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
from tqdm.auto import tqdm
import json
from datetime import datetime
import warnings
import gc
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation/fallback warnings that can
# matter when interpreting results — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Plot styling: matplotlib style sheet first, then seaborn's "paper" context
# with slightly enlarged fonts.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("paper", font_scale=1.2)

# Report the runtime environment so saved outputs can be traced back to it.
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")

## 1. OPT Family Analysis

Compare multiple OPT sizes to see if the anomaly persists.

In [None]:
# OPT checkpoints under study, keyed by Hugging Face Hub id. Each entry records
# the layer count, attention-head count and hidden width.
OPT_MODELS = {
    f'facebook/opt-{size}': {'layers': n_layers, 'heads': n_heads, 'hidden': width}
    for size, n_layers, n_heads, width in (
        ('125m', 12, 12, 768),
        ('350m', 24, 16, 1024),
        ('1.3b', 24, 32, 2048),
    )
}

# Reference models from other labs; same fields plus the originating lab.
COMPARISON_MODELS = dict([
    ('gpt2', dict(layers=12, heads=12, hidden=768, lab='OpenAI')),
    ('EleutherAI/pythia-160m', dict(layers=12, heads=12, hidden=768, lab='EleutherAI')),
])

# Prompts fed to the models in the attention-pattern analysis.
TEST_PROMPTS = [
    "The capital of France is Paris.",
    "Once upon a time in a land far away",
]

## 2. Hypothesis 1: Tied Embeddings Investigation

In [None]:
def check_tied_embeddings(model_name):
    """Report the weight-tying flag and basic sizes from a model's config.

    Only the configuration is loaded (no weights). The first of
    ``tie_word_embeddings`` / ``tie_encoder_decoder`` that exists on the
    config supplies the flag; if neither exists the flag is ``False``.

    Args:
        model_name: Hub id or local path accepted by ``AutoConfig``.

    Returns:
        dict with keys ``model``, ``tied_embeddings``, ``vocab_size`` and
        ``hidden_size`` (falling back to ``n_embd`` when the config has no
        ``hidden_size`` attribute).
    """
    cfg = AutoConfig.from_pretrained(model_name)

    # Different configs expose the tying flag under different names.
    for flag_name in ('tie_word_embeddings', 'tie_encoder_decoder'):
        if hasattr(cfg, flag_name):
            is_tied = getattr(cfg, flag_name)
            break
    else:
        is_tied = False

    vocab = cfg.vocab_size
    try:
        width = cfg.hidden_size
    except AttributeError:
        width = cfg.n_embd

    return {
        'model': model_name,
        'tied_embeddings': is_tied,
        'vocab_size': vocab,
        'hidden_size': width,
    }

# Print the tying flag for every OPT checkpoint, then for the reference models.
print("Checking tied embeddings...\n")
for model_name in [*OPT_MODELS, *COMPARISON_MODELS]:
    info = check_tied_embeddings(model_name)
    display_name = model_name.split('/')[-1]
    print(f"{display_name:20} | Tied: {info['tied_embeddings']}")

## 3. Hypothesis 2: W_V Architecture Comparison

In [None]:
def extract_neox_value_weight(qkv_weight, num_heads):
    """Return the value-projection rows of a GPT-NeoX fused QKV weight.

    GPT-NeoX (Pythia) lays out the fused ``query_key_value`` output
    head-by-head: ``[q_head0, k_head0, v_head0, q_head1, k_head1, ...]``.
    Slicing off the last third of the rows therefore returns the Q, K and V
    rows of the last third of the heads, not W_V. This helper gathers the
    value slice of every head instead.

    Args:
        qkv_weight: 2-D tensor of shape ``(3 * num_heads * head_size, in_features)``.
        num_heads: Number of attention heads in the layer.

    Returns:
        Tensor of shape ``(num_heads * head_size, in_features)`` holding each
        head's value rows, in head order.

    Raises:
        ValueError: If the row count is not divisible by ``3 * num_heads``.
    """
    fused_rows, in_features = qkv_weight.shape
    if num_heads <= 0 or fused_rows % (3 * num_heads) != 0:
        raise ValueError(
            f"Fused QKV weight with {fused_rows} rows cannot be split into "
            f"{num_heads} heads of (q, k, v)"
        )
    head_size = fused_rows // (3 * num_heads)
    per_head = qkv_weight.reshape(num_heads, 3, head_size, in_features)
    # Index 2 along the (q, k, v) axis selects the value block of each head.
    return per_head[:, 2].reshape(num_heads * head_size, in_features)


def weight_matrix_stats(W_V, layer):
    """Summarise one value-projection matrix.

    Args:
        W_V: 2-D weight tensor.
        layer: Layer index recorded in the result.

    Returns:
        dict with ``layer``, ``frobenius_norm``, ``spectral_norm`` (largest
        singular value), ``mean``, ``std``, ``rank`` and ``condition_number``,
        all as plain Python numbers.
    """
    W_V = W_V.float()
    return {
        'layer': layer,
        'frobenius_norm': torch.linalg.norm(W_V, 'fro').item(),
        'spectral_norm': torch.linalg.norm(W_V, 2).item(),
        'mean': W_V.mean().item(),
        'std': W_V.std().item(),
        'rank': torch.linalg.matrix_rank(W_V).item(),
        'condition_number': torch.linalg.cond(W_V).item(),
    }


def iter_value_weights(model):
    """Yield each layer's W_V as a float32 CPU tensor, in layer order.

    Supports the three layouts used in this notebook (OPT, GPT-2, GPT-NeoX).

    Raises:
        ValueError: If the model matches none of the supported layouts.
    """
    if hasattr(model, 'model') and hasattr(model.model, 'decoder'):  # OPT
        for layer in model.model.decoder.layers:
            yield layer.self_attn.v_proj.weight.data.float().cpu()
    elif hasattr(model, 'transformer'):  # GPT-2
        for layer in model.transformer.h:
            # c_attn is fused Q|K|V along dim 1; the value third is transposed
            # so it has the same orientation as the Linear weights above.
            c_attn = layer.attn.c_attn.weight.data.float().cpu()
            hidden_size = c_attn.shape[1] // 3
            yield c_attn[:, 2 * hidden_size:].T
    elif hasattr(model, 'gpt_neox'):  # Pythia
        num_heads = model.config.num_attention_heads
        for layer in model.gpt_neox.layers:
            qkv = layer.attention.query_key_value.weight.data.float().cpu()
            yield extract_neox_value_weight(qkv, num_heads)
    else:
        raise ValueError(
            f"Unsupported architecture for W_V extraction: {type(model).__name__}"
        )


def analyze_W_V_structure(model_name, device='cuda'):
    """Deep analysis of W_V matrix properties.

    Loads the model, computes per-layer statistics of the value projection
    and frees the model again (also when extraction fails).

    Args:
        model_name: Hub id or local path of a causal LM.
        device: Unused; kept for backward compatibility. Placement is decided
            by ``device_map='auto'`` and the weights are analysed on CPU.

    Returns:
        dict with ``model``, ``stats`` (list of per-layer dicts, see
        ``weight_matrix_stats``) and the layer means ``mean_frobenius``,
        ``mean_spectral`` and ``mean_condition``.

    Raises:
        ValueError: If the architecture is not one of OPT / GPT-2 / GPT-NeoX.
    """
    print(f"\nAnalyzing W_V: {model_name}")

    # Only weights are inspected, so no tokenizer is needed here.
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map='auto'
    )
    model.eval()

    try:
        W_V_stats = [
            weight_matrix_stats(W_V, i)
            for i, W_V in enumerate(iter_value_weights(model))
        ]
    finally:
        # Release the model even if extraction raised.
        del model
        gc.collect()
        torch.cuda.empty_cache()

    return {
        'model': model_name,
        'stats': W_V_stats,
        'mean_frobenius': np.mean([s['frobenius_norm'] for s in W_V_stats]),
        'mean_spectral': np.mean([s['spectral_norm'] for s in W_V_stats]),
        'mean_condition': np.mean([s['condition_number'] for s in W_V_stats]),
    }

print("W_V analysis function defined.")

In [None]:
# Run the W_V analysis on the three models being compared.
W_V_results = {
    model_name: analyze_W_V_structure(model_name)
    for model_name in ('facebook/opt-125m', 'gpt2', 'EleutherAI/pythia-160m')
}

# Summary comparison
banner = "=" * 60
print("\n" + banner)
print("W_V STRUCTURE COMPARISON")
print(banner)

# One line per aggregate metric, two decimals each.
summary_rows = (
    ('||W_V||_F (mean)', 'mean_frobenius'),
    ('||W_V||_2 (mean)', 'mean_spectral'),
    ('Condition # (mean)', 'mean_condition'),
)
for name, result in W_V_results.items():
    print(f"\n{name.split('/')[-1]}:")
    for label, key in summary_rows:
        print(f"  {label}: {result[key]:.2f}")

## 4. Hypothesis 3: Attention Pattern Analysis

In [None]:
def attention_entropy_and_sparsity(attn_avg, threshold=0.01):
    """Compute mean row entropy and causal sparsity of one attention map.

    Args:
        attn_avg: 2-D ``(query, key)`` tensor of attention probabilities
            (here: one layer's attention averaged over heads).
        threshold: Weights strictly below this count as "sparse".

    Returns:
        ``(entropy, sparsity)`` as Python floats. ``entropy`` is the Shannon
        entropy (nats) of each query row, averaged over rows. ``sparsity`` is
        the fraction of weights below ``threshold`` among the positions on or
        below the diagonal only: in a causal model the upper triangle is
        masked out, so counting it would inflate the metric and make it
        depend on sequence length.
    """
    # The epsilon keeps log() finite where the probability is exactly zero.
    row_entropy = -torch.sum(attn_avg * torch.log(attn_avg + 1e-10), dim=-1)
    causal_mask = torch.tril(torch.ones_like(attn_avg)).bool()
    sparsity = (attn_avg[causal_mask] < threshold).float().mean().item()
    return row_entropy.mean().item(), sparsity


def analyze_attention_patterns(model_name, prompts, device='cuda'):
    """Analyze attention entropy and distribution patterns.

    Runs each prompt through the model, averages every layer's attention over
    heads, and aggregates entropy / sparsity per layer across prompts.

    Args:
        model_name: Hub id or local path of a causal LM.
        prompts: Iterable of prompt strings (at least one).
        device: Unused; kept for backward compatibility. Inputs are moved to
            wherever ``device_map='auto'`` placed the model, so the function
            no longer fails on hosts without CUDA.

    Returns:
        dict with ``model``, ``entropy_by_layer``, ``sparsity_by_layer``
        (lists indexed by layer), ``mean_entropy`` and ``mean_sparsity``.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    print(f"\nAnalyzing attention: {model_name}")

    prompts = list(prompts)
    if not prompts:
        raise ValueError("prompts must contain at least one prompt")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Fused SDPA/flash kernels do not return attention probabilities, so the
    # eager implementation is requested explicitly. Attention output itself is
    # requested per forward call below.
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map='auto',
        attn_implementation='eager'
    )
    model.eval()

    records = []
    try:
        for prompt in prompts:
            inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

            with torch.no_grad():
                outputs = model(**inputs, output_attentions=True)

            for layer_idx, attn in enumerate(outputs.attentions):
                # Batch element 0, averaged over heads -> (seq, seq).
                # Cast to float32 first so the head average is not done in fp16.
                attn_avg = attn[0].float().mean(dim=0).cpu()
                entropy, sparsity = attention_entropy_and_sparsity(attn_avg)
                records.append(
                    {'layer': layer_idx, 'entropy': entropy, 'sparsity': sparsity}
                )
    finally:
        # Release the model even if a forward pass raised.
        del model, tokenizer
        gc.collect()
        torch.cuda.empty_cache()

    # Aggregate by layer (mean over prompts).
    by_layer = pd.DataFrame(records).groupby('layer').mean()

    return {
        'model': model_name,
        'entropy_by_layer': by_layer['entropy'].tolist(),
        'sparsity_by_layer': by_layer['sparsity'].tolist(),
        'mean_entropy': float(by_layer['entropy'].mean()),
        'mean_sparsity': float(by_layer['sparsity'].mean()),
    }

print("Attention pattern analysis function defined.")

In [None]:
# Run the attention analysis on the same three models, using TEST_PROMPTS.
attn_results = {
    model_name: analyze_attention_patterns(model_name, TEST_PROMPTS)
    for model_name in ('facebook/opt-125m', 'gpt2', 'EleutherAI/pythia-160m')
}

# Summary
banner = "=" * 60
print("\n" + banner)
print("ATTENTION PATTERN COMPARISON")
print(banner)

# One line per aggregate metric, three decimals each.
for name, result in attn_results.items():
    print(f"\n{name.split('/')[-1]}:")
    for label, key in (('Mean Entropy', 'mean_entropy'), ('Mean Sparsity', 'mean_sparsity')):
        print(f"  {label}: {result[key]:.3f}")

## 5. Visualization

In [None]:
# 2x2 summary figure: per-layer curves on top, aggregate bar chart and table below.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
(ax1, ax2), (ax3, ax4) = axes

# Fixed colour per model short name; unknown names fall back to gray.
colors = dict(zip(
    ('opt-125m', 'gpt2', 'pythia-160m'),
    ('#3498DB', '#8E44AD', '#E74C3C'),
))

# Plot 1: W_V Frobenius norm by layer
for name, result in W_V_results.items():
    short_name = name.split('/')[-1]
    per_layer = result['stats']
    ax1.plot(
        [entry['layer'] for entry in per_layer],
        [entry['frobenius_norm'] for entry in per_layer],
        label=short_name,
        color=colors.get(short_name, 'gray'),
        linewidth=2,
    )
ax1.set(xlabel='Layer', ylabel='||W_V||_F', title='W_V Frobenius Norm by Layer')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Plot 2: Attention entropy by layer
for name, result in attn_results.items():
    short_name = name.split('/')[-1]
    entropy_curve = result['entropy_by_layer']
    ax2.plot(
        range(len(entropy_curve)),
        entropy_curve,
        label=short_name,
        color=colors.get(short_name, 'gray'),
        linewidth=2,
    )
ax2.set(xlabel='Layer', ylabel='Entropy', title='Attention Entropy by Layer')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Plot 3: W_V spectral norm comparison (one bar per model, layer-mean value)
models = list(W_V_results)
model_names = [m.split('/')[-1] for m in models]
spectral_norms = [W_V_results[m]['mean_spectral'] for m in models]
bar_colors = [colors.get(n, 'gray') for n in model_names]
bars = ax3.bar(model_names, spectral_norms, color=bar_colors)
ax3.set_ylabel('Mean ||W_V||_2')
ax3.set_title('W_V Spectral Norm (KEY DIFFERENCE!)')
ax3.grid(True, alpha=0.3, axis='y')

# Plot 4: Summary comparison rendered as a table on an axis-less panel
comparison_data = [
    {
        'Model': name.split('/')[-1],
        '||W_V||_F': W_V_results[name]['mean_frobenius'],
        'Entropy': attn_results[name]['mean_entropy'],
    }
    for name in ('facebook/opt-125m', 'gpt2', 'EleutherAI/pythia-160m')
]
df_comparison = pd.DataFrame(comparison_data)

ax4.axis('off')
table = ax4.table(
    cellText=df_comparison.round(2).values,
    colLabels=df_comparison.columns,
    cellLoc='center',
    loc='center',
)
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1.2, 1.5)
ax4.set_title('Summary Comparison', pad=20)

# Title, layout, then write the PNG before showing the figure.
plt.suptitle('OPT Anomaly Investigation', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig('OPT_anomaly_investigation.png', dpi=150, bbox_inches='tight')
plt.show()

## 6. Conclusion

In [None]:
print("\n" + "="*70)
print("OPT ANOMALY INVESTIGATION - CONCLUSIONS")
print("="*70)

# GPT-2 / OPT-125m mean-trace ratio from the H4 v2 validation
# (62,696 / 2,368, about 26x): the gap the W_V hypothesis has to explain.
OBSERVED_TRACE_RATIO = 62_696 / 2_368

# Analyze findings
opt_spectral = W_V_results['facebook/opt-125m']['mean_spectral']
gpt2_spectral = W_V_results['gpt2']['mean_spectral']
pythia_spectral = W_V_results['EleutherAI/pythia-160m']['mean_spectral']
opt_frobenius = W_V_results['facebook/opt-125m']['mean_frobenius']
gpt2_frobenius = W_V_results['gpt2']['mean_frobenius']

print(f"\n1. W_V SPECTRAL NORM:")
print(f"   OPT-125m:    {opt_spectral:.2f}")
print(f"   GPT-2:       {gpt2_spectral:.2f}")
print(f"   Pythia-160m: {pythia_spectral:.2f}")
print(f"   Ratio GPT-2/OPT: {gpt2_spectral/opt_spectral:.1f}x")

print(f"\n2. ATTENTION ENTROPY:")
print(f"   OPT-125m:    {attn_results['facebook/opt-125m']['mean_entropy']:.3f}")
print(f"   GPT-2:       {attn_results['gpt2']['mean_entropy']:.3f}")
print(f"   Pythia-160m: {attn_results['EleutherAI/pythia-160m']['mean_entropy']:.3f}")

# The verdict rests on the scaling Trace ∝ ||W_V||_F², so it is tested with the
# Frobenius norm and against the observed trace ratio. A fixed 3x spectral-norm
# threshold would only account for a 9x trace gap, not 26x.
# NOTE(review): uses the layer-mean Frobenius norm, so this is a first-order
# estimate of the implied trace ratio.
frobenius_ratio = gpt2_frobenius / opt_frobenius
predicted_trace_ratio = frobenius_ratio ** 2
required_norm_ratio = OBSERVED_TRACE_RATIO ** 0.5

print(f"\n3. W_V FROBENIUS NORM:")
print(f"   OPT-125m:    {opt_frobenius:.2f}")
print(f"   GPT-2:       {gpt2_frobenius:.2f}")
print(f"   Ratio GPT-2/OPT: {frobenius_ratio:.1f}x "
      f"(needs {required_norm_ratio:.1f}x to explain {OBSERVED_TRACE_RATIO:.0f}x trace)")
print(f"   Implied trace ratio (ratio²): {predicted_trace_ratio:.1f}x")

print("\n" + "-"*70)
print("\nHYPOTHESIS VERDICT:")

if predicted_trace_ratio >= OBSERVED_TRACE_RATIO:
    print("\n✅ CONFIRMED: W_V norm explains the trace difference!")
    print("   OPT has much smaller ||W_V||, leading to smaller trace.")
    print("   Trace ∝ ||W_V||_F², so small W_V → small trace.")
elif frobenius_ratio > 1:
    print("\n⚠️ PARTIAL: W_V difference exists but may not fully explain 26x.")
    print(f"   ||W_V||_F² accounts for ~{predicted_trace_ratio:.1f}x "
          f"of the observed {OBSERVED_TRACE_RATIO:.0f}x.")
else:
    print("\n❌ NOT SUPPORTED: OPT's ||W_V||_F is not smaller than GPT-2's.")
    print("   The trace difference must come from elsewhere.")

print("\n" + "-"*70)
print("\nIMPLICATION:")
print("OPT may represent a THIRD thermodynamic category:")
print("  - DAMPEN (EleutherAI): High W_V, low trace growth")
print("  - EXPAND (OpenAI): High W_V, high trace growth")
print("  - COMPACT (Meta?): Low W_V, low trace, but still G > 1")

In [None]:
# Save results
# Take the clock reading once so the filename stamp and the recorded date agree.
run_time = datetime.now()
timestamp = run_time.strftime('%Y%m%d_%H%M%S')

output = {
    'experiment': 'OPT Anomaly Investigation',
    'date': run_time.isoformat(),
    # Per-layer 'stats' are dropped; only the aggregate W_V numbers are saved.
    'W_V_analysis': {k: {kk: vv for kk, vv in v.items() if kk != 'stats'}
                     for k, v in W_V_results.items()},
    'attention_analysis': attn_results,
}

filename = f'OPT_anomaly_{timestamp}.json'
with open(filename, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2)
print(f"\nResults saved: {filename}")

# Offer browser downloads when running in Google Colab. Outside Colab the
# import fails and the files simply stay on disk.
try:
    from google.colab import files
except ImportError:
    print("Files saved locally.")
else:
    try:
        files.download(filename)
        files.download('OPT_anomaly_investigation.png')
    except Exception as exc:
        # Best effort: the files are already written, so report and carry on.
        print(f"Files saved locally (Colab download failed: {exc}).")