# Restriction Map Spectral Analysis v2 (Fixed Architectures)

## Fixes in v2:
- GPT-Neo: Uses `attn.attention.k_proj`, `v_proj`, `q_proj`, `out_proj`
- OPT: Uses `model.model.decoder.layers` with `self_attn.*_proj`
- BLOOM: Uses `transformer.h` with `self_attention.query_key_value`

---
*Paper #3: Thermodynamics of Language Models*

In [None]:
!pip install transformers torch matplotlib seaborn pandas scipy -q

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from transformers import AutoModelForCausalLM, AutoConfig
import gc
import json
from datetime import datetime
from typing import Dict, List, Optional
import warnings
# Silence library warnings for the whole notebook session.
warnings.filterwarnings('ignore')

# Plot styling: matplotlib theme first, then seaborn's "paper" context on top.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("paper", font_scale=1.2)

# Environment report: PyTorch version, CUDA visibility and (if any) GPU name.
has_cuda = torch.cuda.is_available()
print("PyTorch: {}".format(torch.__version__))
print("CUDA: {}".format(has_cuda))
if has_cuda:
    print("GPU: {}".format(torch.cuda.get_device_name(0)))

In [None]:
# Models to test - representatives from each lab.
# Each row is (hub id, lab, expected regime, plot colour, plot marker); the
# comprehension turns the rows into the per-model config dicts, preserving
# the row order as the dict's insertion order.
MODELS_CONFIG = {
    hub_id: {
        "lab": lab,
        "expected": expected,
        "color": color,
        "marker": marker,
    }
    for hub_id, lab, expected, color, marker in (
        # EleutherAI (Dampener Heritage)
        ("EleutherAI/pythia-160m", "EleutherAI", "DAMPENER", "#E74C3C", "o"),
        ("EleutherAI/pythia-410m", "EleutherAI", "DAMPENER", "#C0392B", "s"),
        ("EleutherAI/gpt-neo-125M", "EleutherAI", "DAMPENER", "#F39C12", "^"),
        # Meta (Expander Heritage)
        ("facebook/opt-125m", "Meta", "EXPANDER", "#3498DB", "o"),
        ("facebook/opt-350m", "Meta", "EXPANDER", "#2980B9", "s"),
        # BigScience (Expander Heritage - ALiBi)
        ("bigscience/bloom-560m", "BigScience", "EXPANDER", "#27AE60", "o"),
        # OpenAI (Baseline)
        ("gpt2", "OpenAI", "NEUTRAL", "#9B59B6", "o"),
    )
}

print(f"Testing {len(MODELS_CONFIG)} models")

In [None]:
def detect_architecture(model, model_name: str) -> tuple:
    """
    Detect model architecture and return (layers, arch_type).

    Detection is purely structural: the model object is probed for known
    attribute paths. ``model_name`` is currently not consulted.

    Supported architectures, probed in this order:
    - neox: Pythia / GPT-NeoX (model.gpt_neox.layers)
    - opt: OPT (model.model.decoder.layers)
    - llama: Llama/Mistral (model.model.layers)
    - bloom: BLOOM (transformer.h with self_attention.query_key_value)
    - gpt_neo: GPT-Neo (transformer.h with attn.attention.k_proj)
    - gptj: GPT-J (transformer.h with attn.out_proj)
    - gpt2: GPT-2 (transformer.h with attn.c_attn)

    Returns:
        (layers, arch_type) where ``layers`` is the model's layer container,
        or (None, 'unknown') when no layout matches. An empty
        ``transformer.h`` is also reported as unknown, because the
        transformer.h variants can only be told apart by inspecting the
        first layer.
    """
    # Pythia (GPT-NeoX)
    if hasattr(model, 'gpt_neox'):
        return model.gpt_neox.layers, 'neox'

    # OPT and Llama/Mistral both hang their stack off `model.model`.
    inner = getattr(model, 'model', None)
    if hasattr(inner, 'decoder'):
        return inner.decoder.layers, 'opt'
    if hasattr(inner, 'layers'):
        return inner.layers, 'llama'

    # transformer.h based models (BLOOM, GPT-Neo, GPT-J, GPT-2)
    layers = getattr(getattr(model, 'transformer', None), 'h', None)
    if layers is None or len(layers) == 0:
        # Nothing to probe: avoid indexing into an empty layer list.
        return None, 'unknown'

    layer0 = layers[0]

    # BLOOM: fused projection lives at self_attention.query_key_value
    if hasattr(getattr(layer0, 'self_attention', None), 'query_key_value'):
        return layers, 'bloom'

    attn = getattr(layer0, 'attn', None)

    # GPT-Neo: projections are nested one level deeper, under attn.attention
    if hasattr(getattr(attn, 'attention', None), 'k_proj'):
        return layers, 'gpt_neo'

    # GPT-J: has attn.out_proj directly
    if hasattr(attn, 'out_proj'):
        return layers, 'gptj'

    # GPT-2: has the fused attn.c_attn
    if hasattr(attn, 'c_attn'):
        return layers, 'gpt2'

    return None, 'unknown'


def _fused_qkv_head_count(model, attn_module) -> int:
    """Return the attention head count needed to un-interleave a fused QKV weight.

    Looks at the model config first, then at the attention module itself.

    Raises:
        ValueError: if no positive integer head count can be found.
    """
    config = getattr(model, 'config', None)
    candidates = (
        getattr(config, 'num_attention_heads', None),
        getattr(config, 'n_head', None),
        getattr(attn_module, 'num_attention_heads', None),
        getattr(attn_module, 'num_heads', None),
    )
    for value in candidates:
        if isinstance(value, int) and value > 0:
            return value
    raise ValueError("cannot determine attention head count for fused QKV split")


def _split_fused_qkv(qkv: torch.Tensor, num_heads: int) -> tuple:
    """Split a head-interleaved fused QKV weight into (W_Q, W_K, W_V).

    The GPT-NeoX and BLOOM implementations in Hugging Face Transformers
    reshape the fused projection output to ``[..., num_heads, 3 * head_dim]``
    before slicing out query/key/value, so the weight rows are ordered
    ``[h0_q, h0_k, h0_v, h1_q, ...]`` rather than ``[all_q, all_k, all_v]``.
    Cutting the weight into three contiguous row blocks therefore mixes Q, K
    and V rows; this helper regroups the rows per projection instead.

    Raises:
        ValueError: if the row count is not divisible by ``3 * num_heads``.
    """
    out_features, in_features = qkv.shape
    if out_features % (3 * num_heads) != 0:
        raise ValueError(
            f"fused QKV rows ({out_features}) not divisible by 3 * num_heads ({3 * num_heads})"
        )
    head_dim = out_features // (3 * num_heads)
    per_head = qkv.reshape(num_heads, 3, head_dim, in_features)
    return tuple(
        per_head[:, part].reshape(num_heads * head_dim, in_features)
        for part in range(3)
    )


def get_projection_matrices(model, model_name: str) -> Dict[str, List[torch.Tensor]]:
    """
    Extract W_Q, W_K, W_V and W_O from all layers.

    The layer container and architecture tag come from
    ``detect_architecture``; each architecture branch then reads the
    projection weights from the attribute paths that architecture uses.
    Fused QKV weights of the 'neox' and 'bloom' branches are un-interleaved
    per attention head (see ``_split_fused_qkv``).

    Args:
        model: the loaded causal LM whose attention weights are inspected.
        model_name: used only in diagnostic messages and passed through to
            ``detect_architecture``.

    Returns:
        ``{}`` when the architecture is unknown. Otherwise a dict with the
        per-layer float32 weight lists under 'W_Q', 'W_K', 'W_V', 'W_O', plus
        the architecture tag under 'arch' and the layer count under
        'n_layers'. A layer whose extraction raises is reported and skipped,
        so the lists may be shorter than 'n_layers'.
    """
    layers, arch = detect_architecture(model, model_name)

    if layers is None:
        print(f"  Unknown architecture for {model_name}")
        # Debug: print model structure
        print(f"  Model attributes: {[a for a in dir(model) if not a.startswith('_')]}")
        return {}

    print(f"  Architecture: {arch}, Layers: {len(layers)}")

    W_V_list = []
    W_O_list = []
    W_Q_list = []
    W_K_list = []

    for i, layer in enumerate(layers):
        try:
            W_Q, W_K, W_V, W_O = None, None, None, None

            if arch == 'neox':  # Pythia
                attn = layer.attention
                qkv = attn.query_key_value.weight.detach()
                W_Q, W_K, W_V = _split_fused_qkv(qkv, _fused_qkv_head_count(model, attn))
                W_O = attn.dense.weight.detach()

            elif arch == 'gpt_neo':  # GPT-Neo
                # GPT-Neo has nested attention structure
                attn = layer.attn.attention
                W_Q = attn.q_proj.weight.detach()
                W_K = attn.k_proj.weight.detach()
                W_V = attn.v_proj.weight.detach()
                W_O = attn.out_proj.weight.detach()

            elif arch == 'gptj':  # GPT-J
                W_Q = layer.attn.q_proj.weight.detach()
                W_K = layer.attn.k_proj.weight.detach()
                W_V = layer.attn.v_proj.weight.detach()
                W_O = layer.attn.out_proj.weight.detach()

            elif arch == 'gpt2':  # GPT-2
                # The fused axis is dim 1 here, so each slice is transposed
                # to match the orientation used by the other branches.
                c_attn = layer.attn.c_attn.weight.detach()
                hidden_size = c_attn.shape[1] // 3
                W_Q = c_attn[:, :hidden_size].T
                W_K = c_attn[:, hidden_size:2*hidden_size].T
                W_V = c_attn[:, 2*hidden_size:].T
                W_O = layer.attn.c_proj.weight.detach().T

            elif arch == 'opt':  # OPT
                attn = layer.self_attn
                W_Q = attn.q_proj.weight.detach()
                W_K = attn.k_proj.weight.detach()
                W_V = attn.v_proj.weight.detach()
                W_O = attn.out_proj.weight.detach()

            elif arch == 'bloom':  # BLOOM
                attn = layer.self_attention
                qkv = attn.query_key_value.weight.detach()
                W_Q, W_K, W_V = _split_fused_qkv(qkv, _fused_qkv_head_count(model, attn))
                W_O = attn.dense.weight.detach()

            elif arch == 'llama':  # Llama/Mistral
                W_Q = layer.self_attn.q_proj.weight.detach()
                W_K = layer.self_attn.k_proj.weight.detach()
                W_V = layer.self_attn.v_proj.weight.detach()
                W_O = layer.self_attn.o_proj.weight.detach()

            if W_V is not None:
                W_Q_list.append(W_Q.float())
                W_K_list.append(W_K.float())
                W_V_list.append(W_V.float())
                W_O_list.append(W_O.float())

        except Exception as e:
            # Best effort: report the failing layer and keep going.
            print(f"  Layer {i} error: {e}")
            continue

    return {
        'W_Q': W_Q_list,
        'W_K': W_K_list,
        'W_V': W_V_list,
        'W_O': W_O_list,
        'arch': arch,
        'n_layers': len(layers)
    }

In [None]:
def compute_spectral_signature(matrices: List[torch.Tensor]) -> Dict[str, np.ndarray]:
    """
    Summarise each matrix by its singular-value spectrum.

    For every matrix the singular values are computed and reduced to three
    scalars: the spectral norm (first singular value), the Frobenius norm
    (root of the summed squared singular values) and an entropy-based
    effective rank (exp of the Shannon entropy of the normalised spectrum).

    Returns ``{}`` for an empty input. Otherwise a dict with the raw spectra
    under 'singular_values' (list of arrays) and one array per scalar under
    'spectral_norm', 'frobenius_norm' and 'effective_rank'. A matrix whose
    processing raises is reported and skipped.
    """
    if not matrices:
        return {}

    spectra, top_values, frob_values, rank_values = [], [], [], []

    for weight in matrices:
        try:
            sigma = torch.linalg.svdvals(weight).cpu().numpy()

            spectra.append(sigma)
            top_values.append(sigma[0])

            energy = np.sum(sigma**2)
            frob_values.append(np.sqrt(energy))

            # Normalise the spectrum to a distribution; the epsilons guard
            # against division by zero and log(0).
            weights = sigma / (sigma.sum() + 1e-10)
            shannon = -np.sum(weights * np.log(weights + 1e-10))
            rank_values.append(np.exp(shannon))

        except Exception as err:
            print(f"  SVD error: {err}")
            continue

    signature = {'singular_values': spectra}
    signature['spectral_norm'] = np.array(top_values)
    signature['frobenius_norm'] = np.array(frob_values)
    signature['effective_rank'] = np.array(rank_values)
    return signature

In [None]:
def analyze_model(model_name: str, config: Dict) -> Optional[Dict]:
    """
    Run the complete spectral pipeline for one model.

    Loads the checkpoint in float32, pulls out the attention projection
    weights, computes per-layer spectral statistics for W_V, W_O, W_Q and
    W_K, then frees the model.

    Returns a result dict (model metadata from ``config`` plus, per matrix
    type, the per-layer arrays and the mean/std of the spectral norm), or
    ``None`` when no matrices could be extracted or any step raised.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Analyzing: {model_name}")
    print(f"Lab: {config['lab']}, Expected: {config['expected']}")
    print(f"{banner}")

    try:
        print("  Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
        )
        model.eval()

        print("  Extracting projection matrices...")
        matrices = get_projection_matrices(model, model_name)

        # Without value projections there is nothing to analyse.
        if not (matrices and matrices.get('W_V')):
            print("  No matrices extracted!")
            del model
            gc.collect()
            return None

        print("  Computing spectral signatures...")
        results = {'model': model_name}
        for meta_key in ('lab', 'expected', 'color', 'marker'):
            results[meta_key] = config[meta_key]
        results['arch'] = matrices['arch']
        results['n_layers'] = matrices['n_layers']

        for kind in ('W_V', 'W_O', 'W_Q', 'W_K'):
            weights = matrices.get(kind)
            if not weights:
                continue
            spectral = compute_spectral_signature(weights)
            if not spectral:
                continue
            norms = spectral['spectral_norm']
            results[f'{kind}_spectral_norm'] = norms
            results[f'{kind}_frobenius_norm'] = spectral['frobenius_norm']
            results[f'{kind}_effective_rank'] = spectral['effective_rank']
            results[f'{kind}_mean_spectral'] = float(np.mean(norms))
            results[f'{kind}_std_spectral'] = float(np.std(norms))

        # Release the weights before the next model is loaded.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Report the headline numbers only when they were actually computed.
        for kind in ('W_V', 'W_O'):
            mean_norm = results.get(f'{kind}_mean_spectral', 'N/A')
            if isinstance(mean_norm, float):
                print(f"  {kind} mean spectral norm: {mean_norm:.4f}")

        return results

    except Exception as e:
        print(f"  FAILED: {e}")
        import traceback
        traceback.print_exc()
        return None

## Run Analysis

In [None]:
# Analyse every configured model in turn; keep only the runs that produced
# a result (analyze_model returns None on failure).
all_results = []

for hub_id, model_cfg in MODELS_CONFIG.items():
    outcome = analyze_model(hub_id, model_cfg)
    if not outcome:
        continue
    all_results.append(outcome)

rule = '=' * 60
print(f"\n\n{rule}")
print(f"Successfully analyzed {len(all_results)} / {len(MODELS_CONFIG)} models")
print(f"{rule}")

## Visualization

In [None]:
if not all_results:
    print("No results to visualize!")
else:
    fig, axes = plt.subplots(2, 2, figsize=(14, 12))

    # Three of the four panels are per-layer line plots that differ only in
    # the result key, the titles and whether the legend label names the lab.
    # Each row: (axis, result key, title, y-label, include lab in label).
    line_panels = [
        (axes[0, 0], 'W_V_spectral_norm', 'W_V Spectral Norm by Layer', '||W_V||_2', True),
        (axes[0, 1], 'W_O_spectral_norm', 'W_O Spectral Norm by Layer', '||W_O||_2', True),
        (axes[1, 1], 'W_V_effective_rank', 'W_V Effective Rank by Layer', 'Effective Rank', False),
    ]
    for panel, key, title, ylabel, with_lab in line_panels:
        for res in all_results:
            if key not in res:
                continue
            short_name = res['model'].split('/')[-1]
            if with_lab:
                label = f"{short_name} ({res['lab']})"
            else:
                label = f"{short_name}"
            panel.plot(
                res[key],
                label=label,
                color=res['color'],
                marker=res['marker'],
                markersize=4,
                alpha=0.8,
            )
        panel.set_title(title, fontsize=14)
        panel.set_xlabel('Layer')
        panel.set_ylabel(ylabel)
        panel.legend(fontsize=8)
        panel.grid(True, alpha=0.3)

    # Remaining panel (bottom-left): per-lab bar chart of the mean spectral
    # norms. A lab's bar colour is the colour of its first model.
    bar_ax = axes[1, 0]
    lab_data = {}
    for res in all_results:
        bucket = lab_data.setdefault(
            res['lab'], {'W_V': [], 'W_O': [], 'color': res['color']}
        )
        for kind in ('W_V', 'W_O'):
            mean_key = f'{kind}_mean_spectral'
            if mean_key in res:
                bucket[kind].append(res[mean_key])

    labs = list(lab_data)
    positions = np.arange(len(labs))
    bar_width = 0.35

    # Labs without any value for a matrix type get a zero-height bar.
    wv_means = [np.mean(lab_data[lab]['W_V']) if lab_data[lab]['W_V'] else 0 for lab in labs]
    wo_means = [np.mean(lab_data[lab]['W_O']) if lab_data[lab]['W_O'] else 0 for lab in labs]
    colors = [lab_data[lab]['color'] for lab in labs]

    bar_ax.bar(positions - bar_width/2, wv_means, bar_width, label='W_V', color=colors, alpha=0.7)
    bar_ax.bar(positions + bar_width/2, wo_means, bar_width, label='W_O', color=colors, alpha=0.4, hatch='//')
    bar_ax.set_title('Mean Spectral Norm by Lab', fontsize=14)
    bar_ax.set_xlabel('Lab')
    bar_ax.set_ylabel('Mean Spectral Norm')
    bar_ax.set_xticks(positions)
    bar_ax.set_xticklabels(labs)
    bar_ax.legend()
    bar_ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.savefig('restriction_map_spectral_analysis.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("\nFigure saved: restriction_map_spectral_analysis.png")

## Summary & Hypothesis Test

In [None]:
if not all_results:
    print("No data for analysis")
else:
    rule = "=" * 80

    # Summary table: one row per analysed model; missing means become NaN.
    summary_data = [
        {
            'Model': res['model'].split('/')[-1],
            'Lab': res['lab'],
            'Expected': res['expected'],
            'Layers': res['n_layers'],
            'W_V Mean': res.get('W_V_mean_spectral', np.nan),
            'W_O Mean': res.get('W_O_mean_spectral', np.nan),
        }
        for res in all_results
    ]

    df = pd.DataFrame(summary_data)
    print("\n" + rule)
    print("SPECTRAL SIGNATURE SUMMARY")
    print(rule)
    print(df.to_string(index=False))

    # Lab averages
    print("\n" + rule)
    print("LAB AVERAGES")
    print(rule)
    lab_avg = df.groupby('Lab')[['W_V Mean', 'W_O Mean']].mean().round(4)
    print(lab_avg)

    # Hypothesis test: EleutherAI models against all other labs, on the
    # per-model mean W_V spectral norm.
    from scipy import stats

    is_eleuther = df['Lab'] == 'EleutherAI'
    eleuther_wv = df.loc[is_eleuther, 'W_V Mean'].dropna().values
    others_wv = df.loc[~is_eleuther, 'W_V Mean'].dropna().values

    print("\n" + rule)
    print("HYPOTHESIS TEST: EleutherAI vs Others")
    print(rule)

    smaller_group = min(len(eleuther_wv), len(others_wv))
    if smaller_group > 0:
        print(f"\nEleutherAI W_V mean: {np.mean(eleuther_wv):.4f}")
        print(f"Others W_V mean: {np.mean(others_wv):.4f}")

        # The rank test is only run with at least two samples per group.
        if smaller_group >= 2:
            stat, p = stats.mannwhitneyu(eleuther_wv, others_wv, alternative='two-sided')
            print(f"\nMann-Whitney U: {stat:.2f}, p = {p:.4f}")
            print(f"Significant difference: {'YES' if p < 0.05 else 'NO'}")

In [None]:
# Persist the run: full per-model results as JSON, summary table as CSV.
if all_results:
    def convert_to_serializable(obj):
        """Convert NumPy arrays and the listed NumPy scalar types to plain Python."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (np.float32, np.float64)):
            return float(obj)
        if isinstance(obj, (np.int32, np.int64)):
            return int(obj)
        return obj

    serializable_results = [
        {key: convert_to_serializable(value) for key, value in res.items()}
        for res in all_results
    ]

    output = {
        'experiment': 'Restriction Map Spectral Analysis v2',
        'date': datetime.now().isoformat(),
        'hypothesis': 'Different labs have different spectral signatures in restriction maps',
        'models_tested': len(all_results),
        'results': serializable_results,
        'summary': df.to_dict('records') if len(df) > 0 else [],
    }

    # Both output files share one timestamp suffix.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f'restriction_map_spectral_{timestamp}.json'

    with open(filename, 'w') as f:
        json.dump(output, f, indent=2)

    print(f"\nResults saved to: {filename}")

    # Also save summary CSV
    csv_name = f'restriction_map_summary_{timestamp}.csv'
    df.to_csv(csv_name, index=False)
    print(f"Summary saved to: {csv_name}")

In [None]:
# Closing banner plus the list of artefacts the notebook writes.
for line in (
    "\n" + "=" * 80,
    "ANALYSIS COMPLETE",
    "=" * 80,
    "\nDownload these files:",
    "  - restriction_map_spectral_analysis.png",
    "  - restriction_map_spectral_*.json",
    "  - restriction_map_summary_*.csv",
):
    print(line)

## Download Results (Colab)

In [None]:
# Download all result files from Colab
import os
import glob

png_name = 'restriction_map_spectral_analysis.png'

try:
    # Only importable inside Google Colab; elsewhere the except branch below
    # just lists the files that were written locally.
    from google.colab import files

    print("Downloading results from Colab...")

    # Figure first, if it exists on disk.
    if os.path.exists(png_name):
        files.download(png_name)
        print(f"  Downloaded: {png_name}")

    # Then every JSON result file, followed by every summary CSV.
    for pattern in ('restriction_map_spectral_*.json', 'restriction_map_summary_*.csv'):
        for path in glob.glob(pattern):
            files.download(path)
            print(f"  Downloaded: {path}")

    print("\nAll files downloaded!")

except ImportError:
    print("Not running in Colab - files saved locally:")
    print(f"  - {png_name}")
    for pattern in ('restriction_map_*.json', 'restriction_map_*.csv'):
        for path in glob.glob(pattern):
            print(f"  - {path}")