# E11-Indra-Gemma27B-V4: DUAL MODE (8-bit / Full Precision)

**Paper 4: Behavioral Sink Dynamics**

## Purpose: Validate Split-Brain Finding

E08c showed that 8-bit quantization MASSIVELY biases SI (8-bit reads ~50% too low; FP is ~2× higher):
- 8-bit Base SI: 0.349
- FP Base SI: 0.693 (+98.7%!)

This notebook tests whether the Split-Brain pattern is:
- **REAL** (architecture-determined) → FP shows same pattern
- **ARTIFACT** (quantization-induced) → FP shows different pattern

---

## MODE SELECTION

```python
# In Cell 1, set exactly ONE of the following (not both):
PRECISION_MODE = "fp"    # Full Precision (A100-80GB required)
PRECISION_MODE = "8bit"  # 8-bit Quantization (any GPU with 24GB+)
```

---

In [None]:
# Cell 1: MODE SELECTION + Setup
# ============================================================
# CRITICAL: SET YOUR MODE HERE!
# ============================================================

PRECISION_MODE = "fp"  # OPTIONS: "fp" (Full Precision) or "8bit"

# ============================================================

!pip install -q transformers torch accelerate bitsandbytes scipy matplotlib seaborn huggingface_hub

import torch
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from scipy.stats import entropy as scipy_entropy
import json
import hashlib
import warnings
import gc
import shutil
import psutil
import os
import math
warnings.filterwarnings('ignore')

from pathlib import Path
from datetime import datetime

# ============ E11-v3 METHODOLOGY STANDARD ============
SEEDS = [42, 123, 456]
DTYPE = torch.bfloat16
EXPECTED_MD5 = "715065bab181f46bf12ed471951141e2"


def verify_prompts(prompts):
    """Check the prompt list against the canonical MD5 fingerprint.

    The prompts are joined with '|||', UTF-8 encoded and hashed with MD5;
    the digest is compared with EXPECTED_MD5 and a short report is printed.

    Returns:
        (verified, actual_md5): whether the digest matches, and the digest.
    """
    digest = hashlib.md5('|||'.join(prompts).encode()).hexdigest()
    matches = digest == EXPECTED_MD5
    status = 'YES' if matches else 'NO - MISMATCH!'
    report = (
        f"  Prompt MD5: {digest}",
        f"  Expected:   {EXPECTED_MD5}",
        f"  Verified:   {status}",
    )
    for line in report:
        print(line)
    return matches, digest

# Seed every RNG in use (Python, NumPy, torch CPU/CUDA) with the first
# methodology seed so the baseline measurement is reproducible.
SEED = SEEDS[0]
# NOTE(review): set after interpreter start, so this presumably only affects
# child processes, not hashing in this process - confirm if it matters.
os.environ['PYTHONHASHSEED'] = str(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(SEED)

# Run identifier used in the result filename, plus output directories.
TIMESTAMP = f"{datetime.now():%Y%m%d_%H%M%S}"
Path('figures').mkdir(parents=True, exist_ok=True)
Path('results').mkdir(parents=True, exist_ok=True)

# ============ DISK CLEANUP FUNCTIONS (for FP mode) ============
def get_disk_free_gb():
    """Return free disk space in GB (bytes / 1e9).

    Measures '/content' when that path exists, otherwise '/'.
    """
    if os.path.exists('/content'):
        root = '/content'
    else:
        root = '/'
    usage = shutil.disk_usage(root)
    return usage.free / 1e9

def _dir_size_gb(root):
    """Return the total size in GB (bytes / 1e9) of the files under *root*."""
    total_bytes = 0
    for dirpath, _dirnames, filenames in os.walk(root):
        for fname in filenames:
            fpath = os.path.join(dirpath, fname)
            # The HF cache exposes snapshot files as symlinks into blobs/;
            # following them would count every blob twice.
            if os.path.islink(fpath):
                continue
            try:
                total_bytes += os.path.getsize(fpath)
            except OSError:
                # File vanished or is unreadable; the size is informational only.
                continue
    return total_bytes / 1e9


def clear_hf_cache(model_name=None):
    """Delete cached Hugging Face Hub downloads to free disk space.

    The cache directory is taken from HF_HUB_CACHE, else $HF_HOME/hub, else
    ~/.cache/huggingface/hub. Does nothing if that directory does not exist.

    Args:
        model_name: repo id such as 'org/model'. When given, only that
            model's cache folder ('models--org--model') is removed; when
            falsy, the whole hub cache directory is removed.
    """
    hf_home = os.environ.get("HF_HOME") or "~/.cache/huggingface"
    hf_cache = os.environ.get("HF_HUB_CACHE") or os.path.join(hf_home, "hub")
    hf_cache = os.path.expanduser(hf_cache)
    if not os.path.exists(hf_cache):
        return
    if model_name:
        cache_name = model_name.replace('/', '--')
        cache_path = os.path.join(hf_cache, f"models--{cache_name}")
        if os.path.exists(cache_path):
            size_gb = _dir_size_gb(cache_path)
            shutil.rmtree(cache_path, ignore_errors=True)
            print(f"  Cleared {model_name} cache: {size_gb:.1f} GB")
    else:
        size_gb = _dir_size_gb(hf_cache)
        shutil.rmtree(hf_cache, ignore_errors=True)
        print(f"  Cleared ALL HF cache: {size_gb:.1f} GB")

def clear_gpu_memory():
    """Run Python garbage collection, then empty the CUDA cache.

    When CUDA is available, calls torch.cuda.empty_cache() followed by
    torch.cuda.synchronize(); otherwise only the garbage collector runs.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

def nuclear_cleanup():
    """Free as much GPU memory and disk space as possible.

    Clears GPU memory, removes the ENTIRE Hugging Face hub cache, deletes
    ~/.cache/torch if present, and prints the resulting free disk space.
    """
    print("\nNUCLEAR CLEANUP...")
    clear_gpu_memory()
    clear_hf_cache()
    torch_cache_dir = os.path.expanduser("~/.cache/torch")
    if os.path.exists(torch_cache_dir):
        shutil.rmtree(torch_cache_dir, ignore_errors=True)
    free_gb = get_disk_free_gb()
    print(f"  Disk Free: {free_gb:.1f} GB")

# ============ RESOURCE VALIDATION ============
# Checks GPU/VRAM for the selected mode, reports RAM and disk, and attempts
# a Hugging Face login via Colab secrets.
print("="*70)
print(f"E11-INDRA-GEMMA27B-V4: DUAL MODE")
print(f"MODE: {PRECISION_MODE.upper()}")
print("="*70)

if not torch.cuda.is_available():
    raise RuntimeError("GPU required!")

gpu_name = torch.cuda.get_device_name(0)
gpu_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f"\nGPU: {gpu_name}")
print(f"VRAM: {gpu_mem_gb:.1f} GB")

# Mode-specific validation: FP mode aborts on insufficient VRAM,
# 8-bit mode only warns.
if PRECISION_MODE == "fp":
    REQUIRED_VRAM_GB = 70
    if gpu_mem_gb < REQUIRED_VRAM_GB:
        print(f"\n{'!'*60}")
        print(f"INSUFFICIENT VRAM FOR FULL PRECISION!")
        print(f"Required: {REQUIRED_VRAM_GB} GB, Available: {gpu_mem_gb:.1f} GB")
        print(f"Switch to PRECISION_MODE = '8bit' or use A100-80GB")
        print(f"{'!'*60}")
        raise RuntimeError(f"Need {REQUIRED_VRAM_GB}GB VRAM for FP mode")
    print(f"\nFP MODE: VRAM check PASSED ({gpu_mem_gb:.1f} GB >= {REQUIRED_VRAM_GB} GB)")
else:
    REQUIRED_VRAM_GB = 24
    if gpu_mem_gb < REQUIRED_VRAM_GB:
        print(f"\nWARNING: Low VRAM ({gpu_mem_gb:.1f} GB). 8-bit may still fail.")
    print(f"\n8-bit MODE: Using quantization to fit in {gpu_mem_gb:.1f} GB")

# RAM + Disk
ram_free = psutil.virtual_memory().available / 1e9
disk_free = get_disk_free_gb()
print(f"RAM Free: {ram_free:.1f} GB")
print(f"Disk Free: {disk_free:.1f} GB")

if PRECISION_MODE == "fp" and disk_free < 60:
    print("\nLow disk for FP mode! Running cleanup...")
    nuclear_cleanup()

# HF Login (best effort). Import failures mean "not in Colab"; any other
# failure is reported with its cause instead of being silently swallowed,
# and KeyboardInterrupt/SystemExit are no longer caught.
try:
    from google.colab import userdata
    from huggingface_hub import login
except ImportError:
    print("Not in Colab or no HF_TOKEN")
else:
    try:
        hf_token = userdata.get('HF_TOKEN')
        if hf_token:
            login(token=hf_token)
            print("HF Login: SUCCESS")
        else:
            print("HF_TOKEN secret is empty - skipping HF login")
    except Exception as exc:
        print(f"Not in Colab or no HF_TOKEN ({type(exc).__name__}: {exc})")

print(f"\nTimestamp: {TIMESTAMP}")
print(f"E11-v3 Standard: Seeds={SEEDS}")

In [None]:
# Cell 2: Configuration
# Experiment constants, 8-bit reference values, and the canonical prompt
# set (verified against EXPECTED_MD5 before anything else runs).

MODEL_NAME = 'google/gemma-2-27b-it'
RHO_CRIT = 0.267
NOISE_LEVELS = [0.0, 0.01, 0.02, 0.05, 0.1, 0.2]
MAX_LENGTH = 128
PRIMARY_SEED = 42

# V3 Reference Results (8-bit)
V3_REFERENCE_8BIT = {'early': -10.14, 'middle': -0.01, 'late': 0.0, 'all': -9.73}

# Baseline Local SI (8-bit) - Split-Brain pattern:
#   early = HEALTHY, middle = COLLAPSED, late = DEAD (variance=0)
SPLIT_BRAIN_8BIT = {'early': 0.814, 'middle': 0.0, 'late': float('nan')}

# Canonical Standard-10 v3 Prompts (text must stay byte-identical: MD5-checked)
STANDARD_PROMPTS = [
    'What is the capital of France and what is its population?',
    'If all roses are flowers and some flowers fade quickly, can we conclude that some roses fade quickly? Explain step by step.',
    'Calculate 47 multiplied by 23 and show your work.',
    "Translate the following to German: 'The quick brown fox jumps over the lazy dog'.",
    'Write a Python function that checks if a number is prime.',
    'Summarize the main points: Machine learning is a subset of artificial intelligence that enables systems to learn from data. It uses algorithms to identify patterns and make decisions with minimal human intervention.',
    "Statement A: 'All birds can fly.' Statement B: 'Penguins are birds that cannot fly.' Are these statements contradictory? Explain.",
    'What are the safety considerations when using a kitchen knife?',
    'Write a haiku about artificial intelligence.',
    "Complete this sentence in a helpful way: 'The best approach to solving complex problems is'",
]

# Verify prompts; abort the run on any mismatch.
print("Verifying Standard-10 prompts...")
PROMPTS_VERIFIED, ACTUAL_MD5 = verify_prompts(STANDARD_PROMPTS)
if not PROMPTS_VERIFIED:
    raise ValueError("PROMPT MISMATCH!")

# Run banner: mode, hypothesis, and the 8-bit reference pattern.
print("\n".join([
    f"\n{'=' * 60}",
    "E11-INDRA-GEMMA27B-V4: SPLIT-BRAIN VALIDATION",
    '=' * 60,
    f"\nMODE: {PRECISION_MODE.upper()}",
    "\nHYPOTHESIS:",
    "  If Split-Brain is REAL (architecture):",
    "    → FP should show same pattern (Early=high, Middle/Late=low)",
    "  If Split-Brain is ARTIFACT (quantization):",
    "    → FP should show uniform SI across all regions",
    "\n8-bit Reference (Split-Brain):",
    f"  Early:  SI = {SPLIT_BRAIN_8BIT['early']:.3f} (HEALTHY)",
    f"  Middle: SI = {SPLIT_BRAIN_8BIT['middle']:.3f} (COLLAPSED)",
    "  Late:   SI = NaN (DEAD)",
]))

In [None]:
# Cell 3: Core Functions (Region-Local SI) + SANITY CHECKS

# ============ SANITY CHECK THRESHOLDS (Messner Protocol) ============
SANITY_MIN_VARIANCE = 1e-6       # Abort if attn_stack.std() < this (or is NaN/Inf)
SANITY_MAX_CORRELATION = 0.99   # Abort if corr > this (dead model)
SANITY_MIN_SI = 0.05            # Warn if SI < this and SI != 0 (suspiciously low)

def sanity_check_attention(attn_stack, context=""):
    """Fail-fast sanity check for attention output.

    Args:
        attn_stack: tensor of attention weights (anything exposing
            .std()/.min()/.max()), or None.
        context: label included in messages.

    Returns:
        True when the check passes (a one-line summary is printed).

    Raises:
        ValueError: if attn_stack is None, its std is NaN/Inf, or its std is
            below SANITY_MIN_VARIANCE.
    """
    import math

    if attn_stack is None:
        raise ValueError(f"SANITY FAIL [{context}]: attention output is None!")

    std_val = float(attn_stack.std())
    min_val = float(attn_stack.min())
    max_val = float(attn_stack.max())

    # NaN compares False against any threshold, so it must be rejected
    # explicitly or corrupted attention would pass with a check mark.
    if not math.isfinite(std_val):
        raise ValueError(
            f"SANITY FAIL [{context}]: attention std is not finite!\n"
            f"  std={std_val}, min={min_val:.4f}, max={max_val:.4f}\n"
            f"  This indicates NaN/Inf values in the attention weights!"
        )

    if std_val < SANITY_MIN_VARIANCE:
        raise ValueError(
            f"SANITY FAIL [{context}]: attention variance too low!\n"
            f"  std={std_val:.2e} < threshold={SANITY_MIN_VARIANCE:.2e}\n"
            f"  min={min_val:.4f}, max={max_val:.4f}\n"
            f"  This indicates output_attentions is NOT working!"
        )

    print(f"  Sanity [{context}]: std={std_val:.4f}, min={min_val:.4f}, max={max_val:.4f} ✓")
    return True


def sanity_check_si(si_value, correlation, context=""):
    """Fail-fast sanity check for SI measurement.

    Args:
        si_value: specialization index (1 - mean head correlation).
        correlation: mean pairwise head correlation.
        context: label included in messages.

    Returns:
        True when the check does not abort. A warning is printed when either
        value is NaN, or when SI is non-zero but below SANITY_MIN_SI.

    Raises:
        ValueError: if correlation exceeds SANITY_MAX_CORRELATION.
    """
    import math

    # NaN would slip through both comparisons below without any message.
    # It is only warned about (not raised) because a NaN SI is a recorded
    # outcome in this notebook (8-bit "late" region: DEAD, variance=0).
    if math.isnan(correlation) or math.isnan(si_value):
        print(
            f"  WARNING [{context}]: SI={si_value}, correlation={correlation} "
            f"(NaN - heads have zero variance?)"
        )
        return True

    if correlation > SANITY_MAX_CORRELATION:
        raise ValueError(
            f"SANITY FAIL [{context}]: correlation too high!\n"
            f"  correlation={correlation:.4f} > threshold={SANITY_MAX_CORRELATION}\n"
            f"  SI={si_value:.4f}\n"
            f"  This indicates all heads are identical (measurement error)!"
        )

    if si_value < SANITY_MIN_SI and si_value != 0.0:
        print(f"  WARNING [{context}]: SI={si_value:.4f} < {SANITY_MIN_SI} (suspiciously low)")

    return True


def extract_head_activations(model, tokenizer, prompts, max_length=128, run_sanity=True):
    """Extract attention patterns with sanity checks.

    Each prompt is wrapped in a single-turn chat template, tokenized with
    truncation and padding to `max_length`, and run through the model with
    output_attentions=True. The per-layer attention tensors are stacked
    (batch dimension squeezed) and moved to CPU together with the
    attention mask.

    Args:
        model: causal LM whose forward output exposes `.attentions`.
        tokenizer: matching tokenizer providing `apply_chat_template`.
        prompts: non-empty sequence of prompt strings.
        max_length: padded/truncated token length per prompt.
        run_sanity: when True, run sanity_check_attention on the first prompt.

    Returns:
        dict with 'attention_patterns' (one stacked tensor per prompt),
        'attention_masks' (one mask per prompt), 'num_layers' and 'num_heads'
        (both taken from the last forward pass).

    Raises:
        ValueError: if `prompts` is empty, or the model returns no attentions.
    """
    prompts = list(prompts)
    if not prompts:
        # Previously this fell through to the return statement and crashed
        # on an unbound `outputs` variable.
        raise ValueError("extract_head_activations: 'prompts' is empty - nothing to measure")

    all_attention_patterns = []
    all_attention_masks = []
    num_layers = 0
    num_heads = 0

    for p_idx, prompt in enumerate(prompts):
        messages = [{"role": "user", "content": prompt}]
        formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        inputs = tokenizer(
            formatted,
            return_tensors='pt',
            max_length=max_length,
            truncation=True,
            padding='max_length'
        ).to(model.device)

        with torch.no_grad():
            outputs = model(**inputs, output_attentions=True, use_cache=False)

        # SANITY CHECK 1: Verify attentions exist
        attentions = outputs.attentions
        if attentions is None:
            raise ValueError(f"SANITY FAIL: outputs.attentions is None for prompt {p_idx}!")

        # One tensor per prompt; presumably (layers, heads, seq, seq) given
        # the usual (batch, heads, seq, seq) per-layer layout - TODO confirm.
        attn_stack = torch.stack([a.squeeze(0) for a in attentions], dim=0)

        # SANITY CHECK 2: Verify attention has variance (first prompt only to save time)
        if run_sanity and p_idx == 0:
            sanity_check_attention(attn_stack, context=f"prompt_{p_idx}")

        all_attention_patterns.append(attn_stack.cpu())
        all_attention_masks.append(inputs['attention_mask'].squeeze(0).cpu())

        num_layers = len(attentions)
        num_heads = attentions[0].shape[1]

    return {
        'attention_patterns': all_attention_patterns,
        'attention_masks': all_attention_masks,
        'num_layers': num_layers,
        'num_heads': num_heads
    }


def compute_head_entropy_profiles(attention_patterns, attention_masks=None):
    """Compute the mean normalized attention entropy per (layer, head).

    For every prompt, layer and head, the attention matrix is (optionally)
    restricted to the rows/columns where the attention mask is non-zero,
    averaged over its first dimension, renormalized to sum to 1, and its
    base-2 entropy over the strictly positive entries is divided by log2 of
    the number of those entries.

    Args:
        attention_patterns: list of tensors indexed [layer, head, row, col],
            one per prompt.
        attention_masks: optional list (entries may be None) of 1-D masks,
            one per prompt; non-zero marks a valid token.

    Returns:
        np.ndarray of shape (num_layers, num_heads): entropies averaged over
        prompts. Prompts with at most one valid token, and heads whose
        averaged weights do not sum to a positive value, contribute 0.
    """
    num_prompts = len(attention_patterns)
    num_layers = attention_patterns[0].shape[0]
    num_heads = attention_patterns[0].shape[1]

    all_entropies = np.zeros((num_prompts, num_layers, num_heads))

    for p_idx, attn in enumerate(attention_patterns):
        mask = attention_masks[p_idx] if attention_masks is not None else None

        if mask is not None:
            # The valid-token index is the same for every layer/head of a
            # prompt, so compute it (and the gather) once instead of
            # num_layers * num_heads times.
            valid_idx = mask.bool().nonzero(as_tuple=False).squeeze(-1)
            if valid_idx.numel() <= 1:
                # Nothing to measure; this prompt's entries stay 0.
                continue
            attn = attn[:, :, valid_idx][:, :, :, valid_idx]

        for layer in range(num_layers):
            for head in range(num_heads):
                # NOTE(review): the mean runs in the tensor's stored dtype
                # before the cast to float32.
                attn_weights = attn[layer, head].mean(dim=0).float().cpu().numpy()
                denom = attn_weights.sum()
                if denom <= 0:
                    continue  # entry stays 0

                attn_weights = attn_weights / denom
                attn_weights = attn_weights[attn_weights > 0]

                if len(attn_weights) > 1:
                    h = scipy_entropy(attn_weights, base=2)
                    h_max = np.log2(len(attn_weights))
                    h_norm = h / h_max if h_max > 0 else 0
                else:
                    h_norm = 0

                all_entropies[p_idx, layer, head] = h_norm

    return all_entropies.mean(axis=0)


def compute_specialization_metrics_global(head_entropies, run_sanity=True, context=""):
    """Compute the specialization index (SI) over all layers.

    Each head's profile is its entropy across layers; SI is 1 minus the
    NaN-ignoring mean of the pairwise Pearson correlations between profiles.

    Args:
        head_entropies: array of shape (num_layers, num_heads).
        run_sanity: when True, validate the result with sanity_check_si.
        context: label forwarded to the sanity check as 'global_<context>'.

    Returns:
        dict with mean per-layer variance across heads, mean head
        correlation, SI, layer/head counts and method='GLOBAL'.
    """
    n_layers, n_heads = head_entropies.shape

    # Spread of entropies across heads, averaged over layers.
    avg_layer_var = float(np.mean(np.var(head_entropies, axis=1)))

    # Pairwise correlation between head profiles (rows = heads); only the
    # strict upper triangle holds distinct pairs.
    corr = np.corrcoef(head_entropies.T)
    pair_rows, pair_cols = np.triu_indices(n_heads, k=1)
    avg_corr = float(np.nanmean(corr[pair_rows, pair_cols]))

    si = 1.0 - avg_corr

    # SANITY CHECK 3: Verify SI is reasonable
    if run_sanity:
        sanity_check_si(si, avg_corr, context=f"global_{context}")

    metrics = {}
    metrics['mean_head_variance'] = avg_layer_var
    metrics['mean_head_correlation'] = avg_corr
    metrics['specialization_index'] = si
    metrics['num_layers'] = n_layers
    metrics['num_heads'] = n_heads
    metrics['method'] = 'GLOBAL'
    return metrics


def compute_specialization_metrics_local(head_entropies, layer_start, layer_end, run_sanity=False, context=""):
    """Compute the specialization index (SI) within one layer range.

    Same definition as the global SI, but restricted to rows
    [layer_start, layer_end) of `head_entropies`.

    Special cases:
        * empty range: all metrics are 0.0 and num_layers is 0;
        * a single layer: correlation is undefined, so
          1 - mean_variance is used in its place.

    Args:
        head_entropies: array of shape (num_layers, num_heads).
        layer_start: first layer of the region (inclusive).
        layer_end: end of the region (exclusive).
        run_sanity: when True, validate the result with sanity_check_si.
        context: label forwarded to the sanity check as 'local_<context>'.

    Returns:
        dict with mean per-layer variance, mean head correlation, SI,
        layer/head counts, method='LOCAL' and the layer range.
    """
    region = head_entropies[layer_start:layer_end, :]
    n_region_layers, n_heads = region.shape

    def _pack(variance, correlation, si):
        # Single place that fixes the key order of the result record.
        return {
            'mean_head_variance': variance,
            'mean_head_correlation': correlation,
            'specialization_index': si,
            'num_layers': n_region_layers,
            'num_heads': n_heads,
            'method': 'LOCAL',
            'layer_range': [layer_start, layer_end]
        }

    if n_region_layers == 0:
        return _pack(0.0, 0.0, 0.0)

    avg_layer_var = float(np.mean(np.var(region, axis=1)))

    if n_region_layers >= 2:
        corr = np.corrcoef(region.T)
        pair_rows, pair_cols = np.triu_indices(n_heads, k=1)
        avg_corr = float(np.nanmean(corr[pair_rows, pair_cols]))
    else:
        avg_corr = 1.0 - avg_layer_var

    si = 1.0 - avg_corr

    # SANITY CHECK for local SI (optional)
    if run_sanity:
        sanity_check_si(si, avg_corr, context=f"local_{context}")

    return _pack(avg_layer_var, avg_corr, si)

# Confirm the cell ran and echo the active sanity thresholds.
print("Core functions loaded WITH SANITY CHECKS (Messner Protocol).")
print("\n".join([
    f"  Min variance threshold: {SANITY_MIN_VARIANCE}",
    f"  Max correlation threshold: {SANITY_MAX_CORRELATION}",
    f"  Min SI threshold: {SANITY_MIN_SI}",
]))


In [None]:
# Cell 4: Noise Injector

class PreAttentionNoiseInjector:
    """Add Gaussian noise to the input of selected decoder layers.

    A forward pre-hook is registered on every module in
    `model.model.layers`; for layers whose index lies in
    [target_start, target_end) the first positional input is replaced by
    `input + randn_like(input) * noise_std` before the layer runs.

    Can be used with explicit attach()/detach() calls or as a context
    manager, which guarantees the hooks are removed even on error:

        with PreAttentionNoiseInjector(model, (0, 10), noise_std=0.1):
            ...

    Args:
        model: object exposing the decoder layers as `model.model.layers`.
        target_range: (start, end) layer indices, end exclusive.
        noise_std: standard deviation of the noise; values <= 0 disable it.
    """

    def __init__(self, model, target_range, noise_std=0.0):
        self.model = model
        self.target_start, self.target_end = target_range
        self.noise_std = noise_std
        self.hooks = []

    def _make_pre_hook(self, layer_idx):
        """Return the pre-hook closure for the layer at `layer_idx`."""
        def hook(module, args):
            active = self.noise_std > 0 and self.target_start <= layer_idx < self.target_end
            # `not args`: the layer received no positional input, so there
            # is nothing this hook can perturb - pass the inputs through.
            if not active or not args:
                return args
            hidden_states = args[0]
            noise = torch.randn_like(hidden_states) * self.noise_std
            noisy_hidden_states = hidden_states + noise
            return (noisy_hidden_states,) + tuple(args[1:])
        return hook

    def attach(self):
        """Register the hooks; a no-op if they are already attached."""
        if self.hooks:
            # A second registration would inject the noise twice per layer.
            return
        for idx, layer in enumerate(self.model.model.layers):
            hook = layer.register_forward_pre_hook(self._make_pre_hook(idx))
            self.hooks.append(hook)

    def detach(self):
        """Remove every registered hook."""
        for hook in self.hooks:
            hook.remove()
        self.hooks = []

    def __enter__(self):
        self.attach()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.detach()
        return False  # never suppress exceptions

print("Noise injector defined.")

In [None]:
# Cell 5: Load Model (MODE-DEPENDENT)
# Loads MODEL_NAME either in bfloat16 ("fp") or 8-bit quantized via
# bitsandbytes, always with eager attention, then loads the tokenizer.

print(f"\n{'='*70}")
print(f"LOADING MODEL: {MODEL_NAME}")
print(f"MODE: {PRECISION_MODE.upper()}")
print(f"{'='*70}")

clear_gpu_memory()

if PRECISION_MODE == "fp":
    # ============ FULL PRECISION MODE ============
    print(f"\nDtype: torch.bfloat16 (FULL PRECISION)")
    print(f"Expected VRAM: ~65-70 GB")
    print(f"Disk space: {get_disk_free_gb():.1f} GB")

    # Free disk space first if the checkpoint download may not fit.
    if get_disk_free_gb() < 60:
        nuclear_cleanup()

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=DTYPE,
        device_map='auto',
        trust_remote_code=True,
        attn_implementation="eager",
        low_cpu_mem_usage=True,
    )
    QUANTIZATION = "fp_bfloat16"

else:
    # ============ 8-BIT MODE ============
    print(f"\nQuantization: 8-bit (bitsandbytes)")
    print(f"Expected VRAM: ~15-20 GB")

    # The former `bnb_8bit_compute_dtype=torch.float16` argument was dropped:
    # BitsAndBytesConfig documents a compute-dtype option only for 4-bit
    # (`bnb_4bit_compute_dtype`), so it gave a false impression of control.
    # NOTE(review): no torch_dtype is passed here, so non-quantized modules
    # use the library default (presumably float16, not DTYPE) - confirm, as
    # it is a dtype difference versus the FP run.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=bnb_config,
        device_map='auto',
        trust_remote_code=True,
        attn_implementation="eager"
    )
    QUANTIZATION = "8bit"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Padding to max_length needs a pad token; fall back to EOS if none is set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.eval()

# ============ MESSNER PROTOCOL: FORCE OUTPUT_ATTENTIONS ============
# CRITICAL: Set at config level to ensure attention is ALWAYS returned
model.config.output_attentions = True
model.config.use_cache = False  # Disable KV cache to ensure fresh attention computation
print(f"\n{'='*60}")
print(f"MESSNER PROTOCOL ACTIVATED")
print(f"{'='*60}")
print(f"  model.config.output_attentions = True")
print(f"  model.config.use_cache = False")
print(f"  (Forces attention output at config level)")

# Architecture detection
config = model.config
num_layers = config.num_hidden_layers
num_query_heads = config.num_attention_heads
num_kv_heads = getattr(config, 'num_key_value_heads', num_query_heads)
hidden_size = config.hidden_size
# Prefer the explicit per-head dimension when the config declares one:
# for such models hidden_size // num_heads is not the real head size.
d_head = getattr(config, 'head_dim', None) or hidden_size // num_query_heads

rho_kv = num_kv_heads / num_layers
rho_head = num_query_heads / math.sqrt(hidden_size)
rho = rho_head

# Classify the attention variant from the query/KV head counts.
if num_kv_heads == num_query_heads:
    attn_type = "MHA"
elif num_kv_heads == 1:
    attn_type = "MQA"
else:
    attn_type = f"GQA ({num_query_heads}:{num_kv_heads})"

has_swa = hasattr(config, 'sliding_window') and config.sliding_window is not None
architecture = f"{attn_type}+SWA" if has_swa else attn_type

MODEL_CONFIG = {
    'name': MODEL_NAME,
    'num_layers': num_layers,
    'num_query_heads': num_query_heads,
    'num_kv_heads': num_kv_heads,
    'd_head': d_head,
    'hidden_size': hidden_size,
    'architecture': architecture,
    'rho': rho,
    'rho_head': rho_head,
    'rho_kv': rho_kv,
    'rho_crit': RHO_CRIT
}

# Split the layer stack into thirds; 'late' absorbs any remainder.
third = num_layers // 3
LAYER_RANGES = {
    'early': (0, third),
    'middle': (third, 2*third),
    'late': (2*third, num_layers),
    'all': (0, num_layers)
}

if torch.cuda.is_available():
    allocated = torch.cuda.memory_allocated(0) / 1e9
    print(f"\nGPU Memory: {allocated:.2f} GB allocated")

print(f"\nArchitecture: {architecture}")
print(f"Layers: {num_layers}, Heads: {num_query_heads}, d_head: {d_head}")
print(f"rho_head: {rho_head:.4f}, rho_crit: {RHO_CRIT}")
print(f"Quantization: {QUANTIZATION}")

In [None]:
# Cell 5b: DEBUG PROBE (Messner Protocol)
# ============================================================
# CRITICAL: Run this BEFORE Cell 6 to verify attention capture!
# If this fails, DO NOT proceed - fix the issue first.
# ============================================================

print(f"\n{'='*70}")
print(f"MESSNER DEBUG PROBE: Verifying Attention Capture")
print(f"{'='*70}")

# Single probe prompt
DEBUG_PROMPT = "What is 2+2?"

# Format with chat template
debug_messages = [{"role": "user", "content": DEBUG_PROMPT}]
debug_formatted = tokenizer.apply_chat_template(debug_messages, tokenize=False, add_generation_prompt=True)
debug_inputs = tokenizer(
    debug_formatted,
    return_tensors='pt',
    max_length=64,
    truncation=True
).to(model.device)

print(f"\n[1] Input tokens: {debug_inputs['input_ids'].shape}")

# Run model with explicit output_attentions
with torch.no_grad():
    debug_outputs = model(**debug_inputs, output_attentions=True, use_cache=False)

# ============ CRITICAL CHECKS ============
print(f"\n[2] Checking outputs.attentions...")

if debug_outputs.attentions is None:
    # The config keeps the active implementation under the private
    # `_attn_implementation` name; the public-looking name is a fallback.
    debug_attn_impl = getattr(
        model.config, '_attn_implementation',
        getattr(model.config, 'attn_implementation', 'unknown')
    )
    print(f"\n{'!'*70}")
    print(f"FATAL: outputs.attentions is None!")
    print(f"{'!'*70}")
    print(f"\nDIAGNOSTICS:")
    print(f"  model.config.output_attentions = {model.config.output_attentions}")
    print(f"  model.config.use_cache = {model.config.use_cache}")
    print(f"  attn_implementation = {debug_attn_impl}")
    raise ValueError("MESSNER ABORT: Cannot capture attention!")

print(f"  → outputs.attentions is NOT None ✓")
print(f"  → Number of layers: {len(debug_outputs.attentions)}")
print(f"  → Shape per layer: {debug_outputs.attentions[0].shape}")

# Stack and analyze
debug_attn_stack = torch.stack([a.squeeze(0) for a in debug_outputs.attentions], dim=0)

print(f"\n[3] Attention Statistics (Layer 0, Head 0):")
layer0_head0 = debug_attn_stack[0, 0].float()
print(f"  → Shape: {layer0_head0.shape}")
print(f"  → Min:   {float(layer0_head0.min()):.6f}")
print(f"  → Max:   {float(layer0_head0.max()):.6f}")
print(f"  → Mean:  {float(layer0_head0.mean()):.6f}")
print(f"  → Std:   {float(layer0_head0.std()):.6f}")

# Global statistics
debug_global_std = float(debug_attn_stack.std())
print(f"\n[4] Global Attention Statistics:")
print(f"  → Shape: {debug_attn_stack.shape}")
print(f"  → Min:   {float(debug_attn_stack.min()):.6f}")
print(f"  → Max:   {float(debug_attn_stack.max()):.6f}")
print(f"  → Mean:  {float(debug_attn_stack.mean()):.6f}")
print(f"  → Std:   {debug_global_std:.6f}")

# ============ SANITY CHECK ============
# `not (std >= threshold)` also trips on NaN, which a plain `<` comparison
# would let through. The threshold is the one used by the core sanity checks.
if not debug_global_std >= SANITY_MIN_VARIANCE:
    print(f"\n{'!'*70}")
    print(f"FATAL: Attention variance = 0!")
    print(f"This means all attention weights are identical (measurement error).")
    print(f"  (std={debug_global_std}, threshold={SANITY_MIN_VARIANCE})")
    print(f"{'!'*70}")
    raise ValueError("MESSNER ABORT: Zero attention variance!")

print(f"\n{'='*70}")
print(f"MESSNER DEBUG PROBE: PASSED ✓")
print(f"{'='*70}")
print(f"\nAttention capture is working. Safe to proceed to Cell 6.")

# Cleanup
del debug_outputs, debug_attn_stack
clear_gpu_memory()

In [None]:
# Cell 6: Baseline Measurement
# Measures the noise-free SI (global and per layer region) and compares the
# regional values with the 8-bit Split-Brain reference.

print(f"\n{'='*60}")
print("BASELINE MEASUREMENT")
print('=' * 60)

baseline_activations = extract_head_activations(model, tokenizer, STANDARD_PROMPTS, max_length=MAX_LENGTH)
baseline_entropies = compute_head_entropy_profiles(
    baseline_activations['attention_patterns'],
    baseline_activations['attention_masks']
)

baseline_global = compute_specialization_metrics_global(baseline_entropies)

# One local-SI record per named layer region.
baseline_local = {
    name: compute_specialization_metrics_local(baseline_entropies, lo, hi)
    for name, (lo, hi) in LAYER_RANGES.items()
}

print(f"\nBASELINE RESULTS ({PRECISION_MODE.upper()}):")
print(f"\n  GLOBAL SI: {baseline_global['specialization_index']:.4f}")
print("\n  REGION-LOCAL SI:")
for region_name, metrics in baseline_local.items():
    print(f"    {region_name}: SI = {metrics['specialization_index']:.4f}")

# Compare to 8-bit Split-Brain
print("\n  COMPARISON TO 8-BIT SPLIT-BRAIN:")
print(f"    {'Region':<10} {'8-bit':<12} {PRECISION_MODE.upper():<12} {'Diff':>10}")
print(f"    {'-'*44}")
for region in ('early', 'middle', 'late'):
    ref = SPLIT_BRAIN_8BIT[region]
    current = baseline_local[region]['specialization_index']
    # A NaN reference has no meaningful difference.
    diff_str = "N/A" if np.isnan(ref) else f"{current - ref:+.4f}"
    print(f"    {region:<10} {ref:<12.4f} {current:<12.4f} {diff_str:>10}")

# Result container filled in by the following cells.
results = dict(
    baseline_global=baseline_global,
    baseline_local=baseline_local,
    treatments_global=[],
    treatments_local=[],
    quantization=QUANTIZATION,
    injection_method='PRE-ATTENTION',
    si_method='GLOBAL + LOCAL',
)

In [None]:
# Cell 7: Indra Treatment Loop
# For every seed, layer region and noise level: inject noise into the
# region, re-measure global and region-local SI, and record the change
# relative to the baseline. Results are then aggregated across seeds.

def _si_change_record(noise_std, si_before, si_after):
    """Build one noise-test record comparing a treated SI with its baseline."""
    si_delta = si_after - si_before
    # Relative change is reported as 0 when the baseline is not > 0
    # (zero, negative or NaN), where the ratio is undefined.
    change_pct = (si_delta / si_before) * 100 if si_before > 0 else 0
    return {
        'noise_std': float(noise_std),
        'si': si_after,
        'si_delta': float(si_delta),
        'change_pct': float(change_pct)
    }


print(f"\n{'='*60}")
print(f"INDRA TREATMENT ({PRECISION_MODE.upper()})")
print(f"{'='*60}")

all_seed_results = {seed: {'global': [], 'local': []} for seed in SEEDS}

for seed_idx, current_seed in enumerate(SEEDS):
    print(f"\nSEED {seed_idx+1}/{len(SEEDS)}: {current_seed}")

    for region_name, (start, end) in LAYER_RANGES.items():
        region_global = {
            'region': region_name,
            'layer_range': [start, end],
            'seed': current_seed,
            'si_method': 'GLOBAL',
            'noise_tests': []
        }

        region_local = {
            'region': region_name,
            'layer_range': [start, end],
            'seed': current_seed,
            'si_method': 'LOCAL',
            'noise_tests': []
        }

        for noise_std in NOISE_LEVELS:
            # Re-seed before every measurement so each (region, noise level)
            # run draws from the same random stream for a given seed.
            torch.manual_seed(current_seed)
            np.random.seed(current_seed)
            random.seed(current_seed)

            injector = PreAttentionNoiseInjector(model, (start, end), noise_std=noise_std)
            injector.attach()
            try:
                treated_activations = extract_head_activations(
                    model, tokenizer, STANDARD_PROMPTS, max_length=MAX_LENGTH
                )
                treated_entropies = compute_head_entropy_profiles(
                    treated_activations['attention_patterns'],
                    treated_activations['attention_masks']
                )
            finally:
                # Always remove the hooks: if extraction fails, leftover
                # hooks would keep injecting noise into every later forward
                # pass (including a re-run of the baseline cell).
                injector.detach()

            # Global SI
            treated_global = compute_specialization_metrics_global(treated_entropies)
            global_record = _si_change_record(
                noise_std,
                baseline_global['specialization_index'],
                treated_global['specialization_index']
            )
            region_global['noise_tests'].append(global_record)

            # Local SI (region under treatment vs. the same region's baseline)
            treated_local = compute_specialization_metrics_local(treated_entropies, start, end)
            local_record = _si_change_record(
                noise_std,
                baseline_local[region_name]['specialization_index'],
                treated_local['specialization_index']
            )
            region_local['noise_tests'].append(local_record)

            if current_seed == PRIMARY_SEED and noise_std == 0.2:
                change_pct_global = global_record['change_pct']
                change_pct_local = local_record['change_pct']
                print(f"  {region_name}: Global={change_pct_global:+.2f}%, Local={change_pct_local:+.2f}%")

        region_global['min_change_pct'] = min(t['change_pct'] for t in region_global['noise_tests'])
        region_global['max_change_pct'] = max(t['change_pct'] for t in region_global['noise_tests'])
        region_local['min_change_pct'] = min(t['change_pct'] for t in region_local['noise_tests'])
        region_local['max_change_pct'] = max(t['change_pct'] for t in region_local['noise_tests'])

        all_seed_results[current_seed]['global'].append(region_global)
        all_seed_results[current_seed]['local'].append(region_local)

# Aggregate: mean/std across seeds of each region's most negative change.
aggregated = {'global': {}, 'local': {}}
for si_method in ['global', 'local']:
    for region_name in LAYER_RANGES.keys():
        region_changes = []
        for seed in SEEDS:
            seed_data = next(t for t in all_seed_results[seed][si_method] if t['region'] == region_name)
            region_changes.append(seed_data['min_change_pct'])

        aggregated[si_method][region_name] = {
            'mean': float(np.mean(region_changes)),
            'std': float(np.std(region_changes)),
            'values': region_changes
        }

print(f"\nAGGREGATED RESULTS ({PRECISION_MODE.upper()}):")
print(f"{'Region':<10} {'Global':<15} {'Local':<15}")
print("-"*40)
for region_name in LAYER_RANGES.keys():
    g = aggregated['global'][region_name]
    l = aggregated['local'][region_name]
    print(f"{region_name:<10} {g['mean']:+.2f}% +/- {g['std']:.2f}{'':>3} {l['mean']:+.2f}% +/- {l['std']:.2f}")

results['treatments_global'] = all_seed_results[PRIMARY_SEED]['global']
results['treatments_local'] = all_seed_results[PRIMARY_SEED]['local']
results['multi_seed_results'] = all_seed_results
results['aggregated'] = aggregated

In [None]:
# Cell 8: Verdict - Split-Brain Validation
# Classifies the baseline region-local SI pattern and records the verdict.

def _is_collapsed(si, threshold=0.3):
    """Return True if a region's SI counts as collapsed/dead.

    NaN is treated as collapsed: the 8-bit reference records the dead late
    region as NaN (variance=0), and NaN would otherwise fail `si < threshold`
    and push the reference pattern itself into INCONCLUSIVE.
    """
    return bool(np.isnan(si) or si < threshold)


print(f"\n{'='*70}")
print(f"VERDICT: SPLIT-BRAIN VALIDATION ({PRECISION_MODE.upper()})")
print(f"{'='*70}")

# Extract key metrics
early_local_si = baseline_local['early']['specialization_index']
middle_local_si = baseline_local['middle']['specialization_index']
late_local_si = baseline_local['late']['specialization_index']

print(f"\nBASELINE LOCAL SI ({PRECISION_MODE.upper()}):")
print(f"  Early:  {early_local_si:.4f}")
print(f"  Middle: {middle_local_si:.4f}")
print(f"  Late:   {late_local_si:.4f}")

print(f"\n8-BIT SPLIT-BRAIN REFERENCE:")
print(f"  Early:  {SPLIT_BRAIN_8BIT['early']:.4f} (HEALTHY)")
print(f"  Middle: {SPLIT_BRAIN_8BIT['middle']:.4f} (COLLAPSED)")
print(f"  Late:   NaN (DEAD - variance=0)")

# Determine Split-Brain status
# Pattern: Early > 0.5 (healthy), Middle/Late < 0.3 or NaN (collapsed/dead)
if early_local_si > 0.5 and _is_collapsed(middle_local_si) and _is_collapsed(late_local_si):
    split_brain_verdict = "SPLIT_BRAIN_CONFIRMED"
    split_brain_detail = "Pattern preserved: Early healthy, Middle/Late collapsed"
elif early_local_si > 0.5 and (middle_local_si > 0.5 or late_local_si > 0.5):
    split_brain_verdict = "SPLIT_BRAIN_ARTIFACT"
    split_brain_detail = "Pattern NOT preserved: Middle/Late show healthy SI in FP"
elif early_local_si < 0.3:
    split_brain_verdict = "ALL_COLLAPSED"
    split_brain_detail = "All regions show low SI - different pathology"
else:
    split_brain_verdict = "INCONCLUSIVE"
    split_brain_detail = "Pattern partially matches - needs investigation"

print(f"\n{'='*70}")
print(f"SPLIT-BRAIN VERDICT: {split_brain_verdict}")
print(f"{'='*70}")
print(f"\n{split_brain_detail}")

# Implications are only drawn from the full-precision run.
if PRECISION_MODE == "fp":
    if split_brain_verdict == "SPLIT_BRAIN_CONFIRMED":
        print(f"\nIMPLICATION:")
        print(f"  Split-Brain is REAL (architecture-determined)")
        print(f"  8-bit quantization caveat can be REMOVED for this finding")
        print(f"  Gemma-27B genuinely has Early=healthy, Middle/Late=collapsed")
    elif split_brain_verdict == "SPLIT_BRAIN_ARTIFACT":
        print(f"\nIMPLICATION:")
        print(f"  Split-Brain is ARTIFACT (quantization-induced)")
        print(f"  8-bit severely biases region-local SI measurement")
        print(f"  All 8-bit region-specific claims need re-evaluation")

results['split_brain_verdict'] = {
    'verdict': split_brain_verdict,
    'detail': split_brain_detail,
    'precision_mode': PRECISION_MODE,
    'baseline_local_si': {
        'early': early_local_si,
        'middle': middle_local_si,
        'late': late_local_si
    },
    'reference_8bit': SPLIT_BRAIN_8BIT
}

In [None]:
# Cell 9: Save Results

def convert_to_native(obj):
    """Recursively convert NumPy values into plain Python types for JSON.

    Dicts are rebuilt with converted values (keys untouched); lists and
    tuples become lists of converted items; NumPy booleans, integers and
    floats become bool, int and float; arrays become nested lists. Anything
    else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: convert_to_native(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Tuples are traversed too, so NumPy scalars nested inside them are
        # converted; JSON would write a tuple as an array anyway.
        return [convert_to_native(v) for v in obj]
    if isinstance(obj, np.bool_):
        # Keep booleans as booleans (JSON true/false) rather than 0/1.
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj

# Assemble the result record, write it as JSON under results/, and offer it
# for download when running in Colab.
filename = f'results/E11_indra_gemma27b_v4_{PRECISION_MODE}_{TIMESTAMP}.json'

output = {
    'experiment': 'E11-Indra-Gemma27B-V4-Dual',
    'purpose': 'Split-Brain Validation (8-bit vs Full Precision)',
    'timestamp': TIMESTAMP,
    'precision_mode': PRECISION_MODE,
    'model': MODEL_CONFIG['name'],
    'architecture': MODEL_CONFIG['architecture'],
    'methodology': {
        'standard': 'E11-v3',
        'seeds': SEEDS,
        'max_length': MAX_LENGTH,
        # NOTE(review): DTYPE is only passed to the loader in "fp" mode;
        # confirm this field is meant to be recorded for 8-bit runs too.
        'dtype': str(DTYPE),
        'prompt_md5': ACTUAL_MD5,
        'prompt_md5_verified': PROMPTS_VERIFIED,
        'num_prompts': len(STANDARD_PROMPTS),
        'quantization': QUANTIZATION,
        'si_method': 'GLOBAL + LOCAL'
    },
    'rho': MODEL_CONFIG['rho'],
    'rho_head': MODEL_CONFIG['rho_head'],
    'rho_kv': MODEL_CONFIG['rho_kv'],
    'rho_crit': MODEL_CONFIG['rho_crit'],
    'layer_ranges': {k: list(v) for k, v in LAYER_RANGES.items()},
    'noise_levels': NOISE_LEVELS,
    'results': convert_to_native(results),
    'split_brain_verdict': results['split_brain_verdict'],
    'runtime': {
        'gpu': torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A',
        'gpu_memory_gb': float(torch.cuda.get_device_properties(0).total_memory / 1e9) if torch.cuda.is_available() else 0
    }
}

# Explicit encoding so the file does not depend on the platform default.
with open(filename, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2)

print(f"\nResults saved: {filename}")
print(f"\nE11-v3 Compliance:")
print(f"  Seeds: {SEEDS}")
print(f"  Prompts: MD5 {'VERIFIED' if PROMPTS_VERIFIED else 'FAILED'}")
print(f"  Quantization: {QUANTIZATION}")
print(f"\nSPLIT-BRAIN VERDICT: {split_brain_verdict}")

# Best-effort download: a missing google.colab module means "not in Colab";
# any other failure is reported instead of being mislabelled, and
# KeyboardInterrupt/SystemExit are no longer swallowed.
try:
    from google.colab import files
except ImportError:
    print("\n(Not in Colab)")
else:
    try:
        files.download(filename)
        print("\nFile downloaded!")
    except Exception as exc:
        print(f"\n(Download failed: {type(exc).__name__}: {exc})")

---

## Summary: E11-Indra-Gemma27B-V4 (Dual Mode)

### Purpose

Validate if Split-Brain pattern is **real** or **artifact**:

| Mode | GPU Requirement | What it tests |
|------|-----------------|---------------|
| `8bit` | 24GB+ | Reference run (matches V3) |
| `fp` | 80GB (A100) | Full precision validation |

### Expected Outcomes

| Verdict | If FP shows... | Meaning |
|---------|----------------|--------|
| `SPLIT_BRAIN_CONFIRMED` | Early=high, Middle/Late=low | Pattern is architecture-determined |
| `SPLIT_BRAIN_ARTIFACT` | All regions=high SI | 8-bit created false pattern |

### Usage

```python
# Cell 1 (choose exactly ONE):
PRECISION_MODE = "fp"    # For A100-80GB
PRECISION_MODE = "8bit"  # For smaller GPUs
```

---

*Paper 4: Behavioral Sink Dynamics*  
*E11-Indra-Gemma27B-V4: Split-Brain Validation*  
*Gene Kranz Protocol: Mission Control*