# Grand Unified Thermodynamic Benchmark

**Paper #3 Validation: P1 + P2 + P3 Combined**

**Date:** 2026-01-05

**Purpose:** Stabilize claims for Paper #3 (Topological Thermodynamics)

---

## What This Tests

| Test | Description | Hypothesis |
|------|-------------|------------|
| **P1** | Cross-Model Input-Dependency | Different architectures have different base levels (mean last-layer gain) |
| **P2** | Expanded Prompt Set (n=25) | Statistical significance (p < 0.05) |
| **P3** | Cliché vs Novel | "Plattitüden-Tunnel" (platitude tunnel) hypothesis: clichés yield lower output entropy than novel prompts |

## Expected Results

| Model | Base Level | Prediction |
|-------|------------|------------|
| LLaMA 3.1 | < 1.0 (damping) | Always braking; gain modulates between 0.48 and 0.80 |
| Mistral | ≈ 1.0 (Inertia) | Neutral, modulates around 1.0 |
| Gemma | > 1.0 (unstable) | Tends to explode |
| Pythia | < 1.0 (LayerNorm) | Control group |

In [None]:
# Cell 1: Setup
!pip install -q transformers accelerate scipy seaborn

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoModelForCausalLM, AutoTokenizer
from scipy.stats import entropy, spearmanr, pearsonr, ttest_ind
import gc
import json
from datetime import datetime
from google.colab import userdata, files
import os

# Hugging Face access token, fetched through google.colab's userdata helper.
HF_TOKEN = userdata.get('HF_TOKEN')

# Plot appearance shared by every figure produced in this notebook.
plt.style.use('seaborn-v0_8-paper')
sns.set_context("talk")

# One timestamp per session so that all output files share the same suffix.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
print(f"Session timestamp: {TIMESTAMP}")

# Report the runtime environment; GPU details are printed only when CUDA is usable.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Memory: {gpu_total_bytes / 1e9:.1f} GB")

In [None]:
# Cell 2: THE EXPANDED PROMPT SET (P2 + P3 Integration)
# 25 prompts across 5 categories

# Prompt corpus keyed by category name; every category holds a list of
# incomplete sentences that the models are asked to continue.
PROMPT_DATASET = dict(
    # Short statements with a single well-known completion.
    Factual=[
        "The capital city of France is",
        "The atomic number of oxygen is",
        "Water boils at a temperature of",
        "The largest planet in our solar system is",
        "The currency used in Japan is",
    ],
    # Long sentences with nested or subordinate clauses.
    Syntactic=[
        "The agreement, which, notwithstanding the fact that it was signed only yesterday, effectively binds all parties immediately, stipulates that",
        "Although the weather was extremely cold, and despite the fact that they had no coats, the children decided to",
        "The professor, having reviewed the complex derivation multiple times without finding the error, finally realized that",
        "To imply that such a fundamental shift in policy could occur without significant public debate is to suggest that",
        "Not only did the experiment fail to yield the expected results, but it also demonstrated that the initial hypothesis was",
    ],
    # Common sayings and stock phrases.
    Cliche=[
        "The true meaning of happiness is often found in",
        "Actions speak louder than",
        "It is what it is, and we must simply",
        "Time heals all",
        "Life is a journey, not a",
    ],
    # Unusual, abstract subject combinations.
    Novel=[
        "The epistemological implications of quantum decoherence suggest that the observer is",
        "If consciousness creates reality, then the paradox of the unobserved electron implies",
        "The intersection of baroque architecture and cybernetic theory creates a space where",
        "Calculating the trajectory of a hyperspace jump requires factoring in the variability of",
        "The symbiotic relationship between fungal mycelium and digital neural networks results in",
    ],
    # Word sequences without grammatical structure.
    Nonsense=[
        "Table sky run blue jump quickly under over",
        "Purple idea furiously sleep colorless green",
        "Clock river dance potato seven fast",
        "Window eat loud tomorrow yellow under",
        "Fish bicycle logic cloud mountain swim",
    ],
)

# Per-category expectations used by the analysis cells. Each row of the table
# below is (category, (expected_gain, expected_entropy, complexity)).
_METADATA_FIELDS = ("expected_gain", "expected_entropy", "complexity")
CATEGORY_METADATA = {
    category: dict(zip(_METADATA_FIELDS, values))
    for category, values in (
        ("Factual", ("low", "medium", 1)),
        ("Syntactic", ("high", "medium", 5)),
        ("Cliche", ("medium", "low", 2)),
        ("Novel", ("high", "high", 4)),
        ("Nonsense", ("medium", "very_high", 3)),
    )
}

# Count prompts
# Fail fast if a category has no metadata entry: the execution loop indexes
# CATEGORY_METADATA[category], and discovering a missing key only after a
# multi-gigabyte model has been loaded is expensive.
missing_metadata = [cat for cat in PROMPT_DATASET if cat not in CATEGORY_METADATA]
if missing_metadata:
    raise ValueError(f"Categories without metadata: {missing_metadata}")

# The layout text is derived from the data so that it cannot drift from
# PROMPT_DATASET when prompts or categories are added or removed.
category_sizes = {cat: len(prompts) for cat, prompts in PROMPT_DATASET.items()}
total_prompts = sum(category_sizes.values())
distinct_sizes = set(category_sizes.values())
if len(distinct_sizes) == 1:
    layout = f"{len(category_sizes)} categories x {distinct_sizes.pop()} prompts"
else:
    # Categories of different sizes cannot be summarised as "N x M".
    layout = f"{len(category_sizes)} categories"
print(f"Total prompts: {total_prompts} ({layout})")
for cat, n_prompts in category_sizes.items():
    print(f"  {cat}: {n_prompts} prompts")

In [None]:
# Cell 3: MODEL ZOO (P1 - Cross-Architecture)

# Model zoo. Each row of the table below is
# (display name, Hugging Face path, normalisation type, expected base level, role).
_ZOO_FIELDS = ("hf_path", "norm_type", "expected_base", "role")
MODELS_TO_TEST = {
    label: dict(zip(_ZOO_FIELDS, spec))
    for label, *spec in (
        ("Pythia-6.9B", "EleutherAI/pythia-6.9b", "LayerNorm", "< 1.0", "Control (LayerNorm)"),
        ("Gemma-7B", "google/gemma-7b", "RMSNorm", "> 1.0", "Exploder"),
        ("Mistral-7B", "mistralai/Mistral-7B-v0.1", "RMSNorm", "~ 1.0", "Inertia"),
        ("LLaMA-3.1-8B", "meta-llama/Llama-3.1-8B", "RMSNorm", "< 1.0", "Dampener"),
    )
}

# Echo the configuration so the run log records what was tested.
print("Models to test:")
for name, info in MODELS_TO_TEST.items():
    norm_type = info['norm_type']
    expected_base = info['expected_base']
    role = info['role']
    print(f"  {name}: {norm_type} | Expected: {expected_base} | Role: {role}")

In [None]:
# Cell 4: MEASUREMENT ENGINE

def get_layer_list(model):
    """Return the sequence of transformer layers held by ``model``.

    The known container layouts are probed in a fixed order and the first
    one that matches wins.

    Raises:
        ValueError: if none of the known layouts is present on ``model``.
    """
    # (container attribute, layer-list attribute) pairs, probed in order.
    known_layouts = (
        ('model', 'layers'),      # LLaMA, Mistral, Gemma
        ('gpt_neox', 'layers'),   # Pythia
        ('transformer', 'h'),     # GPT-2 style
    )
    for container_attr, layers_attr in known_layouts:
        if not hasattr(model, container_attr):
            continue
        container = getattr(model, container_attr)
        if hasattr(container, layers_attr):
            return getattr(container, layers_attr)
    raise ValueError(f"Unknown model architecture: {type(model)}")

def measure_thermodynamics(model, tokenizer, text, device='cuda'):
    """Measure residual-stream gain and next-token entropy for one prompt.

    A forward hook on every layer returned by ``get_layer_list`` records the
    L2 norm of the last token's hidden state during a single forward pass.

    Args:
        model: Causal language model; called as ``model(**inputs)`` and
            expected to return an object with a ``logits`` attribute.
        tokenizer: Called as ``tokenizer(text, return_tensors="pt")``; also
            used to decode the most likely next token.
        text: Prompt to evaluate.
        device: Device the tokenized inputs are moved to.

    Returns:
        dict with the keys
            ``last_gain``: norm after the last layer divided by the norm
                after the penultimate layer (1.0 when fewer than two layers
                were recorded or the denominator is not positive),
            ``total_amp``: norm after the last layer divided by the norm
                after the first layer (same fallback),
            ``entropy``: entropy of the next-token distribution in nats,
            ``top_token`` / ``top_prob``: most likely next token and its
                probability,
            ``n_layers``: number of recorded layers,
            ``all_norms``: the recorded per-layer norms.
        All numeric values are native Python numbers so the result can be
        serialised with ``json`` without a custom encoder.
    """
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Per-layer norms of the last token, appended in execution order.
    norms = []

    def record_last_token_norm(module, args, output):
        # Some layers return a tuple whose first element is the hidden state.
        hidden_state = output[0] if isinstance(output, tuple) else output
        # Upcast before reducing so the norm does not depend on the limited
        # range and precision of a half-precision model dtype.
        norms.append(torch.norm(hidden_state[0, -1].float()).item())

    handles = []
    try:
        for layer in get_layer_list(model):
            handles.append(layer.register_forward_hook(record_last_token_norm))

        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        # Always detach the hooks, even when the forward pass raises;
        # otherwise they stay on the model and fire on every later call.
        for handle in handles:
            handle.remove()

    if len(norms) >= 2:
        # Last Layer Gain = output of last layer / output of penultimate layer
        last_gain = norms[-1] / norms[-2] if norms[-2] > 0 else 1.0
        # Total Amplification = final / initial
        total_amp = norms[-1] / norms[0] if norms[0] > 0 else 1.0
    else:
        last_gain = 1.0
        total_amp = 1.0

    # Next-token distribution. The softmax is taken in float32: in float16
    # the many tiny tail probabilities of a large vocabulary round to zero,
    # which distorts the entropy.
    last_token_logits = logits[0, -1, :].float()
    probs = torch.softmax(last_token_logits, dim=0).cpu().numpy()
    ent = float(entropy(probs))

    # Most likely next token and its probability.
    top_idx = int(torch.argmax(last_token_logits).item())
    top_prob = float(probs[top_idx])
    top_token = tokenizer.decode([top_idx])

    return {
        "last_gain": last_gain,
        "total_amp": total_amp,
        "entropy": ent,
        "top_token": top_token,
        "top_prob": top_prob,
        "n_layers": len(norms),
        "all_norms": norms
    }

# Reaching this line means the two helper functions above were defined without error.
print("Measurement engine ready.")

In [None]:
# Cell 5: EXECUTION LOOP

# One dict per (model, prompt) measurement; converted to a DataFrame later.
all_results = []

print("=" * 70)
print("STARTING GRAND UNIFIED THERMODYNAMIC BENCHMARK")
print(f"Models: {len(MODELS_TO_TEST)} | Prompts: {total_prompts}")
print(f"Total measurements: {len(MODELS_TO_TEST) * total_prompts}")
print("=" * 70 + "\n")

for model_name, model_info in MODELS_TO_TEST.items():
    hf_path = model_info["hf_path"]
    print(f"\n{'='*60}")
    print(f"Loading {model_name} ({model_info['role']})...")
    print(f"  Path: {hf_path}")
    print(f"  Norm: {model_info['norm_type']}")
    print(f"  Expected Base: {model_info['expected_base']}")

    # Bound before the try so the cleanup below works even when loading fails.
    model = None
    tokenizer = None
    completed = False
    try:
        tokenizer = AutoTokenizer.from_pretrained(hf_path, token=HF_TOKEN)
        # trust_remote_code is left at its default (off): it permits running
        # code shipped in the model repository and is not passed here because
        # the listed architectures are loaded through the stock Auto classes.
        # NOTE(review): float16 has a narrow dynamic range - confirm it is
        # adequate for every model in the zoo before interpreting gains.
        model = AutoModelForCausalLM.from_pretrained(
            hf_path,
            torch_dtype=torch.float16,
            device_map="auto",
            token=HF_TOKEN
        )

        print(f"\n  Testing {len(PROMPT_DATASET)} categories ({total_prompts} prompts)...")

        for category, prompts in PROMPT_DATASET.items():
            print(f"    {category}: ", end="")
            for prompt in prompts:
                res = measure_thermodynamics(model, tokenizer, prompt)

                # Only mark the short label as truncated when it really is.
                prompt_short = prompt if len(prompt) <= 40 else prompt[:40] + "..."

                all_results.append({
                    "Model": model_name,
                    "Norm_Type": model_info["norm_type"],
                    "Role": model_info["role"],
                    "Category": category,
                    "Complexity": CATEGORY_METADATA[category]["complexity"],
                    "Prompt": prompt,
                    "Prompt_Short": prompt_short,
                    "Entropy": res["entropy"],
                    "Last_Gain": res["last_gain"],
                    "Total_Amp": res["total_amp"],
                    "Top_Token": res["top_token"],
                    "Top_Prob": res["top_prob"],
                    "N_Layers": res["n_layers"]
                })
                print(".", end="", flush=True)
            print(f" Done (n={len(prompts)})")

        completed = True

    except Exception as e:
        # Best effort: report the failure and continue with the next model.
        # Rows recorded before the failure remain in all_results.
        print(f"\n  FAILED: {e}")
        import traceback
        traceback.print_exc()

    finally:
        # Release the model on success AND on failure so the next model does
        # not have to fit next to a stale one. Drop the references first, then
        # collect garbage, then return the freed blocks to the GPU driver.
        model = None
        tokenizer = None
        gc.collect()
        torch.cuda.empty_cache()
        if completed:
            print(f"\n  {model_name} complete. Memory cleared.")

print("\n" + "=" * 70)
print("BENCHMARK COMPLETE")
print(f"Total measurements: {len(all_results)}")
print("=" * 70)

In [None]:
# Cell 6: CREATE DATAFRAME & SAVE

# Stop here with an actionable message when nothing was measured. Without
# this check an empty frame is written to disk and the summary below fails
# with a KeyError on the missing 'Model' column.
if not all_results:
    raise RuntimeError(
        "No measurements were collected - every model failed to load or run. "
        "Check the errors printed by the execution loop before continuing."
    )

# One row per (model, prompt) measurement.
df = pd.DataFrame(all_results)

# Define filenames with global timestamp
CSV_FILE = f"thermodynamic_benchmark_{TIMESTAMP}.csv"
JSON_FILE = f"thermodynamic_benchmark_{TIMESTAMP}.json"
PNG_MAIN = f"grand_unified_benchmark_{TIMESTAMP}.png"
PNG_DETAIL = f"per_model_detail_{TIMESTAMP}.png"

# Save raw results
df.to_csv(CSV_FILE, index=False)
print(f"Saved: {CSV_FILE}")

# Display summary: mean gain and entropy per (model, category) pair.
print("\nRESULTS SUMMARY")
print("=" * 60)
print(df.groupby(['Model', 'Category'])[['Last_Gain', 'Entropy']].mean().round(3))

In [None]:
# Cell 7: STATISTICAL ANALYSIS

def _sig_stars(p_value, not_significant="ns"):
    """Map a p-value to the star notation used in this report.

    Returns "***" below 0.001, "**" below 0.01, "*" below 0.05 and
    ``not_significant`` otherwise. A NaN p-value fails every comparison and
    therefore also yields ``not_significant``.
    """
    for threshold, stars in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p_value < threshold:
            return stars
    return not_significant


print("\n" + "=" * 70)
print("STATISTICAL ANALYSIS")
print("=" * 70)

# Split the frame once; sort=False keeps the models in measurement order.
per_model = list(df.groupby('Model', sort=False))

# 1. Per-Model Correlation: Entropy vs Gain
print("\n1. CORRELATION: Entropy vs Last_Gain (per model)")
print("-" * 60)

correlation_results = []
for model, subset in per_model:
    # Spearman (rank correlation) and Pearson (linear correlation).
    # Results are cast to native floats so they serialise cleanly later.
    spearman_r, spearman_p = (float(v) for v in spearmanr(subset['Entropy'], subset['Last_Gain']))
    pearson_r, pearson_p = (float(v) for v in pearsonr(subset['Entropy'], subset['Last_Gain']))

    correlation_results.append({
        "Model": model,
        "Spearman_r": spearman_r,
        "Spearman_p": spearman_p,
        "Pearson_r": pearson_r,
        "Pearson_p": pearson_p
    })

    print(f"{model:<20} | Spearman: r={spearman_r:+.3f} p={spearman_p:.4f} ({_sig_stars(spearman_p)})")
    print(f"{'':<20} | Pearson:  r={pearson_r:+.3f} p={pearson_p:.4f} ({_sig_stars(pearson_p)})")

# 2. Per-Model Correlation: Complexity vs Gain
print("\n2. CORRELATION: Complexity vs Last_Gain (per model)")
print("-" * 60)

complexity_correlations = []
for model, subset in per_model:
    spearman_r, spearman_p = (float(v) for v in spearmanr(subset['Complexity'], subset['Last_Gain']))
    print(f"{model:<20} | r={spearman_r:+.3f} p={spearman_p:.4f} ({_sig_stars(spearman_p)})")
    complexity_correlations.append({
        "Model": model,
        "Complexity_Spearman_r": spearman_r,
        "Complexity_Spearman_p": spearman_p
    })

# 3. Base Level Analysis (mean gain per model)
print("\n3. BASE LEVEL ANALYSIS (Architektur-Bias)")
print("-" * 60)

base_levels = df.groupby('Model')['Last_Gain'].agg(['mean', 'std', 'min', 'max'])
print(base_levels.round(3))

# 4. Modulation Range (max - min per model)
print("\n4. MODULATION RANGE (Input-Dependency)")
print("-" * 60)

# The extremes are already in base_levels; reuse them instead of
# re-filtering the frame for every model.
for model, _ in per_model:
    min_gain = base_levels.loc[model, 'min']
    max_gain = base_levels.loc[model, 'max']
    modulation = max_gain - min_gain
    print(f"{model:<20} | Min: {min_gain:.3f} | Max: {max_gain:.3f} | Range: {modulation:.3f}")

In [None]:
# Cell 8: P3 VALIDATION - Cliche vs Novel

print("\n" + "=" * 70)
# Header spelling matches the "PLATTITUEDEN-TUNNEL" heading of the final verdict.
print("P3 VALIDATION: PLATTITUEDEN-TUNNEL HYPOTHESIS")
print("=" * 70)

print("\nHypothesis: Clich\u00e9s have LOWER entropy than Novel prompts")
print("(Because LLMs have strong priors for common phrases)\n")

# Significance threshold for the verdict.
P3_ALPHA = 0.05

p3_results = []
for model in df['Model'].unique():
    subset = df[df['Model'] == model]

    cliche_entropy = subset[subset['Category'] == 'Cliche']['Entropy']
    novel_entropy = subset[subset['Category'] == 'Novel']['Entropy']
    cliche_mean = float(cliche_entropy.mean())
    novel_mean = float(novel_entropy.mean())

    # Two-sided independent-samples t-test on the per-prompt entropies.
    t_stat, p_val = (float(v) for v in ttest_ind(cliche_entropy, novel_entropy))

    # Effect size (Cohen's d); reported as 0 when the pooled std is not positive.
    pooled_std = np.sqrt((cliche_entropy.std()**2 + novel_entropy.std()**2) / 2)
    cohens_d = float((cliche_mean - novel_mean) / pooled_std) if pooled_std > 0 else 0

    sig = "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else "ns"

    # The verdict takes the test result into account instead of the bare
    # direction of the means. The labels mirror the input-dependency verdict:
    # a significant difference is required both to confirm and to reject.
    # `not (p < alpha)` also routes a NaN p-value to NOT SIGNIFICANT.
    if not (p_val < P3_ALPHA):
        direction = "NOT SIGNIFICANT"
    elif cliche_mean < novel_mean:
        direction = "CONFIRMED"
    else:
        direction = "REJECTED"

    p3_results.append({
        "Model": model,
        "Cliche_Entropy_Mean": cliche_mean,
        "Novel_Entropy_Mean": novel_mean,
        "t_stat": t_stat,
        "p_val": p_val,
        "cohens_d": cohens_d,
        "hypothesis": direction
    })

    print(f"{model}:")
    print(f"  Clich\u00e9 entropy:  {cliche_mean:.3f} \u00b1 {cliche_entropy.std():.3f}")
    print(f"  Novel entropy:   {novel_mean:.3f} \u00b1 {novel_entropy.std():.3f}")
    print(f"  t-test: t={t_stat:.3f}, p={p_val:.4f} ({sig})")
    print(f"  Cohen's d: {cohens_d:.3f}")
    print(f"  Hypothesis: {direction}\n")

In [None]:
# Cell 9: VISUALIZATION - Main Results

# Four-panel overview: (A) entropy vs gain, (B) gain per category,
# (C) entropy per category, (D) mean gain per model.
fig, axes = plt.subplots(2, 2, figsize=(16, 14))
ax1, ax2, ax3, ax4 = axes.ravel()

# A. Scatter: one series per model, in order of first appearance.
for model_label, model_rows in df.groupby('Model', sort=False):
    ax1.scatter(
        model_rows['Entropy'],
        model_rows['Last_Gain'],
        label=model_label,
        alpha=0.7,
        s=80,
    )
ax1.axhline(1.0, ls='--', c='gray', alpha=0.5, label='Neutral (1.0)')
ax1.set_xlabel('Output Entropy (nats)')
ax1.set_ylabel('Last Layer Gain')
ax1.set_title('A. Entropy vs Gain (All Models)')
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# Category order shared by both box plots.
category_order = ['Factual', 'Cliche', 'Nonsense', 'Novel', 'Syntactic']
box_options = dict(data=df, x='Category', hue='Model', order=category_order)

# B. Box: gain by category, models side by side via hue.
sns.boxplot(y='Last_Gain', ax=ax2, **box_options)
ax2.axhline(1.0, ls='--', c='gray', alpha=0.5)
ax2.set_xlabel('Prompt Category')
ax2.set_ylabel('Last Layer Gain')
ax2.set_title('B. Gain by Category (Bremspedal-Gesetz)')
ax2.legend(loc='upper right', fontsize=8)
ax2.tick_params(axis='x', rotation=45)

# C. Box: entropy by category, same layout as panel B.
sns.boxplot(y='Entropy', ax=ax3, **box_options)
ax3.set_xlabel('Prompt Category')
ax3.set_ylabel('Output Entropy (nats)')
ax3.set_title('C. Entropy by Category (Plattit\u00fcden-Tunnel)')
ax3.legend(loc='upper right', fontsize=8)
ax3.tick_params(axis='x', rotation=45)

# D. Bar chart of the mean gain per model, sorted ascending, std as error bar.
gain_by_model = df.groupby('Model')['Last_Gain']
model_means = gain_by_model.mean().sort_values()
model_stds = gain_by_model.std()

# Red below 1.0, green above 1.0, blue otherwise.
colors = []
for mean_gain in model_means:
    if mean_gain < 1.0:
        colors.append('#d62728')
    elif mean_gain > 1.0:
        colors.append('#2ca02c')
    else:
        colors.append('#1f77b4')

bars = ax4.bar(
    model_means.index,
    model_means.values,
    yerr=model_stds.loc[model_means.index].values,
    color=colors,
    alpha=0.7,
    capsize=5,
)
ax4.axhline(1.0, ls='--', c='gray', alpha=0.5, label='Neutral')
ax4.set_xlabel('Model')
ax4.set_ylabel('Mean Last Layer Gain')
ax4.set_title('D. Architecture Base Level')
ax4.tick_params(axis='x', rotation=45)

# Numeric label slightly above the top of each bar.
for bar, mean_gain in zip(bars, model_means.values):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.02
    ax4.text(label_x, label_y, f'{mean_gain:.2f}', ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.savefig(PNG_MAIN, dpi=150, bbox_inches='tight')
plt.show()

print(f"\nFigure saved: {PNG_MAIN}")

In [None]:
# Cell 10: VISUALIZATION - Per-Model Detail

# One scatter panel per model. The grid grows with the number of models that
# actually produced data: nothing is dropped when more than four models are
# present, and unused panels are hidden when the count is odd or a model
# failed. With four models this is the same 2x2, 16x14 inch figure as before.
models_list = list(df['Model'].unique())
n_cols = 2
n_rows = max(1, (len(models_list) + n_cols - 1) // n_cols)

# squeeze=False keeps `axes` two-dimensional even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 7 * n_rows), squeeze=False)
panel_axes = axes.ravel()

for ax, model in zip(panel_axes, models_list):
    subset = df[df['Model'] == model]

    # Scatter with category colors (category_order comes from the main-figure cell).
    for cat in category_order:
        cat_data = subset[subset['Category'] == cat]
        ax.scatter(cat_data['Entropy'], cat_data['Last_Gain'], label=cat, s=100, alpha=0.8)

    ax.axhline(1.0, ls='--', c='gray', alpha=0.5)
    ax.set_xlabel('Output Entropy')
    ax.set_ylabel('Last Layer Gain')

    # Add correlation; non-significant results carry no star suffix in the title.
    r, p = spearmanr(subset['Entropy'], subset['Last_Gain'])
    sig = "***" if p < 0.001 else "**" if p < 0.01 else "*" if p < 0.05 else ""
    ax.set_title(f'{model}\n(Spearman r={r:.3f}{sig}, p={p:.4f})')
    ax.legend(loc='best', fontsize=8)
    ax.grid(True, alpha=0.3)

# Hide panels that have no model assigned.
for ax in panel_axes[len(models_list):]:
    ax.set_visible(False)

plt.tight_layout()
plt.savefig(PNG_DETAIL, dpi=150, bbox_inches='tight')
plt.show()

print(f"\nFigure saved: {PNG_DETAIL}")

In [None]:
# Cell 11: SAVE RESULTS AS JSON

def _json_default(value):
    """Convert objects the json module cannot encode by itself.

    NumPy scalars become the matching native Python value and NumPy arrays
    become lists, so numbers stay numbers in the output file. Anything else
    falls back to ``str(value)``.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# Per-model gain extremes, computed in one pass; sort=False keeps the models
# in the order in which they were measured.
gain_extremes = df.groupby('Model', sort=False)['Last_Gain'].agg(['min', 'max'])

results_json = {
    "experiment": "Grand Unified Thermodynamic Benchmark",
    "date": TIMESTAMP,
    "n_models": len(MODELS_TO_TEST),
    "n_prompts": total_prompts,
    "n_measurements": len(df),
    "models": list(MODELS_TO_TEST.keys()),
    "categories": list(PROMPT_DATASET.keys()),
    "base_levels": {k: float(v) for k, v in df.groupby('Model')['Last_Gain'].mean().to_dict().items()},
    "modulation_ranges": {
        model: {
            "min": float(row['min']),
            "max": float(row['max']),
            "range": float(row['max'] - row['min'])
        }
        for model, row in gain_extremes.iterrows()
    },
    "entropy_correlations": correlation_results,
    "complexity_correlations": complexity_correlations,
    "p3_platitude_tunnel": p3_results,
    "all_results": all_results
}

# Explicit encoding so the file does not depend on the platform default.
with open(JSON_FILE, 'w', encoding='utf-8') as f:
    json.dump(results_json, f, indent=2, default=_json_default)

print(f"Results saved to {JSON_FILE}")

In [None]:
# Cell 12: FINAL VERDICT

print("\n" + "=" * 70)
print("FINAL VERDICT: PAPER #3 CLAIM VALIDATION")
print("=" * 70)

# Significance threshold shared by the verdicts below.
VERDICT_ALPHA = 0.05

# 1. Base Level Hypothesis
print("\n1. BASE LEVEL HYPOTHESIS (Architektur-Bias)")
print("-" * 60)
base_means = df.groupby('Model')['Last_Gain'].mean()
# Iterate the model zoo itself rather than a second, hard-coded name list,
# so a model added to MODELS_TO_TEST is judged here as well.
for model, info in MODELS_TO_TEST.items():
    if model not in base_means.index:
        # No rows for this model in df (for example because it failed to load).
        print(f"  {model}: no measurements | SKIPPED")
        continue
    mean = base_means[model]
    expected = info['expected_base']
    if '< 1' in expected:
        confirmed = mean < 1.0
    elif '> 1' in expected:
        confirmed = mean > 1.0
    elif '~ 1' in expected:
        # "Approximately 1" is taken as the open interval (0.9, 1.1).
        confirmed = 0.9 < mean < 1.1
    else:
        confirmed = False
    status = 'CONFIRMED' if confirmed else 'UNEXPECTED'
    print(f"  {model}: Mean={mean:.3f} | Expected: {expected} | {status}")

# 2. Input-Dependency Hypothesis
print("\n2. INPUT-DEPENDENCY HYPOTHESIS (Bremspedal-Gesetz)")
print("-" * 60)
for model in df['Model'].unique():
    subset = df[df['Model'] == model]
    r, p = spearmanr(subset['Complexity'], subset['Last_Gain'])
    # `not (p < alpha)` is also true for a NaN p-value (undefined correlation,
    # e.g. constant gain), which must not be reported as REJECTED.
    if not (p < VERDICT_ALPHA):
        status = 'NOT SIGNIFICANT'
    elif r > 0:
        status = 'CONFIRMED'
    else:
        status = 'REJECTED'
    print(f"  {model}: r={r:+.3f}, p={p:.4f} | {status}")

# 3. Plattitueden-Tunnel Hypothesis
print("\n3. PLATTITUEDEN-TUNNEL HYPOTHESIS")
print("-" * 60)
for model in df['Model'].unique():
    subset = df[df['Model'] == model]
    cliche_values = subset[subset['Category'] == 'Cliche']['Entropy']
    novel_values = subset[subset['Category'] == 'Novel']['Entropy']
    cliche_ent = cliche_values.mean()
    novel_ent = novel_values.mean()
    # Same rule as in section 2: a verdict in either direction requires a
    # significant difference, not merely a lower or higher mean.
    _, p = ttest_ind(cliche_values, novel_values)
    if not (p < VERDICT_ALPHA):
        status = 'NOT SIGNIFICANT'
    elif cliche_ent < novel_ent:
        status = 'CONFIRMED'
    else:
        status = 'REJECTED'
    print(f"  {model}: Cliche={cliche_ent:.3f} vs Novel={novel_ent:.3f} | p={p:.4f} | {status}")

print("\n" + "=" * 70)
print("BENCHMARK COMPLETE")
print("=" * 70)

In [None]:
# Cell 13: DOWNLOAD ALL RESULTS

print("\n" + "=" * 70)
print("DOWNLOADING RESULTS")
print("=" * 70)

# Every artefact written by the earlier cells.
files_to_download = [CSV_FILE, JSON_FILE, PNG_MAIN, PNG_DETAIL]

# First pass: list what exists and how large it is.
print("\nFiles to download:")
for path in files_to_download:
    if not os.path.exists(path):
        print(f"  {path} (NOT FOUND)")
        continue
    size_kb = os.path.getsize(path) / 1024  # KB
    print(f"  {path} ({size_kb:.1f} KB)")

print("\nStarting downloads...")

# Second pass: hand each existing file to the Colab download helper. A
# failure is reported and does not stop the remaining downloads.
for path in files_to_download:
    if not os.path.exists(path):
        print(f"  SKIPPED (not found): {path}")
        continue
    try:
        files.download(path)
        print(f"  Downloaded: {path}")
    except Exception as e:
        print(f"  FAILED to download {path}: {e}")

print("\n" + "=" * 70)
print("ALL DOWNLOADS COMPLETE")
print("=" * 70)