# Architecture Comparison Figure

This notebook generates bar charts comparing encoder architectures on two metrics:
- Train EM% (training accuracy)
- Test Pass@1% (generalization)

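Outputs (saved in section 5):
- `docs/project-report/figures/architecture_comparison.png` (grouped bar chart, main version)
- `architecture_comparison_panels` (two-panel train/test version, saved to the same directory)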

In [None]:
import sys
from pathlib import Path

# Put the current working directory at the front of the import path so that the
# `figure_utils` import below can be resolved from it.
# NOTE(review): presumably figure_utils.py sits next to this notebook and the
# kernel is started from that directory — confirm.
sys.path.insert(0, str(Path.cwd()))

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

# Project-local plotting helpers shared by the figure notebooks.
from figure_utils import (
    setup_paper_style,
    save_figure,
    fetch_final_runs,
    COLORS,
)

# Apply paper styling
# (the exact settings live in figure_utils.setup_paper_style, not shown here;
# it is called once, before any figure is created)
setup_paper_style()

## 1. Fetch Data

In [None]:
# Load the table of final experiment runs.
# NOTE(review): presumably one row per run, as a pandas DataFrame — confirm in figure_utils.
df = fetch_final_runs()

# Show only the columns that matter for this figure.
preview_cols = [
    'display_name',
    'encoder_type',
    'num_params',
    'train_exact_acc',
    'arc_pass1',
    'state',
]
df[preview_cols]

In [None]:
# Keep runs that are finished or still in progress (so F4 is included even while running),
# ordered by experiment name (F1, F2, F3, F4) with a fresh 0..n-1 index.
plottable_states = ['finished', 'running']
df_plot = (
    df.loc[df['state'].isin(plottable_states)]
    .sort_values('display_name')
    .reset_index(drop=True)
)

print(f"Plotting {len(df_plot)} experiments")

# Cell output: the two metrics that will be plotted, per experiment.
df_plot[['display_name', 'train_exact_acc', 'arc_pass1']]

## 2. Create Architecture Comparison Figure

In [None]:
# Human-readable tick labels, keyed by the run's display_name.
labels = {
    'F1_standard': 'F1: Standard\n(2L, 17M)',
    'F2_hybrid_var': 'F2: Hybrid VAE\n(4L, 29M)',
    'F3_etrmtrm': 'F3: ETRMTRM\n(recurrent, 21M)',
    'F4_lpn_var': 'F4: LPN VAE\n(2L, 8M)',
}

# Pull the plotted columns out of the filtered frame (row order = bar order).
experiments = df_plot['display_name'].tolist()
train_em = df_plot['train_exact_acc'].values
test_pass1 = df_plot['arc_pass1'].values

# Experiments without a nice label fall back to their raw display_name.
x_labels = [labels[name] if name in labels else name for name in experiments]

In [None]:
# Grouped vertical bars: one (train, test) pair per experiment.
fig, ax = plt.subplots(figsize=(6, 4))

width = 0.35
x = np.arange(len(experiments))
half = width / 2

# Train bar sits left of the tick, test bar right of it.
bars1 = ax.bar(x - half, train_em, width, label='Train EM%', color=COLORS['train'], alpha=0.9)
bars2 = ax.bar(x + half, test_pass1, width, label='Test Pass@1%', color=COLORS['test'], alpha=0.9)

# Axis cosmetics
ax.set_ylabel('Accuracy (%)')
ax.set_xticks(x)
ax.set_xticklabels(x_labels, fontsize=8)
ax.legend(loc='upper right', frameon=False)
ax.set_ylim(0, 100)
ax.grid(True, alpha=0.3, linestyle='--', axis='y')

# Value labels above each bar: train with one decimal, test with two.
# Bars whose height is not strictly positive get no label.
for container, spec in ((bars1, '.1f'), (bars2, '.2f')):
    for rect in container:
        top = rect.get_height()
        if top > 0:
            ax.annotate(
                format(top, spec),
                xy=(rect.get_x() + rect.get_width() / 2, top),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center',
                va='bottom',
                fontsize=8,
            )

plt.tight_layout()
plt.show()

## 3. Alternative: Horizontal Bar Chart

In [None]:
# Horizontal layout of the same grouped comparison (better for paper if many experiments)
fig, ax = plt.subplots(figsize=(7, 4))

height = 0.35
y = np.arange(len(experiments))
offset = height / 2

# Train bar above the tick position, test bar below it (axis is inverted further down).
bars1 = ax.barh(y - offset, train_em, height, label='Train EM%', color=COLORS['train'], alpha=0.9)
bars2 = ax.barh(y + offset, test_pass1, height, label='Test Pass@1%', color=COLORS['test'], alpha=0.9)

# Axis cosmetics
ax.set_xlabel('Accuracy (%)')
ax.set_yticks(y)
ax.set_yticklabels(x_labels, fontsize=9)
ax.legend(loc='lower right', frameon=False)
ax.set_xlim(0, 100)
ax.grid(True, alpha=0.3, linestyle='--', axis='x')
ax.invert_yaxis()  # first experiment at the top

# Value labels to the right of each bar: train with one decimal, test with two.
# Bars whose length is not strictly positive get no label.
for container, spec in ((bars1, '.1f'), (bars2, '.2f')):
    for rect in container:
        length = rect.get_width()
        if length > 0:
            ax.annotate(
                format(length, spec),
                xy=(length, rect.get_y() + rect.get_height() / 2),
                xytext=(3, 0),
                textcoords='offset points',
                ha='left',
                va='center',
                fontsize=8,
            )

plt.tight_layout()
plt.show()

## 4. Alternative: Separate Panels for Train vs Test

In [None]:
# Two-panel version showing train/test gap more clearly:
# (a) train exact match on a fixed 0-100 axis, (b) test Pass@1 on an axis
# scaled to the data so small values stay visible.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax1, ax2 = axes

# Bar positions are recomputed here so this cell does not silently depend on
# `x` left behind by the grouped-bar cell.
x = np.arange(len(experiments))

# Tick labels and colors are derived from each experiment's own name (the
# "F<n>" prefix of display_name) instead of its position, so a run dropped by
# the state filter cannot shift labels/colors onto the wrong bar.
short_names = [str(exp).split('_', 1)[0] for exp in experiments]
color_keys = {
    'F1': 'standard',
    'F2': 'hybrid_variational',
    'F3': 'etrmtrm',
    'F4': 'lpn_var',
}
# Experiments without a dedicated palette entry use matplotlib's default cycle.
exp_colors = [
    COLORS[color_keys[name]] if name in color_keys else f'C{i % 10}'
    for i, name in enumerate(short_names)
]

# Coerce to float so a missing metric (None/NaN, e.g. a run still in progress)
# is represented uniformly as NaN.
train_vals = np.asarray(train_em, dtype=float)
test_vals = np.asarray(test_pass1, dtype=float)

# Upper limit for the test panel: 1.5x the largest finite value, or 1 when
# there is no positive finite value (all zero, all NaN, or no experiments).
finite_test = test_vals[np.isfinite(test_vals)]
test_top = finite_test.max() * 1.5 if finite_test.size and finite_test.max() > 0 else 1

# (axes, values, y-label, title, number format, y-axis upper limit)
panels = [
    (ax1, train_vals, 'Train Exact Match (%)', '(a) Training Accuracy', '.1f', 100),
    (ax2, test_vals, 'Test Pass@1 (%)', '(b) Test Generalization', '.2f', test_top),
]

for panel_ax, values, ylabel, title, fmt, y_top in panels:
    bars = panel_ax.bar(x, values, color=exp_colors, alpha=0.9)
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_xticks(x)
    panel_ax.set_xticklabels(short_names, fontsize=10)
    panel_ax.set_ylim(0, y_top)
    panel_ax.grid(True, alpha=0.3, linestyle='--', axis='y')
    panel_ax.set_title(title, fontsize=10)

    # Value labels; zero is still labelled, but a missing (NaN) value is not.
    for bar in bars:
        height = bar.get_height()
        if not np.isfinite(height):
            continue
        panel_ax.annotate(f'{height:{fmt}}%',
                          xy=(bar.get_x() + bar.get_width()/2, height),
                          xytext=(0, 3),
                          textcoords='offset points',
                          ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

## 5. Save Figures

In [None]:
# Rebuild the vertical grouped bar chart (main version) and write it to disk.
fig, ax = plt.subplots(figsize=(6, 4))

width = 0.35
x = np.arange(len(experiments))
shift = width / 2

# (bar offset, values, legend label, COLORS key, value-label format)
series = (
    (-shift, train_em, 'Train EM%', 'train', '.1f'),
    (+shift, test_pass1, 'Test Pass@1%', 'test', '.2f'),
)
bars1, bars2 = (
    ax.bar(x + delta, values, width, label=legend, color=COLORS[key], alpha=0.9)
    for delta, values, legend, key, _ in series
)

ax.set_ylabel('Accuracy (%)')
ax.set_xticks(x)
ax.set_xticklabels(x_labels, fontsize=8)
ax.legend(loc='upper right', frameon=False)
ax.set_ylim(0, 100)
ax.grid(True, alpha=0.3, linestyle='--', axis='y')

# Label every strictly positive bar with its value (train: 1 decimal, test: 2).
for container, spec in ((bars1, series[0][-1]), (bars2, series[1][-1])):
    for rect in container:
        top = rect.get_height()
        if top > 0:
            ax.annotate(
                format(top, spec),
                xy=(rect.get_x() + rect.get_width() / 2, top),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center',
                va='bottom',
                fontsize=8,
            )

plt.tight_layout()
# Save before showing, while the figure is fully laid out.
save_figure(fig, 'architecture_comparison')
plt.show()

In [None]:
# Also save two-panel version:
# (a) train exact match on a fixed 0-100 axis, (b) test Pass@1 on an axis
# scaled to the data so small values stay visible.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
ax1, ax2 = axes

# Bar positions are recomputed here so this cell does not silently depend on
# `x` left behind by an earlier plotting cell.
x = np.arange(len(experiments))

# Tick labels and colors are derived from each experiment's own name (the
# "F<n>" prefix of display_name) instead of its position, so a run dropped by
# the state filter cannot shift labels/colors onto the wrong bar in the saved
# figure.
short_names = [str(exp).split('_', 1)[0] for exp in experiments]
color_keys = {
    'F1': 'standard',
    'F2': 'hybrid_variational',
    'F3': 'etrmtrm',
    'F4': 'lpn_var',
}
# Experiments without a dedicated palette entry use matplotlib's default cycle.
exp_colors = [
    COLORS[color_keys[name]] if name in color_keys else f'C{i % 10}'
    for i, name in enumerate(short_names)
]

# Coerce to float so a missing metric (None/NaN, e.g. a run still in progress)
# is represented uniformly as NaN.
train_vals = np.asarray(train_em, dtype=float)
test_vals = np.asarray(test_pass1, dtype=float)

# Upper limit for the test panel: 1.5x the largest finite value, or 1 when
# there is no positive finite value (all zero, all NaN, or no experiments).
finite_test = test_vals[np.isfinite(test_vals)]
test_top = finite_test.max() * 1.5 if finite_test.size and finite_test.max() > 0 else 1

# (axes, values, y-label, title, number format, y-axis upper limit)
panels = [
    (ax1, train_vals, 'Train Exact Match (%)', '(a) Training Accuracy', '.1f', 100),
    (ax2, test_vals, 'Test Pass@1 (%)', '(b) Test Generalization', '.2f', test_top),
]

for panel_ax, values, ylabel, title, fmt, y_top in panels:
    bars = panel_ax.bar(x, values, color=exp_colors, alpha=0.9)
    panel_ax.set_ylabel(ylabel)
    panel_ax.set_xticks(x)
    panel_ax.set_xticklabels(short_names, fontsize=10)
    panel_ax.set_ylim(0, y_top)
    panel_ax.grid(True, alpha=0.3, linestyle='--', axis='y')
    panel_ax.set_title(title, fontsize=10)

    # Value labels; zero is still labelled, but a missing (NaN) value is not.
    for bar in bars:
        height = bar.get_height()
        if not np.isfinite(height):
            continue
        panel_ax.annotate(f'{height:{fmt}}%',
                          xy=(bar.get_x() + bar.get_width()/2, height),
                          xytext=(0, 3),
                          textcoords='offset points',
                          ha='center', va='bottom', fontsize=9)

plt.tight_layout()
# Save before showing, while the figure is fully laid out.
save_figure(fig, 'architecture_comparison_panels')
plt.show()

In [None]:
# Final status message. The directory shown is hard-coded text in this string;
# the actual write location is decided by save_figure (defined in figure_utils,
# not visible here), so keep the two in sync if the output path changes.
print("\nDone! Figures saved to docs/project-report/figures/")