# Mixture-of-Gaussians Evaluation

This notebook evaluates MoG simulation results for VAE unlearning.

**Experiments:**
- 5 main scenarios (component removal, overlapping, scattered, partial)
- 3 dimensionality scaling experiments (d∈{2,10,20})
- Retrain-floor logic applied throughout

In [None]:
# Load and visualize MoG results
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Global plot styling shared by every figure in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Read the aggregated results of the MoG simulation run.
# JSON is UTF-8 by specification; the encoding is passed explicitly so the file
# decodes identically on every platform instead of using the locale default.
print('Loading MoG summary...')
with open('outputs/p3/mog/summary.json', 'r', encoding='utf-8') as f:
    summary = json.load(f)

# `experiments` and `scaling_experiments` are the two lists the later cells iterate over.
print(f"Description: {summary['description']}")
print(f"Total experiments: {len(summary['experiments']) + len(summary['scaling_experiments'])}")

## Main Experiments Overview

In [None]:
# Flatten the nested main-experiment records into one table row each.
def _experiment_row(record):
    """Return the flat column dict for one entry of summary['experiments']."""
    data, baseline, retrain = record['data'], record['baseline'], record['retrain']
    return {
        'Experiment': record['experiment_name'],
        'K': data['K'],
        'd': data['d'],
        'n': data['n'],
        'Scenario': data['scenario'],
        'Forget': record['forget_scenario'],
        'F_size': record['forget_set_size'],
        'Baseline_ARI': baseline['ari'],
        'Baseline_AUC': baseline['auc_avg'],
        'Retrain_ARI': retrain['ari'],
        'Retrain_Floor': retrain['auc_floor'],
        'ELBO_Gap_%': retrain['elbo_gap_percent'],
    }


# `results` keeps the raw row dicts; `df_main` is the tabular view used by later cells.
results = [_experiment_row(record) for record in summary['experiments']]
df_main = pd.DataFrame(results)

print('\nMain Experiments:')
print(df_main.to_string(index=False))

## Figure 1: Privacy Leakage Analysis

Baseline AUC vs. retrain-floor AUC for each main experiment.

In [None]:
# Figure 1: grouped bars, baseline AUC next to retrain-floor AUC, one pair per experiment.
fig, ax = plt.subplots(figsize=(12, 6))

width = 0.35
x = np.arange(len(df_main))
baseline_auc = df_main['Baseline_AUC']
retrain_floor = df_main['Retrain_Floor']

# Each pair is centred on its integer x position, half a bar width to either side.
bars1 = ax.bar(x - width/2, baseline_auc, width,
               label='Baseline', alpha=0.8, color='#e74c3c')
bars2 = ax.bar(x + width/2, retrain_floor, width,
               label='Retrain Floor', alpha=0.8, color='#3498db')

# Reference line at AUC = 0.5.
ax.axhline(y=0.5, color='gray', linestyle='--', linewidth=1, label='Random (AUC=0.5)', alpha=0.5)

# Compact three-line tick label: scenario prefix, forget-type prefix, dimensionality.
tick_labels = [
    f"{scenario[:3]}\n{forget[:8]}\nd={dim}"
    for scenario, forget, dim in zip(df_main['Scenario'], df_main['Forget'], df_main['d'])
]
ax.set_xticks(x)
ax.set_xticklabels(tick_labels, fontsize=9)

ax.set_xlabel('Experiment', fontsize=12)
ax.set_ylabel('AUC (F vs Unseen & Retain avg)', fontsize=12)
ax.set_title('Privacy Leakage - Baseline vs Retrain Floor', fontsize=14, fontweight='bold')
ax.legend(loc='upper right', fontsize=10)
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('outputs/p3/mog/figure_privacy_leakage.png', dpi=150, bbox_inches='tight')
plt.show()

# Text summary of the extremes shown in the chart.
highest_name = df_main.loc[baseline_auc.idxmax(), 'Experiment']
lowest_name = df_main.loc[baseline_auc.idxmin(), 'Experiment']

print('\nKey Observations:')
print(f"- Highest leakage: {highest_name} (AUC={baseline_auc.max():.3f})")
print(f"- Lowest leakage: {lowest_name} (AUC={baseline_auc.min():.3f})")
print(f"- Mean retrain floor: {retrain_floor.mean():.3f}")

## Figure 2: Utility vs Privacy Trade-off

In [None]:
# Figure 2: scatter of utility (baseline ARI) against privacy risk (retrain-floor AUC).
from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(10, 8))

# One palette drives both the point colours and the legend so they cannot drift apart.
palette = {
    'Separated': '#3498db',
    'Overlapping': '#e74c3c',
    'Scattered': '#2ecc71',
}

# The category is read from the experiment name: 'overlapping' wins over 'scattered';
# every other name falls into the 'Separated' bucket.
colors = [palette['Overlapping'] if 'overlapping' in exp
          else palette['Scattered'] if 'scattered' in exp
          else palette['Separated']
          for exp in df_main['Experiment']]

scatter = ax.scatter(df_main['Baseline_ARI'], df_main['Retrain_Floor'],
                     c=colors, s=200, alpha=0.7, edgecolors='black', linewidth=1.5)

# Label every point with its dimensionality.
for idx, row in df_main.iterrows():
    ax.annotate(f"d={row['d']}",
                (row['Baseline_ARI'], row['Retrain_Floor']),
                xytext=(5, 5), textcoords='offset points', fontsize=9)

ax.set_xlabel('Baseline ARI (Clustering Quality)', fontsize=12)
ax.set_ylabel('Retrain Floor AUC (Privacy Risk)', fontsize=12)
ax.set_title('Utility vs Privacy Trade-off', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

legend_elements = [Patch(facecolor=color, label=label) for label, color in palette.items()]
ax.legend(handles=legend_elements, loc='lower right', fontsize=10)

plt.tight_layout()
plt.savefig('outputs/p3/mog/figure_utility_privacy_tradeoff.png', dpi=150, bbox_inches='tight')
plt.show()

pearson_r = df_main['Baseline_ARI'].corr(df_main['Retrain_Floor'])
print('\nCorrelation between ARI and Retrain Floor:')
print(f"Pearson r = {pearson_r:.3f}")

# The interpretation follows the sign of the computed coefficient rather than
# asserting a positive relationship unconditionally.
if pearson_r > 0:
    interpretation = 'Higher clustering quality (ARI) often correlates with higher privacy risk.'
elif pearson_r < 0:
    interpretation = 'Higher clustering quality (ARI) correlates with lower privacy risk in these experiments.'
else:
    # Reached when r == 0 or r is NaN (both comparisons above are False for NaN).
    interpretation = 'No linear relationship between clustering quality (ARI) and privacy risk is measurable here.'
print(f'\nInterpretation: {interpretation}')

## Memorization Study

In [None]:
# Per-component memorization: compare the ELBO of rare components with the common ones.
print('Memorization Analysis (Rare vs Common Components)\n')

for exp in summary['experiments'][:3]:  # First 3 experiments
    print(f"\n{'='*60}")
    print(f"Experiment: {exp['experiment_name']}")
    print(f"{'='*60}")

    mem_data = exp['memorization']

    # One row per mixture component; the JSON keys are component ids stored as strings.
    components = [
        {
            'Component': int(k),
            'Count': info['count'],
            'ELBO': info['elbo'],
            'Is_Rare': info['is_rare'],
        }
        for k, info in mem_data.items()
    ]

    if not components:
        # An empty frame has no 'Component' column to sort on, so stop here.
        print('No per-component memorization data recorded.')
        continue

    df_mem = pd.DataFrame(components).sort_values('Component')
    print(df_mem.to_string(index=False))

    # Coerce to a real boolean mask so `~` is a logical NOT even if the column
    # was loaded as integers or objects.
    is_rare = df_mem['Is_Rare'].astype(bool)
    rare_elbo = df_mem.loc[is_rare, 'ELBO'].mean() if is_rare.any() else None
    common_elbo = df_mem.loc[~is_rare, 'ELBO'].mean() if (~is_rare).any() else None

    # Test against None explicitly: a mean ELBO of exactly 0.0 is a valid value
    # and must not be treated as "no rare component".
    if rare_elbo is not None:
        print(f"\nRare component ELBO: {rare_elbo:.4f}")
        if common_elbo is not None:
            print(f"Common components ELBO: {common_elbo:.4f}")
            print(f"Difference: {abs(rare_elbo - common_elbo):.4f}")
        else:
            print('Common components ELBO: n/a (no common components)')

## Dimensionality Scaling Analysis

In [None]:
# Collect one row per dimensionality-scaling run, ordered by dimension.
scaling_results = [
    {
        'd': run['data']['d'],
        'Baseline_ARI': run['baseline']['ari'],
        'Baseline_AUC': run['baseline']['auc_avg'],
        'Retrain_Floor': run['retrain']['auc_floor'],
        'ELBO_Gap_%': run['retrain']['elbo_gap_percent'],
        'Train_Time_s': run['baseline']['training_time_seconds'],
    }
    for run in summary['scaling_experiments']
]
df_scaling = pd.DataFrame(scaling_results).sort_values('d')

print('Dimensionality Scaling (K=3, scenario=separated)\n')
print(df_scaling.to_string(index=False))

# Three side-by-side line plots sharing the same x axis (d); only the plotted
# column, y label, title and colour differ, so they are drawn from a spec table.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

panel_specs = [
    # (column, y-axis label, panel title, line colour)
    ('Retrain_Floor', 'Retrain Floor AUC', 'Privacy Risk vs Dimension', '#3498db'),
    ('Baseline_ARI', 'Baseline ARI', 'Clustering Quality vs Dimension', '#2ecc71'),
    ('Train_Time_s', 'Training Time (s)', 'Computational Cost vs Dimension', '#e74c3c'),
]

for panel, (column, y_label, panel_title, line_color) in zip(axes, panel_specs):
    panel.plot(df_scaling['d'], df_scaling[column], 'o-',
               linewidth=2, markersize=8, color=line_color)
    panel.set_xlabel('Dimensionality (d)', fontsize=11)
    panel.set_ylabel(y_label, fontsize=11)
    panel.set_title(panel_title, fontsize=12, fontweight='bold')
    panel.grid(True, alpha=0.3)

plt.suptitle('Dimensionality Scaling Analysis', fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.savefig('outputs/p3/mog/figure_dimensionality_scaling.png', dpi=150, bbox_inches='tight')
plt.show()

# Spread of the retrain floor across the tested dimensions.
floor_std = df_scaling['Retrain_Floor'].std()
print('\nKey Finding:')
print(f"Retrain floor remains consistent across dimensions: {floor_std:.4f} std")

## Visualize Latent Spaces

Display a sample of the pre-generated latent-space visualizations (three selected experiments).

In [None]:
# Show a few of the latent-space images that the experiment runs wrote to disk.
from IPython.display import Image, display
import os

# Hand-picked sample: one image per listed experiment directory.
viz_paths = [
    'outputs/p3/mog/K3_d2_n5000_separated_component_removal/latent_viz.png',
    'outputs/p3/mog/K3_d2_n5000_overlapping_component_removal/latent_viz.png',
    'outputs/p3/mog/K3_d5_n5000_separated_scattered/latent_viz.png'
]

print('Sample Latent Space Visualizations:\n')
for path in viz_paths:
    # Report missing files and move on instead of failing the whole cell.
    if not os.path.exists(path):
        print(f"Not found: {path}")
        continue

    # The parent directory name identifies the experiment.
    experiment_dir = os.path.basename(os.path.dirname(path))
    print(f"\n{experiment_dir}:")
    display(Image(filename=path, width=800))

## Summary Statistics

In [None]:
# Final summary with retrain-floor context
print('='*70)
print('PHASE 3 SUMMARY: Mixture-of-Gaussians Simulations')
print('='*70)

n_main = len(summary['experiments'])
n_scaling = len(summary['scaling_experiments'])
n_total = n_main + n_scaling

print('\n1. EXPERIMENTS COMPLETED:')
print(f"   - Main scenarios: {n_main}")
print(f"   - Dimensionality scaling: {n_scaling}")
print(f"   - Total: {n_total}")

# Retrain-floor AUC of every run, main experiments first, then scaling runs.
all_experiments = summary['experiments'] + summary['scaling_experiments']
all_floors = [exp['retrain']['auc_floor'] for exp in all_experiments]


def _floor_label(exp):
    """Return a short label for a run: its forget scenario, else its experiment name."""
    return exp.get('forget_scenario') or exp.get('experiment_name') or 'unknown'


print('\n2. RETRAIN FLOOR STATISTICS:')
if all_floors:
    # Take the Min/Max annotations from the runs that actually attain the
    # extremes, so the label always matches the data.
    lowest = all_experiments[int(np.argmin(all_floors))]
    highest = all_experiments[int(np.argmax(all_floors))]
    print(f"   - Mean: {np.mean(all_floors):.4f}")
    print(f"   - Std:  {np.std(all_floors):.4f}")
    print(f"   - Min:  {np.min(all_floors):.4f} ({_floor_label(lowest)})")
    print(f"   - Max:  {np.max(all_floors):.4f} ({_floor_label(highest)})")
else:
    # np.min / np.max raise on an empty sequence; report the situation instead.
    print('   - No experiments found in summary')

# NOTE(review): the findings below are static text and are not recomputed from
# `summary`; re-check the quoted AUC values after re-running the experiments.
print('\n3. KEY FINDINGS:')
print('   - Well-separated components → High privacy risk (AUC > 0.9)')
print('   - Overlapping components → Moderate risk (AUC ~ 0.67)')
print('   - Scattered forgetting → Near-random detection (AUC ~ 0.54)')
print('   - Retrain floor consistent across dimensions (d=2 to d=20)')

print('\n4. OUTPUTS GENERATED:')
print(f"   - {n_total} experiment directories")
print(f"   - {n_total} latent visualizations")
print('   - JSON results with all metrics')
print('   - 3 analysis figures (this notebook)')

print('\n' + '='*70)
print('Phase 3 (MoG simulations) COMPLETE')
print('='*70)