# FairPareto Results Analysis

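This notebook analyzes the experimental results from the FairPareto evaluations. It loads the pickled Pareto fronts (`*_pareto.pkl`) and, if present, the baseline comparison results (`*_comparison.pkl`) from `../results/metrics`, summarizes and plots them, and writes a LaTeX summary table to `../results/summary_table.tex`.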

In [None]:
import pickle
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import utilities
import sys
sys.path.append('..')
from utils.plotting import plot_multiple_pareto_fronts, plot_baseline_comparison

sns.set_style('whitegrid')
%matplotlib inline

## Load Results

In [None]:
results_dir = Path('../results')
metrics_dir = results_dir / 'metrics'

# Load all available results
# Maps dataset name -> unpickled Pareto front object for that dataset.
all_results = {}

pareto_suffix = '_pareto'


def _pareto_dataset_name(path):
    """Return the dataset name encoded in a `<name>_pareto.pkl` file path."""
    # Strip only the trailing suffix; str.replace would also delete any
    # '_pareto' that happens to occur inside the dataset name itself.
    return path.stem[:-len(pareto_suffix)]


# glob() yields files in arbitrary, filesystem-dependent order. Sorting by
# dataset name makes the insertion order of `all_results` (and therefore every
# later "first N datasets" selection) reproducible across machines and runs.
pareto_files = sorted(
    metrics_dir.glob(f'*{pareto_suffix}.pkl'),
    key=_pareto_dataset_name,
)

for results_file in pareto_files:
    dataset_name = _pareto_dataset_name(results_file)
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load result files produced by trusted experiment runs.
    with open(results_file, 'rb') as f:
        all_results[dataset_name] = pickle.load(f)

print(f"Loaded results for {len(all_results)} datasets:")
for name in sorted(all_results.keys()):
    print(f"  - {name}: {len(all_results[name])} points")

## Summary Statistics

In [None]:
# One row per dataset: number of points plus the gamma and accuracy extremes.
# The numeric columns are stored as strings pre-formatted to four decimals.
summary_columns = [
    'Dataset', 'Points',
    'Min Gamma', 'Max Gamma',
    'Min Acc', 'Max Acc', 'Acc Range',
]
summary_data = []

for dataset_name, pareto_front in all_results.items():
    # min()/max() raise ValueError on an empty sequence, so a dataset whose
    # front has no points is reported and skipped instead of aborting the cell.
    if len(pareto_front) == 0:
        print(f"Skipping {dataset_name}: Pareto front has no points")
        continue

    gammas = list(pareto_front.keys())
    accuracies = list(pareto_front.values())
    lowest_acc, highest_acc = min(accuracies), max(accuracies)

    summary_data.append({
        'Dataset': dataset_name,
        'Points': len(pareto_front),
        'Min Gamma': f"{min(gammas):.4f}",
        'Max Gamma': f"{max(gammas):.4f}",
        'Min Acc': f"{lowest_acc:.4f}",
        'Max Acc': f"{highest_acc:.4f}",
        'Acc Range': f"{highest_acc - lowest_acc:.4f}"
    })

# Passing the column list keeps the table schema stable even when no rows
# were collected (otherwise an empty frame would have no columns at all).
summary_df = pd.DataFrame(summary_data, columns=summary_columns)
summary_df

## Visualize Individual Pareto Fronts

In [None]:
# Plot first few datasets
# The grid shape determines how many datasets fit, so the two cannot drift
# apart. Names are taken in sorted order so the selection does not depend on
# the order in which the result files happened to be loaded.
n_rows, n_cols = 2, 3
datasets_to_plot = sorted(all_results)[:n_rows * n_cols]

fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 10))
axes = axes.flatten()

for ax, dataset_name in zip(axes, datasets_to_plot):
    pf = all_results[dataset_name]

    # Draw the front left-to-right by increasing gamma
    gammas = sorted(pf.keys())
    accs = [pf[g] for g in gammas]

    ax.plot(gammas, accs, 'go-', linewidth=2, markersize=6)
    ax.set_xlabel('Statistical Parity (γ)')
    ax.set_ylabel('Accuracy')
    ax.set_title(dataset_name, fontweight='bold')
    ax.grid(True, alpha=0.3)

# With fewer datasets than panels, hide the leftover axes instead of showing
# empty frames.
for unused_ax in axes[len(datasets_to_plot):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## Compare Multiple Datasets

In [None]:
# Take the first five loaded datasets and hand their fronts to the
# plot_multiple_pareto_fronts helper.
datasets_to_compare = list(all_results)[:5]

comparison_data = {}
for name in datasets_to_compare:
    comparison_data[name] = all_results[name]

fig = plot_multiple_pareto_fronts(
    comparison_data, title='Pareto Fronts Comparison', figsize=(12, 7)
)
plt.show()

## Analyze Trade-off Characteristics

In [None]:
# Compute trade-off slope for each dataset
# For each front, take the mean absolute slope |Δaccuracy / Δgamma| between
# consecutive points (ordered by gamma), alongside the best accuracy and the
# spread between best and worst accuracy.
tradeoff_columns = ['Dataset', 'Avg Trade-off Slope', 'Max Accuracy', 'Accuracy Drop']
tradeoff_data = []

for dataset_name, pf in all_results.items():
    # max()/min() below raise ValueError on an empty front; report and skip it
    # rather than aborting the whole analysis.
    if len(pf) == 0:
        print(f"Skipping {dataset_name}: Pareto front has no points")
        continue

    gammas = sorted(pf.keys())
    accs = [pf[g] for g in gammas]
    points = list(zip(gammas, accs))

    # Compute average slope over consecutive point pairs
    slopes = []
    for (gamma_lo, acc_lo), (gamma_hi, acc_hi) in zip(points, points[1:]):
        delta_gamma = gamma_hi - gamma_lo
        # Guard against a zero (or non-comparable, e.g. NaN) gamma step
        if delta_gamma > 0:
            slopes.append(abs((acc_hi - acc_lo) / delta_gamma))

    # A front with fewer than two usable points has no slope; report 0
    avg_slope = np.mean(slopes) if slopes else 0.0

    best_acc = max(accs)
    tradeoff_data.append({
        'Dataset': dataset_name,
        'Avg Trade-off Slope': avg_slope,
        'Max Accuracy': best_acc,
        'Accuracy Drop': best_acc - min(accs)
    })

# Fixing the columns keeps sort_values from raising KeyError when no rows
# were collected.
tradeoff_df = pd.DataFrame(tradeoff_data, columns=tradeoff_columns)
tradeoff_df = tradeoff_df.sort_values('Avg Trade-off Slope', ascending=False)
tradeoff_df

## Baseline Comparison Analysis

In [None]:
# Load baseline comparison results if available
# Maps dataset name -> unpickled baseline comparison object for that dataset.
comparison_results = {}

comparison_suffix = '_comparison'


def _comparison_dataset_name(path):
    """Return the dataset name encoded in a `<name>_comparison.pkl` file path."""
    # Strip only the trailing suffix; str.replace would also delete any
    # '_comparison' that happens to occur inside the dataset name itself.
    return path.stem[:-len(comparison_suffix)]


# glob() yields files in arbitrary, filesystem-dependent order; sort by
# dataset name so the listing below is reproducible.
comparison_files = sorted(
    metrics_dir.glob(f'*{comparison_suffix}.pkl'),
    key=_comparison_dataset_name,
)

for comp_file in comparison_files:
    dataset_name = _comparison_dataset_name(comp_file)
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load result files produced by trusted experiment runs.
    with open(comp_file, 'rb') as f:
        comparison_results[dataset_name] = pickle.load(f)

if comparison_results:
    print(f"Found baseline comparisons for {len(comparison_results)} datasets:")
    for name in comparison_results.keys():
        print(f"  - {name}")
else:
    print("No baseline comparison results found.")
    print("Run: python experiments/baseline_comparison.py --datasets ADULT COMPAS LSAC")

## Export Summary for Paper

In [None]:
# Render the summary DataFrame as LaTeX source and echo it in the notebook
latex_table = summary_df.to_latex(index=False, float_format="%.4f")
print("LaTeX Table:", latex_table, sep="\n")

# Persist the same text under the results directory
table_path = results_dir / 'summary_table.tex'
table_path.write_text(latex_table)

print("\nTable saved to results/summary_table.tex")