# Setup
## Imports

In [None]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel. Mode 2 reloads all modules before
# each cell execution (see the IPython autoreload documentation).
%load_ext autoreload
%autoreload 2


In [None]:
import json
import pandas as pd
import numpy as np

from generative_social_choice.utils.postprocessing import (
    plot_sorted_utility_distributions,
    scalar_utility_metrics,
    plot_candidate_distribution_stacked,
    
)


## Load data

In [None]:
from generative_social_choice.utils.helper_functions import get_results_paths
from generative_social_choice.utils.postprocessing import plot_sorted_utility_CIs


def get_results(labelling_model: str, run_id: str, embedding_type: str = "llm"):
    """Load per-agent utilities and assignments for every algorithm of one run.

    The result directories are looked up through ``get_results_paths`` (with
    ``baseline=False``). Every ``*.json`` file in the ``"assignments"``
    directory contributes one column, named after the file stem, built from
    the file's ``"utilities"`` / ``"assignments"`` lists and indexed by its
    ``"agent_ids"`` list.

    Files are processed in sorted order so that the column order is
    reproducible across machines. If the files do not all list the same
    agents, the resulting index is the union of all agent ids and missing
    entries are NaN (rather than silently keeping only the first file's
    agents).

    Args:
        labelling_model: Forwarded to ``get_results_paths``.
        run_id: Forwarded to ``get_results_paths``.
            NOTE(review): annotated as ``str`` but this notebook passes ints
            from ``RUN_IDS`` -- confirm what the helper expects.
        embedding_type: Forwarded to ``get_results_paths``.

    Returns:
        A ``(utilities, algo_assignments)`` tuple of DataFrames with one row
        per agent id and one column per assignment file.
    """
    result_paths = get_results_paths(
        labelling_model=labelling_model,
        baseline=False,
        embedding_type=embedding_type,
        run_id=run_id,
    )

    # glob() yields entries in filesystem order; sort for a stable column order.
    assignment_files = sorted(result_paths["assignments"].glob("*.json"))

    utility_columns = {}
    assignment_columns = {}
    for file_path in assignment_files:
        # Read and close the file before doing any pandas work.
        with open(file_path, "r", encoding="utf-8") as f:
            assignment_data = json.load(f)

        agent_ids = assignment_data["agent_ids"]
        algo_name = file_path.stem
        utility_columns[algo_name] = pd.Series(
            assignment_data["utilities"], index=agent_ids
        )
        assignment_columns[algo_name] = pd.Series(
            assignment_data["assignments"], index=agent_ids
        )

    # Passing `columns` keeps the (possibly empty) column labels explicit.
    algo_names = list(utility_columns)
    utilities = pd.DataFrame(utility_columns, columns=algo_names)
    algo_assignments = pd.DataFrame(assignment_columns, columns=algo_names)
    return utilities, algo_assignments


# --- Analysis configuration ---
EMBEDDING_TYPE = "llm"
LABELLING_MODEL = "4o-mini"
RUN_IDS = [0, 1, 2, 3, 4]  # List of run IDs to analyze
METHOD = "exact"

# --- Gather results per run (lists stay aligned with RUN_IDS) ---
all_metrics = []
all_algo_assignments = []
all_utilities = []
for run_id in RUN_IDS:
    utilities, algo_assignments = get_results(
        labelling_model=LABELLING_MODEL,
        run_id=run_id,
        embedding_type=EMBEDDING_TYPE,
    )
    metrics = scalar_utility_metrics(utilities)
    all_utilities.append(utilities)
    all_algo_assignments.append(algo_assignments)
    # Keep only the row of the selected method
    # (assumes the metrics table is indexed by method name -- TODO confirm).
    all_metrics.append(metrics.loc[METHOD])

# One row per run, one column per metric
metrics_df = pd.DataFrame(all_metrics)

# --- Mean and 95% confidence interval across runs ---
mean_metrics = metrics_df.mean()
std_metrics = metrics_df.std()
# Half-width of a normal-approximation interval (z = 1.96).
# NOTE(review): with only 5 runs a Student-t quantile (about 2.776 for 4
# degrees of freedom) would give a wider interval -- confirm z is intended.
ci_95 = 1.96 * std_metrics / np.sqrt(len(RUN_IDS))

ci_lower = mean_metrics - ci_95
ci_upper = mean_metrics + ci_95
summary = pd.DataFrame({
    'Mean': mean_metrics,
    '95% CI Lower': ci_lower,
    '95% CI Upper': ci_upper,
})

# --- Utilities of the selected method, one (METHOD, run_id) column per run ---
utilities_for_CI = pd.DataFrame({
    (METHOD, run_id): run_utilities[METHOD]
    for run_id, run_utilities in zip(RUN_IDS, all_utilities)
})
utilities_for_CI.columns = pd.MultiIndex.from_tuples(utilities_for_CI.columns)

# Plot the utilities with CIs, then display the summary table as cell output
plot_sorted_utility_CIs(utilities_for_CI)

summary


### Plots

In [None]:
# Plot the utilities of the first run in RUN_IDS (all algorithms).
first_run_utilities = all_utilities[0]
fig = plot_sorted_utility_distributions(first_run_utilities)

In [None]:
# Plot the assignments of the first run in RUN_IDS (all algorithms).
first_run_assignments = all_algo_assignments[0]
fig = plot_candidate_distribution_stacked(first_run_assignments)