# Results analysis

This notebook produces the plots and tables that compare metrics across the different conformal methods.

In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

import torch
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from moc.analysis.dataframes import (
	load_config, load_df, get_metric_df, get_datasets_df, to_latex, 
    Highlighter, agg_mean_sem, format_cell_latex, format_cell_jupyter
)
from moc.analysis.plot import plot_coverage_per_model, plot_n_samples, plot_ndim
from moc.analysis.plot_cd_diagram import draw_my_cd_diagram
from moc.analysis.helpers import main_metrics, other_metrics, conformal_methods
from moc.utils.general import filter_dict, set_notebook_options, savefig

# Apply the project-wide notebook options (defined in moc.utils.general).
set_notebook_options()

# `name` selects the experiment: its logs are read from logs/<name> and every
# figure and table produced below is written under results/<name>.
name = 'full'
path = Path('results') / name
path.mkdir(parents=True, exist_ok=True)

In [None]:
# Read the run configuration of the selected experiment from its log directory.
config = load_config(Path('logs') / name)

# Load the raw results and convert them to the metric frame used by every cell below.
df = get_metric_df(config, load_df(config))

# Per-dataset summary frame; reload=False is passed through to get_datasets_df
# (presumably reuses a previously computed copy -- TODO confirm).
df_ds = get_datasets_df(config, reload=False)

### CD diagrams

In [3]:
# Collect the names of the datasets with fewer than 1000 instances ...
small_datasets_df = df_ds.query('`Nb instances` < 1000')
too_small = small_datasets_df.reset_index()['Dataset']

# ... and remove them from the results used by all the analyses below.
df = df.query('dataset not in @too_small')

In [None]:
# Draw one critical-difference diagram per (model, metric) pair and save each one
# as results/<name>/cd_diagrams/<model>/<metric>.pdf.
cd_metrics = main_metrics + ['score_time', 'test_coverage_time']

for model_name, model_df in df.groupby('model', dropna=False):
    # dropna=False keeps rows with a missing model as a group of their own, whose key
    # is NaN. pathlib cannot join a float, so use the key's string form for the
    # directory name (a no-op for ordinary string model names).
    model_dir = path / 'cd_diagrams' / str(model_name)

    # Only conformal methods on non-toy datasets are compared.
    plot_df = model_df.query('posthoc_method in @conformal_methods and dataset_group != "toy"').reset_index()

    for metric in cd_metrics:
        print(f'Plotting {model_name} {metric}', flush=True)
        draw_my_cd_diagram(plot_df, metric, config.alpha)
        savefig(model_dir / f'{metric}.pdf')

### Full horizontal results table

In [None]:
# Wide MQF2 table: one row per dataset, one column per (metric, name) pair.
plot_df = df.query('model == "MQF2" and posthoc_method in ["M-CP", "HDR-CP", "DR-CP", "L-CP", "PCP", "HD-PCP", "C-PCP"] and dataset_group != "toy"')
pivot_df = plot_df.pivot_table(
    values='value',
    index='dataset',
    columns=['metric', 'name'],
    aggfunc=agg_mean_sem,
    observed=True,
)

highlighter = Highlighter(config.alpha)
styled_table = pivot_df.style.apply(highlighter.highlight_min_per_metric, axis=None)

# Styler.format updates the styler itself, so export with the LaTeX formatter first
# and only then switch to the Jupyter formatter for the cell output.
to_latex(styled_table.format(format_cell_latex), path / 'tables' / 'mqf2_real_wide.tex')
styled_table.format(format_cell_jupyter)

### Full vertical results table

In [None]:
# Vertical MQF2 table: one row per (dataset, name) pair, one column per main metric.
plot_df = (
    df.query('model == "MQF2" and posthoc_method in @conformal_methods and metric in @main_metrics and dataset_group != "toy"')
    .reset_index()
    # A categorical with explicit categories makes the rows follow the order of
    # conformal_methods instead of alphabetical order.
    .assign(name=lambda frame: pd.Categorical(frame['name'], conformal_methods))
)
pivot_df = plot_df.pivot_table(
    values='value',
    index=['dataset', 'name'],
    columns=['metric'],
    aggfunc=agg_mean_sem,
    observed=True,
)

highlighter = Highlighter(config.alpha)
styled_table = pivot_df.style.apply(highlighter.highlight_min_per_dataset, axis=None)

# Styler.format updates the styler itself: export to LaTeX first, then re-format
# for the notebook output.
to_latex(styled_table.format(format_cell_latex), path / 'tables' / 'mqf2_real.tex')
styled_table.format(format_cell_jupyter)

### Time table

In [None]:
# Total-time table for MQF2: one row per dataset, one column per (metric, name) pair.
# The query is restricted to MQF2 so that it matches the output file name and the
# other MQF2 tables; without the model filter the aggregate would pool the timings
# of every model present in df.
plot_df = (
    df.query('model == "MQF2" and dataset_group != "toy" and metric == "total_time" and posthoc_method in @conformal_methods')
    .reset_index()
)
# A categorical with explicit categories makes the columns follow the order of
# conformal_methods instead of alphabetical order.
plot_df['name'] = pd.Categorical(plot_df['name'], conformal_methods)
pivot_df = plot_df.pivot_table(
    values='value',
    index='dataset',
    columns=['metric', 'name'],
    aggfunc=agg_mean_sem,
    observed=True,
)
styled_table = pivot_df.style.apply(Highlighter(config.alpha).highlight_min_per_metric, axis=None)

# Styler.format updates the styler itself: export to LaTeX first, then re-format
# for the notebook output.
to_latex(styled_table.format(format_cell_latex), path / 'tables' / 'mqf2_real_total_time.tex')
display(styled_table.format(format_cell_jupyter))

### Results for Glow

In [None]:
# Glow table: one row per (dataset, name) pair, one column per main metric
# except region_size, which the query excludes.
# NOTE(review): unlike the MQF2 tables, this query has no `dataset_group != "toy"`
# filter although the output file is named glow_real.tex -- confirm this is intended.
plot_df = (
    df.query('model == "Glow" and posthoc_method in @conformal_methods and metric in @main_metrics and metric != "region_size"')
    .reset_index()
    # A categorical with explicit categories makes the rows follow the order of
    # conformal_methods instead of alphabetical order.
    .assign(name=lambda frame: pd.Categorical(frame['name'], conformal_methods))
)
pivot_df = plot_df.pivot_table(
    values='value',
    index=['dataset', 'name'],
    columns=['metric'],
    aggfunc=agg_mean_sem,
    observed=True,
)

highlighter = Highlighter(config.alpha)
styled_table = pivot_df.style.apply(highlighter.highlight_min_per_dataset, axis=None)

# Styler.format updates the styler itself: export to LaTeX first, then re-format
# for the notebook output.
to_latex(styled_table.format(format_cell_latex), path / 'tables' / 'glow_real.tex')
styled_table.format(format_cell_jupyter)

### Coverage obtained by heuristic methods

In [None]:
# Compare the coverage of the heuristic methods ("-H") with that of their conformal
# counterparts ("-CP").
posthoc_methods = ["HDR-H", "L-H", "HDR-CP", "L-CP"]

# Give each method the tab10 colour at its position in the list (zip stops after
# the last method).
palette = dict(zip(posthoc_methods, sns.color_palette('tab10')))

plot_df = df.query('dataset_group != "toy" and metric == "coverage" and posthoc_method in @posthoc_methods')
coverage_pdf = path / 'coverage' / 'all.pdf'
plot_coverage_per_model(plot_df, posthoc_methods, config.alpha, palette, coverage_pdf)

### Hyperparameter analysis

In [10]:
# These plots are only produced for the 'hparams_alpha_01' experiment.
if config.name == 'hparams_alpha_01':
    # Every dataset except the first five unique names found in df.
    datasets = df.reset_index()['dataset'].unique()[5:]
    sampling_methods = ['PCP', 'HD-PCP', 'C-PCP', 'HDR-CP']

    # First pass: plain plots; second pass: the same plots with reg_line=True.
    # Each pass is saved in its own sub-directory.
    plot_variants = (
        ('n_samples', {}),
        ('n_samples_reg_line', {'reg_line': True}),
    )
    for out_dir, extra_kwargs in plot_variants:
        for method in sampling_methods:
            plot_n_samples(df, config, method, datasets, **extra_kwargs)
            savefig(path / out_dir / f'{method}.pdf')

### Output dimension analysis

In [None]:
# These plots are only produced for the 'toy_ndim' experiment.
if config.name == 'toy_ndim':
    # Keep only the rows that have a dataset value.
    df = df.query('dataset.notna()')

    toy_datasets = ('mvn_isotropic', 'mvn_diagonal', 'mvn_mixture', 'mvn_dependent')
    for dataset in toy_datasets:
        plot_ndim(df, config, dataset)
        figure_file = path / 'toy_ndim' / f'{dataset}.pdf'
        savefig(figure_file)