In [None]:
%load_ext autoreload
%autoreload 2

%cd ../

In [None]:
import pandas as pd

from analysis.utils import (
    # hyperparameters
    get_hps_df,
    print_best_hp,
    plot_hps,

    # ETO and E2E results
    convert_to_long_df,
    get_df,
    plot_eto_vs_e2e,
    print_best_test_task_loss,
    print_eto_results,
)

## Hyperparameters

### Quantile regression

In [None]:
# Quantile-regression hyperparameter sweep. The path template carries an
# {alpha} placeholder, presumably filled in by get_hps_df for each value
# passed in `alphas`.
hps_path_fmt = 'out/portfolio_syn_quantile/hyperparams_a{alpha:.2g}.csv'
df = get_hps_df(hps_path_fmt, alphas=(0.01, 0.05, 0.1, 0.2))

# print_best_hp is called twice: keyed by alpha, then by (seed, alpha).
for group_keys in ('alpha', ['seed', 'alpha']):
    print_best_hp(df, by=group_keys)

plot_hps(df, by_alpha=True)

### Gaussian regression

In [None]:
# Gaussian-regression hyperparameter sweep: a single CSV path with no
# {alpha} placeholder, and no `alphas` argument.
df = get_hps_df('out/portfolio_syn_gaussian/hyperparams.csv')

# print_best_hp is called twice: with no grouping key, then keyed by seed.
for group_keys in (None, ['seed']):
    print_best_hp(df, by=group_keys)

plot_hps(df, by_alpha=False)

## ETO vs. E2E

### Quantile regression

In [None]:
# Column names passed as `cols=` to get_df in the quantile-regression cells:
# the seed, followed by the same four metrics for the train and test splits.
cols = (
    ['seed']
    + ['train_pinball_loss', 'train_task_loss',
       'train_coverage', 'train_coverage_no_conformal']
    + ['test_pinball_loss', 'test_task_loss',
       'test_coverage', 'test_coverage_no_conformal']
)

In [None]:
# ETO results for quantile regression at each alpha level. The path template
# has no lr / l2reg placeholders, and lrs / l2regs are passed as None.
eto_df = get_df(
    model='eto',
    fmt_str='out/portfolio_syn_quantile/eto_a{alpha:.2f}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=None,
    l2regs=None,
    cols=cols,
)
print_eto_results(eto_df, print_best_hps=False)

In [None]:
# PTC box results at each alpha level. cols=None is passed through to get_df
# (presumably meaning "no column subset" -- confirm in analysis.utils).
ptc_box_df = get_df(
    model='ptc_box',
    fmt_str='out/portfolio_syn_ptc/ptc_box_a{alpha:.2f}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    cols=None,
)

# Mean and standard deviation of test task loss and test coverage per alpha.
(
    ptc_box_df
    .groupby('alpha')[['test_task_loss', 'test_coverage']]
    .agg(['mean', 'std'])
)

In [None]:
# E2E fine-tuning results for quantile regression: three learning rates at a
# single L2 regularization strength. val_task_loss is requested on top of
# `cols` and is the key handed to print_best_test_task_loss.
e2e_df = get_df(
    model='e2e',
    fmt_str='out/portfolio_syn_quantile/e2e_finetune_a{alpha:.2f}_lr{lr:.3g}_reg{l2reg:.3g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[1e-2, 1e-3, 1e-4],
    l2regs=[1e-4],
    cols=[*cols, 'val_task_loss'],
)
print_best_test_task_loss(e2e_df, by='val_task_loss')

In [None]:
# Stack the ETO and E2E frames. reset_index() without drop=True keeps the old
# index as a regular column in the frame handed to convert_to_long_df.
combined_df = pd.concat([eto_df, e2e_df]).reset_index()
long_df = convert_to_long_df(combined_df)
plot_eto_vs_e2e(long_df, num_rows=4)

### Gaussian regression

In [None]:
# Column names passed as `cols=` to get_df in the Gaussian-regression cells:
# the seed, followed by the same four metrics for the train and test splits.
cols = (
    ['seed']
    + ['train_nll_loss', 'train_task_loss',
       'train_coverage', 'train_coverage_no_conformal']
    + ['test_nll_loss', 'test_task_loss',
       'test_coverage', 'test_coverage_no_conformal']
)

In [None]:
# ETO results for Gaussian regression at each alpha level. The path template
# has no lr / l2reg placeholders, and lrs / l2regs are passed as None.
eto_df = get_df(
    model='eto',
    fmt_str='out/portfolio_syn_gaussian/eto_a{alpha:.2f}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=None,
    l2regs=None,
    cols=cols,
)
print_eto_results(eto_df)

In [None]:
# PTC ellipse results at each alpha level. cols=None is passed through to
# get_df (presumably meaning "no column subset" -- confirm in analysis.utils).
ptc_ellipse_df = get_df(
    model='ptc_ellipse',
    fmt_str='out/portfolio_syn_ptc/ptc_ellipse_a{alpha:.2f}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    cols=None,
)

# Mean and standard deviation of test task loss and test coverage per alpha.
(
    ptc_ellipse_df
    .groupby('alpha')[['test_task_loss', 'test_coverage']]
    .agg(['mean', 'std'])
)

In [None]:
# PTC ellipse (Johnstone variant, per the file names) results at each alpha
# level. cols=None is passed through to get_df (presumably meaning "no column
# subset" -- confirm in analysis.utils).
ptc_ellipse_johnstone_df = get_df(
    model='ptc_ellipse_johnstone',
    fmt_str='out/portfolio_syn_ptc/ptc_ellipse_johnstone_a{alpha:.2f}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    cols=None,
)

# Mean and standard deviation of test task loss and test coverage per alpha.
(
    ptc_ellipse_johnstone_df
    .groupby('alpha')[['test_task_loss', 'test_coverage']]
    .agg(['mean', 'std'])
)

In [None]:
# E2E fine-tuning results for Gaussian regression: three learning rates at a
# single L2 regularization strength. val_task_loss is requested on top of
# `cols` and is the key handed to print_best_test_task_loss.
e2e_df = get_df(
    model='e2e',
    fmt_str='out/portfolio_syn_gaussian/e2e_finetune_a{alpha:.2f}_lr{lr:.3g}_reg{l2reg:.2g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[1e-2, 1e-3, 1e-4],
    l2regs=[1e-2],
    cols=[*cols, 'val_task_loss'],
)
print_best_test_task_loss(e2e_df, by='val_task_loss')

In [None]:
# Stack the ETO and E2E frames. reset_index() without drop=True keeps the old
# index as a regular column in the frame handed to convert_to_long_df.
combined_df = pd.concat([eto_df, e2e_df]).reset_index()
long_df = convert_to_long_df(combined_df)
plot_eto_vs_e2e(long_df, num_rows=4)

### PICNN

In [None]:
# Column names passed as `cols=` to get_df in the PICNN cells: the seed,
# followed by task loss and coverage for the train and test splits.
cols = (
    ['seed']
    + ['train_task_loss', 'train_coverage']
    + ['test_task_loss', 'test_coverage']
)

In [None]:
# PICNN ETO results ("L2_d64" in the file names) over a 3 x 3 grid of learning
# rates and L2 regularization strengths; the best row is picked by
# train_task_loss. NOTE: eto_df is reassigned by the d128 cell that follows.
eto_df = get_df(
    model='eto',
    fmt_str='out/portfolio_syn_picnn/eto_a{alpha:.2f}_L2_d64_lr{lr:.3g}_reg{l2reg:.3g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[1e-2, 1e-3, 1e-4],
    l2regs=[1e-2, 1e-3, 1e-4],
    cols=cols,
)
print_best_test_task_loss(eto_df, by='train_task_loss')

In [None]:
# PICNN ETO results ("L2_d128" in the file names) for a single
# (lr, l2reg) = (1e-2, 1e-3) setting. This overwrites the eto_df assigned by
# the d64 cell above.
eto_df = get_df(
    model='eto',
    fmt_str='out/portfolio_syn_picnn_d128/eto_a{alpha:.2f}_L2_d128_lr{lr:.3g}_reg{l2reg:.3g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[1e-2],
    l2regs=[1e-3],
    cols=cols,
)
print_eto_results(eto_df, print_best_hps=True)

In [None]:
# PICNN E2E fine-tuning results ("L2_d64" in the file names): two learning
# rates at l2reg = 1e-3. NOTE: e2e_df is reassigned by the cells that follow.
e2e_df = get_df(
    model='e2e',
    fmt_str='out/portfolio_syn_picnn/e2e_finetune_a{alpha:.2f}_L2_d64_lr{lr:.3g}_reg{l2reg:.2g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[1e-3, 1e-4],
    l2regs=[1e-3],
    cols=[*cols, 'val_task_loss'],
)
print_best_test_task_loss(e2e_df, by='val_task_loss')

In [None]:
# PICNN E2E fine-tuning results from the "picnn_lrschedule" output directory:
# only alpha = 0.05 with (lr, l2reg) = (1e-3, 1e-3). This overwrites e2e_df.
e2e_df = get_df(
    model='e2e',
    fmt_str='out/portfolio_syn_picnn_lrschedule/e2e_finetune_a{alpha:.2f}_L2_d64_lr{lr:.3g}_reg{l2reg:.2g}.csv',
    alphas=(0.05,),
    lrs=[1e-3],
    l2regs=[1e-3],
    cols=[*cols, 'val_task_loss'],
)
print_best_test_task_loss(e2e_df, by='val_task_loss')

In [None]:
# PICNN E2E fine-tuning results ("L2_d128" in the file names) for a single
# (lr, l2reg) = (5e-3, 1e-3) setting. This overwrites e2e_df once more, so it
# is the frame the plotting cell below sees when the notebook runs in order.
e2e_df = get_df(
    model='e2e',
    fmt_str='out/portfolio_syn_picnn_d128/e2e_finetune_a{alpha:.2f}_L2_d128_lr{lr:.3g}_reg{l2reg:.2g}.csv',
    alphas=(0.01, 0.05, 0.1, 0.2),
    lrs=[5e-3],
    l2regs=[1e-3],
    cols=[*cols, 'val_task_loss'],
)
print_best_test_task_loss(e2e_df, by='val_task_loss')

In [None]:
# eto_df and e2e_df are whatever the most recently executed PICNN cells
# assigned (the d128 variants when the notebook is run top to bottom).
# reset_index() without drop=True keeps the old index as a regular column in
# the frame handed to convert_to_long_df.
combined_df = pd.concat([eto_df, e2e_df]).reset_index()
long_df = convert_to_long_df(combined_df)
plot_eto_vs_e2e(long_df, num_rows=2)