In [10]:
import os
import pickle
import numpy as np
import pandas as pd

In [45]:
# Build the LaTeX results table: mean accuracy and ECE15 (both scaled by 100)
# per model / num_shots / method, with one column pair per dataset.
models = ['gptj', 'llama2_13b']
datasets = ['sst5', 'trec', 'dbpedia']
csv_files = [f'benchmark_tva_{model}_{dataset}_NEW.csv' for model in models for dataset in datasets]

# One frame per benchmark file; paths are resolved against the kernel's working directory.
dfs = [pd.read_csv(path) for path in csv_files]

group_keys = ['model', 'dataset', 'num_shots', 'method']
metrics = ['accuracy', 'ECE15']
kept_methods = ['original', 'ConC', 'LinC', 'LinC_postHoc_netcal_HB_tva_eqsize']
# Display names used in the table; methods not listed here keep their raw name.
method_labels = {
    'original': 'Uncalibrated',
    'LinC_postHoc_netcal_HB_tva_eqsize': r'LinC+HB\tiny\textsubscript{TvA}',
}

runs_raw = pd.concat(dfs, ignore_index=True)

# Filter rows/columns *before* aggregating: only the reported methods and metrics
# are averaged, and every derived column is created with `.assign` on a fresh
# frame, which avoids the SettingWithCopyWarning raised by assigning into a
# boolean-filtered slice.
# NOTE(review): the mean runs over every row sharing `group_keys`, so rows that
# differ only in `seed` or `val_size` are pooled together -- confirm that pooling
# across `val_size` is intended.
results_long = (
    runs_raw.loc[runs_raw['method'].isin(kept_methods), group_keys + metrics]
    .groupby(group_keys, as_index=False)
    .mean()
    .assign(
        accuracy=lambda t: t['accuracy'] * 100,
        ECE15=lambda t: t['ECE15'] * 100,
        method=lambda t: t['method'].replace(method_labels),
    )
)

# Wide layout: datasets on the outer column level, metrics on the inner one.
# `pivot_table` aggregates with the mean by default; after the groupby above each
# (model, num_shots, method, dataset) cell holds a single value, so this is a pure reshape.
# Descending column sort yields trec, sst5, dbpedia and accuracy before ECE15.
df = (
    pd.pivot_table(
        results_long,
        values=metrics,
        index=['model', 'num_shots', 'method'],
        columns='dataset',
    )
    .swaplevel(0, 1, axis=1)
    .sort_index(axis=1, ascending=False)
)
s = df.style.format('{:.1f}')  # one decimal place for every cell
# print (rather than a bare expression) so the LaTeX source is emitted unescaped for copy-paste.
print(s.to_latex())

\begin{tabular}{lllrrrrrr}
 &  & dataset & \multicolumn{2}{r}{trec} & \multicolumn{2}{r}{sst5} & \multicolumn{2}{r}{dbpedia} \\
 &  &  & accuracy & ECE15 & accuracy & ECE15 & accuracy & ECE15 \\
model & num_shots & method &  &  &  &  &  &  \\
\multirow[c]{16}{*}{gptj} & \multirow[c]{4}{*}{0} & ConC & 40.0 & 14.0 & 40.7 & 10.3 & 47.7 & 24.6 \\
 &  & LinC & 58.9 & 26.4 & 46.3 & 11.0 & 62.2 & 12.8 \\
 &  & LinC+HB\tiny\textsubscript{TvA} & 58.9 & 6.5 & 46.3 & 7.0 & 62.2 & 5.7 \\
 &  & Uncalibrated & 24.7 & 29.7 & 33.7 & 22.5 & 19.7 & 27.4 \\
 & \multirow[c]{4}{*}{1} & ConC & 41.7 & 13.6 & 50.7 & 14.2 & 82.7 & 6.9 \\
 &  & LinC & 59.9 & 9.1 & 50.1 & 12.3 & 84.4 & 6.6 \\
 &  & LinC+HB\tiny\textsubscript{TvA} & 59.9 & 3.9 & 50.1 & 7.3 & 84.4 & 5.1 \\
 &  & Uncalibrated & 43.7 & 12.1 & 36.3 & 30.9 & 58.7 & 14.2 \\
 & \multirow[c]{4}{*}{4} & ConC & 40.3 & 14.4 & 54.3 & 8.8 & 94.0 & 6.9 \\
 &  & LinC & 57.9 & 9.7 & 53.6 & 10.6 & 94.3 & 5.7 \\
 &  & LinC+HB\tiny\textsubscript{TvA} & 57.9 & 5.2 &

In [43]:
# Rich display of `df`, the pivoted results table (rows: model / num_shots / method;
# columns: dataset x metric).
# NOTE(review): the saved output of this cell comes from an earlier execution
# (In [43], while the cell that builds `df` is In [45]); it still shows the label
# 'Uncal.' where the current code produces 'Uncalibrated'. Restart the kernel and
# run all cells to refresh it.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset,trec,trec,sst5,sst5,dbpedia,dbpedia
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,accuracy,ECE15,accuracy,ECE15,accuracy,ECE15
model,num_shots,method,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
gptj,0,ConC,40.0,13.959646,40.666667,10.297036,47.666667,24.573425
gptj,0,LinC,58.933333,26.413332,46.266667,10.968464,62.2,12.798724
gptj,0,LinC+HB\tiny\textsubscript{TvA},58.933333,6.523949,46.266667,7.016285,62.2,5.676205
gptj,0,Uncal.,24.666667,29.665075,33.666667,22.538899,19.666667,27.419591
gptj,1,ConC,41.666667,13.616554,50.666667,14.202314,82.666667,6.851721
gptj,1,LinC,59.933333,9.092028,50.133333,12.338831,84.4,6.586486
gptj,1,LinC+HB\tiny\textsubscript{TvA},59.933333,3.893534,50.133333,7.286914,84.4,5.081679
gptj,1,Uncal.,43.666667,12.116719,36.333333,30.851392,58.666667,14.225029
gptj,4,ConC,40.333333,14.41243,54.333333,8.783154,94.0,6.93773
gptj,4,LinC,57.866667,9.669524,53.6,10.580034,94.333333,5.651821
