In [1]:
import pandas as pd
import numpy as np
import copy 
import os
import re
from collections import OrderedDict
from pprint import pprint
import itertools

In [2]:
# Dataset key -> display name used in the LaTeX table header.
# The keys double as the filename patterns used to locate each result file.
dm_identifier = {
    'cifar10': 'CIFAR-10',
    'mnist': 'MNIST',
    'twenty_newsgroups': '20newsgroups',
    'tinyimagenet100': 'TinyImageNet-100',
}
# Dataset keys in the left-to-right order of the table header
# (same order as the mapping above).
data_models = list(dm_identifier)
# Sub-column headings repeated under every dataset.
sub_metrics = [
    'Error',
    'Coverage',
]

def helper_find_files_and_read_dataframe(root_path, patterns=(r"cifar10",)):
    """Walk ``root_path`` and load one result spreadsheet per matching pattern.

    Every ``.xlsx`` file below ``root_path`` is compared against ``patterns``
    in order, using a case-insensitive ``re.search`` on the file name. The
    first pattern that matches claims the file, and the first sheet of that
    file is stored under the pattern. When several files match the same
    pattern, the one visited last by ``os.walk`` overwrites the earlier ones.

    Args:
        root_path: Directory that is searched recursively.
        patterns: Iterable of regular-expression strings. Each one is also
            used as the key under which the loaded frame is stored.

    Returns:
        OrderedDict that starts with every key of the module-level
        ``dm_identifier`` mapped to ``None``; entries whose pattern matched a
        file hold the loaded ``DataFrame`` instead.
    """
    dm_df = OrderedDict((dm, None) for dm in dm_identifier.keys())
    # Materialise once: the patterns are re-scanned for every file, which
    # would silently exhaust a one-shot iterable after the first file.
    patterns = tuple(patterns)

    for root, _, files in os.walk(root_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            if not (os.path.isfile(filepath) and filepath.endswith(".xlsx")):
                continue
            for pattern in patterns:
                if re.search(pattern, filename, re.IGNORECASE):
                    frame = pd.read_excel(filepath, sheet_name=0)
                    # 'Unnamed: 0' is presumably the index column written when
                    # the sheet was exported; sheets saved without it must not
                    # raise KeyError, hence errors='ignore'.
                    dm_df[pattern] = frame.drop(columns=['Unnamed: 0'], errors='ignore')
                    break  # Stop checking patterns for this file
    return dm_df

def read_and_get_filtered_dataframes(root_path, patterns=(r"cifar10",),
                                     group_cols=("training_conf", "calib_conf")):
    """Load the result frames and keep the best-coverage row per configuration.

    For every frame returned by ``helper_find_files_and_read_dataframe`` the
    ``calib_conf`` column is normalised (missing values become the string
    ``"None"``, everything is cast to ``str``). The rows are then sorted by
    ``Coverage-Mean`` in descending order, and only the first row of each
    ``group_cols`` combination, i.e. the one with the highest coverage, is
    retained.

    Args:
        root_path: Directory that is searched recursively for ``.xlsx`` files.
        patterns: Iterable of filename patterns, forwarded to the loader.
        group_cols: Columns that together identify one table cell. The default
            matches the (train-time method, post-hoc method) lookup done when
            the table body is rendered; deduplicating on ``calib_conf`` alone
            would discard every train-time method but one per post-hoc method.
            Columns absent from a frame are skipped. Pass ``("calib_conf",)``
            to keep a single row per post-hoc method.

    Returns:
        The mapping produced by the loader, with each non-``None`` frame
        replaced by its filtered copy. ``None`` entries are left as they are.
    """
    dm_df = helper_find_files_and_read_dataframe(root_path, patterns=patterns)
    for dm, df in dm_df.items():
        if df is None:
            continue
        # Work on a copy so the frame handed back by the loader is not modified.
        results = df.copy()
        results['calib_conf'] = results['calib_conf'].fillna("None").astype(str)

        # Highest coverage first; calib_conf (ascending) only breaks ties.
        best_first = results.sort_values(["Coverage-Mean", "calib_conf"], ascending=[False, True])

        keys = [col for col in group_cols if col in best_first.columns]
        if keys:
            best_first = best_first.drop_duplicates(subset=keys, keep='first')
        dm_df[dm] = best_first.copy(deep=True)
    return dm_df

# Load and filter the result frames; every dataset key of dm_identifier is
# used as a filename pattern, so dm_df maps dataset key -> DataFrame (or None).
dm_df = read_and_get_filtered_dataframes(
    "../outputs/final_results", patterns=dm_identifier.keys()
)

In [3]:
# Post-hoc calibration methods: value of the `calib_conf` column -> label printed in the table.
cms_ = OrderedDict({'None': '-',
                    'auto_label_opt_v0': 'Ours' ,
                    'temp_scaling': 'TS',
                    'dirichlet': 'Dirichlet',
                    'scaling_binning': 'SB',
                    'histogram_binning_top_label': 'Top-HB'})
# Train-time methods: value of the `training_conf` column -> label printed in the table.
ttms_ = OrderedDict({'std_cross_entropy': 'Vanilla',
                     'crl': 'CRL', 
                     'fmfp': 'FMFP', 
                     'squentropy': 'Squentropy'})
visited = []      # train-time methods whose \multirow label has already been emitted
body_txt = ""     # accumulated LaTeX rows of the table body
bs = "\\"         # a single backslash, usable inside f-string replacement fields
num_dp = 2        # decimal places for every printed number
global_font_size = (8,11)   # (size, baselineskip) for the whole table
std_font_size = (6, 11)     # (size, baselineskip) for the standard deviations

# Sentinel printed when a dataset or a (train-time, post-hoc) combination has no row.
missing_stat = -1
stat_columns = ('Auto-Labeling-Err-Mean', 'Auto-Labeling-Err-Std', 'Coverage-Mean', 'Coverage-Std')

# Loop-invariant pieces: the font switch that wraps each standard deviation
# and the labels of the final post-hoc / train-time methods.
open_std_font = "{" + f"{bs}fontsize{{{std_font_size[0]}}}{{{std_font_size[1]}}}{bs}selectfont"
closing_std_font = "}"
last_cm = list(cms_.keys())[-1]
last_tm = list(ttms_.keys())[-1]


def _lookup_stats(df, tm, cm):
    """Return (err_mean, err_std, cov_mean, cov_std) of the first matching row, or sentinels."""
    if df is None:
        return (missing_stat,) * len(stat_columns)
    rows = df[(df["calib_conf"] == f"{cm}") & (df["training_conf"] == f"{tm}")]
    if rows.empty:
        return (missing_stat,) * len(stat_columns)
    return tuple(rows[col].values[0] for col in stat_columns)


def _format_stat(mean, std):
    """Render one `mean $\\pm$ {small std}` table cell."""
    return f" {mean:.{num_dp}f} ${bs}pm$ {open_std_font}{std:.{num_dp}f} " + closing_std_font


for tm, cm in itertools.product(ttms_.keys(), cms_.keys()):
    # First two columns: the train-time label (once per group, spanning all
    # post-hoc rows) and the post-hoc label.
    if tm not in visited:
        row = rf"\multirow{{{len(cms_)}}}{{*}}{{{ttms_[tm]}}}                     & " + cms_[cm]
        visited.append(tm)
    else:
        row = " ".join(["                                 & ", cms_[cm]])

    # For each dataset, add columns for Error and Coverage
    for dm, df in dm_df.items():
        al_mean, al_std, c_mean, c_std = _lookup_stats(df, tm, cm)
        row += " & " + _format_stat(al_mean, al_std) + " & " + _format_stat(c_mean, c_std)

    # Close each train-time group with a rule; the very last row closes the table.
    if cm == last_cm:
        line = r"\bottomrule" if tm == last_tm else r"\hline"
    else:
        line = ""
    body_txt += row + r"\\" + line + "\n"

In [4]:


# Second header row: the sub-metric headings, repeated once per dataset.
metrics_txt = " & ".join(
    [r"\multicolumn{1}{c}" + "{" + r"\textbf" + "{" + sm + "}" + "}" for sm in sub_metrics] * len(data_models))
# First header row: one dataset name spanning its sub-metric columns.
data_models_txt = ' & ' + ' & '.join(
    [rf"\multicolumn{{{len(sub_metrics)}}}{{c}}" + "{" + r"\textbf" + rf"{{{dm_identifier[dm]}}}" + "}" for dm in data_models])
caption = "Example TBAL LaTeX Table"

# Two label columns (train-time, post-hoc) plus one column per dataset/sub-metric
# pair; the column spec and the \cline range are derived from that count.
n_table_cols = 2 + len(sub_metrics) * len(data_models)
col_spec = "l" * n_table_cols

# \fontsize{}{} sets the font size and baselineskip for the whole table
template = rf"""
\begin{{table*}}[t]
\fontsize{{{global_font_size[0]}}}{{{global_font_size[1]}}}\selectfont
\begin{{tabular}}{{{col_spec}}}
\toprule
\multicolumn{{1}}{{c}}{{\multirow{{2}}{{*}}{{\textbf{{Train-time}}}}}} & \multicolumn{{1}}{{c}}{{\multirow{{2}}{{*}}{{\textbf{{Post-hoc}}}}}} {data_models_txt} \\ \cline{{3-{n_table_cols}}}
\multicolumn{{1}}{{c}}{{}}                      & \multicolumn{{1}}{{c}}{{}}  & {metrics_txt} \\ \toprule 
""" + body_txt + rf"""
\end{{tabular}}
\caption{{{caption}}}
\end{{table*}}"""



In [5]:
# Persist the assembled LaTeX table. The encoding is stated explicitly so the
# written bytes do not depend on the platform's locale default.
with open("./final_table_latex_template.txt", "w", encoding="utf-8") as file:
    file.write(template)