In [1]:
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

## Experiment parameters

In [2]:
# Number of time windows
T = 18

# Target 1-coverage for conformal prediction
alpha = 0.1

data_name="School"

## Load results

In [8]:
methods = ['BD', 'UA']
GNN_models = ['GCN', 'GAT']
regimes = ['Assisted Semi-Ind', 'Trans', 'Semi-Ind']
# regimes = ['Trans', 'Semi-Ind']
outputs = ['Accuracy', 'Avg Size', 'Coverage']

xlabels = ['', '10am', '', '12pm', '\nDay 1', '2pm', '', '4pm', '',
        '', '10am', '', '12pm', '\nDay 2', '2pm', '', '4pm', '']

def get_summary(score_function):
    results_file = f'results/School_{score_function}.pkl'

    with open(results_file, 'rb') as file:
        results = pickle.load(file)

    methods_list = []
    GNN_models_list = []
    regimes_list = []
    outputs_list = []
    stat_types_list = []
    stats_list = []

    for (method, GNN_model, regime, output) in product(methods, GNN_models, regimes, outputs):

        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('Mean')
        stats_list.append(np.round(np.mean(results[method][GNN_model][regime][output]['All']), 3))
        
        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('St Dev')
        stats_list.append(np.round(np.std(results[method][GNN_model][regime][output]['All']), 3))
        
    output = "TSC"
    for (method, GNN_model, regime) in product(methods, GNN_models, regimes):
        # if regime == "Semi-Ind":
        #     num_vals = 50
        # elif regime in ["Assisted Semi-Ind", "Assisted Semi-Ind"]:
        #     # num_vals = 1000

        num_vals = len(results[method][GNN_model][regime]['Coverage']['All'])

        T_output = np.where(np.array([len(results[method][GNN_model][regime]["Coverage"][t]) for t in range(T)]) > 0)[0]


        covs = np.zeros((T, num_vals))
        for t in T_output:
            covs[t] = results[method][GNN_model][regime]["Coverage"][t]

        # min_covs = []
        # for cov_run in range(num_vals):
        #     covs_for_run = covs[:, cov_run]
        #     covs_for_run = covs_for_run[covs_for_run > 0]
        #     min_covs.append(np.min(covs_for_run))

        TSC = []
        for t in T_output:
            TSC.append(np.mean(covs[t, :]))

        min_TSC_idx = np.argmin(TSC)
        min_TSC = TSC[min_TSC_idx]
        std_min_TSC = np.std(covs[T_output[min_TSC_idx], :])


        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('Mean')
        stats_list.append(np.round(min_TSC, 3))

        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('St Dev')
        stats_list.append(np.round(std_min_TSC, 3)) 


    df_summary = pd.DataFrame({
        'method'   : methods_list,
        'GNN model': GNN_models_list,
        'regime'   : regimes_list,
        'output'   : outputs_list,
        'statistic': stat_types_list,
        'value'    : stats_list,
        'score_function': score_function
    })
    return df_summary


Display full table of statistics.

In [9]:
df_summary = get_summary("APS")
print(df_summary)

   method GNN model             regime    output statistic  value  \
0      BD       GCN  Assisted Semi-Ind  Accuracy      Mean  0.117   
1      BD       GCN  Assisted Semi-Ind  Accuracy    St Dev  0.010   
2      BD       GCN  Assisted Semi-Ind  Avg Size      Mean  9.121   
3      BD       GCN  Assisted Semi-Ind  Avg Size    St Dev  0.254   
4      BD       GCN  Assisted Semi-Ind  Coverage      Mean  0.913   
..    ...       ...                ...       ...       ...    ...   
91     UA       GAT  Assisted Semi-Ind       TSC    St Dev  0.052   
92     UA       GAT              Trans       TSC      Mean  0.798   
93     UA       GAT              Trans       TSC    St Dev  0.070   
94     UA       GAT           Semi-Ind       TSC      Mean  0.848   
95     UA       GAT           Semi-Ind       TSC    St Dev  0.060   

   score_function  
0             APS  
1             APS  
2             APS  
3             APS  
4             APS  
..            ...  
91            APS  
92         

In [10]:
df_summary[df_summary['output'] == "Coverage"]

Unnamed: 0,method,GNN model,regime,output,statistic,value,score_function
4,BD,GCN,Assisted Semi-Ind,Coverage,Mean,0.913,APS
5,BD,GCN,Assisted Semi-Ind,Coverage,St Dev,0.024,APS
10,BD,GCN,Trans,Coverage,Mean,0.901,APS
11,BD,GCN,Trans,Coverage,St Dev,0.012,APS
16,BD,GCN,Semi-Ind,Coverage,Mean,0.821,APS
17,BD,GCN,Semi-Ind,Coverage,St Dev,0.049,APS
22,BD,GAT,Assisted Semi-Ind,Coverage,Mean,0.922,APS
23,BD,GAT,Assisted Semi-Ind,Coverage,St Dev,0.027,APS
28,BD,GAT,Trans,Coverage,Mean,0.901,APS
29,BD,GAT,Trans,Coverage,St Dev,0.012,APS


In [15]:
score_functions = ["APS", "RAPS", "SAPS"]
df_summaries = [get_summary(score_function) for score_function in score_functions]

In [18]:
table_start_start_str = "\\begin{table}[ht]\n\\centering\n"
print(table_start_start_str)
for data_idx in range(len(score_functions)):
    score_function = score_functions[data_idx]
    df_summary = df_summaries[data_idx]

    df_summary["name"] = df_summary["method"] + " " + df_summary["GNN model"]

    replace_dict = {
        "BD": "Block ",
        "UA": "U",
    }

    df_summary["name"] = df_summary["name"].replace(replace_dict)

    output = "Avg Size"

    table_start_str = (
        "\\begin{subtable}{\\textwidth}\n\\centering\n\\begin{tabular}{|l|l|l|l|}\n\\hline\nEmbedding & \\multicolumn{3}{c|}{"
        + f"{data_name}"
        + "} \\\ \n\\cline{2-4}\n& Trans. & Semi-ind. & Temp. Trans. \\\ \n\\hline\n"
    )

    table_data = ""
    # Loop over each unique method + regime pair
    for (GNN_model, method) in product(GNN_models, methods):

        max_trans_acc = df_summary[
            (df_summary["regime"] == "Trans") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()
        max_semi_ind_acc = df_summary[
            (df_summary["regime"] == "Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()
        max_temp_trans_acc = df_summary[
            (df_summary["regime"] == "Assisted Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()

        trans_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Trans")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        trans_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Trans")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        semi_ind_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        semi_ind_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        temp_trans_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Assisted Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        temp_trans_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Assisted Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        def format_cell(mean, std):
            if output == "Coverage":
                value = f"{mean:.3f} $\\pm$ {std:.3f}"
                if output == "Coverage" and (mean + std) >= 0.9:
                    return f"\\textbf{{{value}}}"

            elif output == "Avg Size":
                value = f"{mean:.3f} $\\pm$ {std:.3f}"
                
            return value

        table_data += f"{replace_dict[method]}{GNN_model} & {format_cell(trans_acc, trans_std)} & {format_cell(semi_ind_acc, semi_ind_std)} & {format_cell(temp_trans_acc, temp_trans_std)} \\\\ \\hline\n"

    table_str = table_start_str + table_data + "\\end{tabular}\n\\caption{" + f"{output} for the {data_name} experiment using {score_function}." + "}\n\\end{subtable}"

    print(table_str)
    print("")

table_end_end_str = "\n\\caption{}\n\\end{table}"
print(table_end_end_str)

\begin{table}[ht]
\centering



UnboundLocalError: local variable 'value' referenced before assignment

In [None]:
df_summary["name"] = df_summary["method"] + " " + df_summary["score_function"]

replace_dict = {
    "BD GCN": "Block GCN",
    "BD GAT": "Block GAT",
    "UA GCN": "UGCN",
    "UA GAT": "UGAT",
}

df_summary["name"] = df_summary["name"].replace(replace_dict)


output="TSC"

table_start_str = (
    "\\begin{table}[h]\n\\centering\n\\begin{tabular}{|l|l|l|}\n\\hline\nEmbedding & \\multicolumn{2}{c|}{"
    + data_name
    + "} \\\ \n\\cline{2-3}\n& Trans. & Semi-ind.  \\\ \n\\hline\n"
)

# table_data_1 = "ISE & $0.505 \\pm 0.000$ & $0.248 \\pm 0.000$ \\\\ \\hline\n"


table_data = ""
# Loop over each unique method + regime pair
for (GNN_model, method) in product(GNN_models, methods):

    max_trans_acc = df_summary[
        (df_summary["regime"] == "Trans") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
    ]["value"].max()
    max_semi_ind_acc = df_summary[
        (df_summary["regime"] == "Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
    ]["value"].max()

    trans_acc = df_summary[
        (df_summary["method"] == method)
        & (df_summary["GNN model"] == GNN_model)
        & (df_summary["regime"] == "Trans")
        & (df_summary["output"] == output)
        & (df_summary["statistic"] == "Mean")
    ]["value"].values[0]
    trans_std = df_summary[
        (df_summary["method"] == method)
        & (df_summary["GNN model"] == GNN_model)
        & (df_summary["regime"] == "Trans")
        & (df_summary["output"] == output)
        & (df_summary["statistic"] == "St Dev")
    ]["value"].values[0]
    semi_ind_acc = df_summary[
        (df_summary["method"] == method)
        & (df_summary["GNN model"] == GNN_model)
        & (df_summary["regime"] == "Semi-Ind")
        & (df_summary["output"] == output)
        & (df_summary["statistic"] == "Mean")
    ]["value"].values[0]
    semi_ind_std = df_summary[
        (df_summary["method"] == method)
        & (df_summary["GNN model"] == GNN_model)
        & (df_summary["regime"] == "Semi-Ind")
        & (df_summary["output"] == output)
        & (df_summary["statistic"] == "St Dev")
    ]["value"].values[0]

    method_name = df_summary[
        (df_summary["method"] == method)
        & (df_summary["GNN model"] == GNN_model)
    ]["name"].values[0]
    

    if trans_acc != max_trans_acc:
        table_data += f"{method_name} & {trans_acc:.3f} $\\pm$ {trans_std:.3f}"
    else:
        table_data += (
            method_name + " & \\textbf{" + f"{trans_acc:.3f} $\\pm$ {trans_std:.3f}" + "}"
        )

    if semi_ind_acc != max_semi_ind_acc:
        table_data += f" & ${semi_ind_acc:.3f} \\pm {semi_ind_std:.3f}$ \\\\ \\hline\n"
    else:
        table_data += (
            " & \\textbf{"
            + f"{semi_ind_acc:.3f} $\\pm$ {semi_ind_std:.3f}"
            + "} \\\\ \\hline\n"
        )

table_end_str = (
    "\\end{tabular}\n\\caption{}\n\\label{tab:"
    + data_name
    + "_"+output+"}\n\\end{table}"
)

full_table = table_start_str + table_data + table_end_str

print(full_table)

IndexError: index 0 is out of bounds for axis 0 with size 0