In [1]:
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

## Experiment parameters

In [2]:
# Number of time windows
T = 18

# Target 1-coverage for conformal prediction
alpha = 0.1

data_name="School"

## Load results

Save figures into a special folder.

In [3]:
save_figs = False
output_file_prefix = 'figures/School_'
output_file_suffix = '_10_100_50.pdf'

In [4]:
xlabels = ['', '10am', '', '12pm', '\nDay 1', '2pm', '', '4pm', '',
           '', '10am', '', '12pm', '\nDay 2', '2pm', '', '4pm', '']

## Summary results

In [5]:
methods = ['BD', 'UA']
GNN_models = ['GCN', 'GAT']
regimes = ['Assisted Semi-Ind', 'Trans', 'Semi-Ind']
# regimes = ['Trans', 'Semi-Ind']
outputs = ['Accuracy', 'Avg Size', 'Coverage']

In [18]:
import os

props = []
results_filenames = []
for filename in os.listdir('results'):
    if "].pkl" in filename:
        results_filenames.append(filename)
        prop_vals = np.array(filename.split('[')[1].split(']')[0].split(' '))
        prop_vals = prop_vals.astype(float)
        assert np.sum(prop_vals) == 1
        props.append(prop_vals)

In [34]:

df_summaries = []

for prop, filename in zip(props, results_filenames):
    results_file = f'results/{filename}'

    with open(results_file, 'rb') as file:
        results = pickle.load(file)

    methods_list = []
    GNN_models_list = []
    regimes_list = []
    outputs_list = []
    stat_types_list = []
    stats_list = []

    for (method, GNN_model, regime, output) in product(methods, GNN_models, regimes, outputs):

        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('Mean')
        stats_list.append(np.round(np.mean(results[method][GNN_model][regime][output]['All']), 3))
        
        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('St Dev')
        stats_list.append(np.round(np.std(results[method][GNN_model][regime][output]['All']), 3))
        
    output = "TSC"
    for (method, GNN_model, regime) in product(methods, GNN_models, regimes):
        if regime == "Semi-Ind":
            num_vals = 50
        elif regime in ["Assisted Semi-Ind", "Assisted Semi-Ind"]:
            num_vals = 1000

        T_output = np.where(np.array([len(results[method][GNN_model][regime]["Coverage"][t]) for t in range(T)]) > 0)[0]


        covs = np.zeros((T, num_vals))
        for t in T_output:
            covs[t] = results[method][GNN_model][regime]["Coverage"][t]

        # min_covs = []
        # for cov_run in range(num_vals):
        #     covs_for_run = covs[:, cov_run]
        #     covs_for_run = covs_for_run[covs_for_run > 0]
        #     min_covs.append(np.min(covs_for_run))

        TSC = []
        for t in T_output:
            TSC.append(np.mean(covs[t, :]))

        min_TSC_idx = np.argmin(TSC)
        min_TSC = TSC[min_TSC_idx]
        std_min_TSC = np.std(covs[T_output[min_TSC_idx], :])


        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('Mean')
        stats_list.append(np.round(min_TSC, 3))

        methods_list.append(method)
        GNN_models_list.append(GNN_model)
        regimes_list.append(regime)
        outputs_list.append(output)
        stat_types_list.append('St Dev')
        stats_list.append(np.round(std_min_TSC, 3)) 


    df_summary = pd.DataFrame({
        'method'   : methods_list,
        'GNN model': GNN_models_list,
        'regime'   : regimes_list,
        'output'   : outputs_list,
        'statistic': stat_types_list,
        'value'    : stats_list,
        'data split': "/".join(prop.astype(str)),
    })
    df_summaries.append(df_summary)

Display full table of statistics.

In [35]:
print(df_summaries[0])

   method GNN model             regime    output statistic  value  \
0      BD       GCN  Assisted Semi-Ind  Accuracy      Mean  0.112   
1      BD       GCN  Assisted Semi-Ind  Accuracy    St Dev  0.010   
2      BD       GCN  Assisted Semi-Ind  Avg Size      Mean  9.134   
3      BD       GCN  Assisted Semi-Ind  Avg Size    St Dev  0.164   
4      BD       GCN  Assisted Semi-Ind  Coverage      Mean  0.909   
..    ...       ...                ...       ...       ...    ...   
91     UA       GAT  Assisted Semi-Ind       TSC    St Dev  0.042   
92     UA       GAT              Trans       TSC      Mean  0.813   
93     UA       GAT              Trans       TSC    St Dev  0.071   
94     UA       GAT           Semi-Ind       TSC      Mean  0.842   
95     UA       GAT           Semi-Ind       TSC    St Dev  0.053   

             data split  
0   0.25/0.25/0.25/0.25  
1   0.25/0.25/0.25/0.25  
2   0.25/0.25/0.25/0.25  
3   0.25/0.25/0.25/0.25  
4   0.25/0.25/0.25/0.25  
..             

In [36]:
df_summary[df_summary['output'] == 'Coverage']

Unnamed: 0,method,GNN model,regime,output,statistic,value,data split
4,BD,GCN,Assisted Semi-Ind,Coverage,Mean,0.91,0.05/0.35/0.3/0.3
5,BD,GCN,Assisted Semi-Ind,Coverage,St Dev,0.017,0.05/0.35/0.3/0.3
10,BD,GCN,Trans,Coverage,Mean,0.901,0.05/0.35/0.3/0.3
11,BD,GCN,Trans,Coverage,St Dev,0.013,0.05/0.35/0.3/0.3
16,BD,GCN,Semi-Ind,Coverage,Mean,0.87,0.05/0.35/0.3/0.3
17,BD,GCN,Semi-Ind,Coverage,St Dev,0.039,0.05/0.35/0.3/0.3
22,BD,GAT,Assisted Semi-Ind,Coverage,Mean,0.908,0.05/0.35/0.3/0.3
23,BD,GAT,Assisted Semi-Ind,Coverage,St Dev,0.019,0.05/0.35/0.3/0.3
28,BD,GAT,Trans,Coverage,Mean,0.901,0.05/0.35/0.3/0.3
29,BD,GAT,Trans,Coverage,St Dev,0.013,0.05/0.35/0.3/0.3


In [62]:
table_start_start_str = "\\begin{table}[ht]\n\\centering\n"
print(table_start_start_str)
for data_idx in range(len(props)):
    prop = props[data_idx]
    df_summary = df_summaries[data_idx]

    df_summary["name"] = df_summary["method"] + " " + df_summary["GNN model"]

    replace_dict = {
        "BD": "Block ",
        "UA": "U",
    }

    df_summary["name"] = df_summary["name"].replace(replace_dict)

    output = "Coverage"

    table_start_str = (
        "\\begin{subtable}{\\textwidth}\n\\centering\n\\begin{tabular}{|l|l|l|l|}\n\\hline\nEmbedding & \\multicolumn{3}{c|}{"
        + f"{data_name}"
        + "} \\\ \n\\cline{2-4}\n& Trans. & Semi-ind. & Temp. Trans. \\\ \n\\hline\n"
    )

    table_data = ""
    # Loop over each unique method + regime pair
    for (GNN_model, method) in product(GNN_models, methods):

        max_trans_acc = df_summary[
            (df_summary["regime"] == "Trans") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()
        max_semi_ind_acc = df_summary[
            (df_summary["regime"] == "Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()
        max_temp_trans_acc = df_summary[
            (df_summary["regime"] == "Assisted Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
        ]["value"].max()

        trans_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Trans")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        trans_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Trans")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        semi_ind_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        semi_ind_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        temp_trans_acc = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Assisted Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "Mean")
        ]["value"].values[0]
        temp_trans_std = df_summary[
            (df_summary["method"] == method)
            & (df_summary["GNN model"] == GNN_model)
            & (df_summary["regime"] == "Assisted Semi-Ind")
            & (df_summary["output"] == output)
            & (df_summary["statistic"] == "St Dev")
        ]["value"].values[0]

        def format_cell(mean, std):
            if output == "Coverage":
                value = f"{mean} $\\pm$ {std}"
                if output == "Coverage" and (mean + std) >= 0.9:
                    return f"\\textbf{{{value}}}"
                
            return value

        table_data += f"{replace_dict[method]}{GNN_model} & {format_cell(trans_acc, trans_std)} & {format_cell(semi_ind_acc, semi_ind_std)} & {format_cell(temp_trans_acc, temp_trans_std)} \\\\ \\hline\n"

    table_str = table_start_str + table_data + "\\end{tabular}\n\\caption{" + f"{output} for the {data_name} experiment with data split {'/'.join((prop*100).astype(int).astype(str))}." + "}\n\\end{subtable}"

    print(table_str)
    print("")

table_end_end_str = "\n\\caption{}\n\\end{table}"
print(table_end_end_str)

\begin{table}[ht]
\centering

\begin{subtable}{\textwidth}
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Embedding & \multicolumn{3}{c|}{School} \\ 
\cline{2-4}
& Trans. & Semi-ind. & Temp. Trans. \\ 
\hline
Block GCN & \textbf{0.901 $\pm$ 0.014} & 0.833 $\pm$ 0.046 & \textbf{0.909 $\pm$ 0.019} \\ \hline
UGCN & \textbf{0.901 $\pm$ 0.014} & \textbf{0.943 $\pm$ 0.018} & \textbf{0.903 $\pm$ 0.014} \\ \hline
Block GAT & \textbf{0.902 $\pm$ 0.014} & 0.681 $\pm$ 0.102 & \textbf{0.916 $\pm$ 0.026} \\ \hline
UGAT & \textbf{0.901 $\pm$ 0.014} & \textbf{0.912 $\pm$ 0.024} & \textbf{0.901 $\pm$ 0.014} \\ \hline
\end{tabular}
\caption{Coverage for the School experiment with data split 25/25/25/25.}
\end{subtable}

\begin{subtable}{\textwidth}
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Embedding & \multicolumn{3}{c|}{School} \\ 
\cline{2-4}
& Trans. & Semi-ind. & Temp. Trans. \\ 
\hline
Block GCN & \textbf{0.903 $\pm$ 0.015} & \textbf{0.826 $\pm$ 0.075} & \textbf{0.913 $\pm$ 0.02} \\ \hline
UGCN &

In [70]:
table_start_start_str = "\\begin{table}[ht]\n\\centering\n"
print(table_start_start_str)
for data_idx in range(len(props)):
    prop = props[data_idx]
    df_summary = df_summaries[data_idx]

    df_summary["name"] = df_summary["method"] + " " + df_summary["GNN model"]

    replace_dict = {
        "BD": "Block ",
        "UA": "U",
    }

    df_summary["name"] = df_summary["name"].replace(replace_dict)

    output = "Accuracy"

    table_start_str = (
        "\\begin{subtable}{\\textwidth}\n\\centering\n\\begin{tabular}{|l|l|l|l|}\n\\hline\nEmbedding & \\multicolumn{3}{c|}{"
        + f"{data_name}"
        + "} \\\ \n\\cline{2-4}\n& Trans. & Semi-ind. & Temp. Trans. \\\ \n\\hline\n"
    )



    table_data = ""
    # Loop over each unique method + regime pair
    for GNN_model in GNN_models:
        max_trans_acc = df_summary[
            (df_summary["regime"] == "Trans") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model) & (df_summary["statistic"] == "Mean")
        ]["value"].max()
        max_semi_ind_acc = df_summary[
            (df_summary["regime"] == "Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model) & (df_summary["statistic"] == "Mean")
        ]["value"].max()
        max_temp_trans_acc = df_summary[
            (df_summary["regime"] == "Assisted Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model) & (df_summary["statistic"] == "Mean")
        ]["value"].max()

        for method in methods:
            trans_acc = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Trans")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "Mean")
            ]["value"].values[0]
            trans_std = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Trans")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "St Dev")
            ]["value"].values[0]

            semi_ind_acc = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Semi-Ind")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "Mean")
            ]["value"].values[0]
            semi_ind_std = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Semi-Ind")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "St Dev")
            ]["value"].values[0]

            temp_trans_acc = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Assisted Semi-Ind")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "Mean")
            ]["value"].values[0]
            temp_trans_std = df_summary[
                (df_summary["method"] == method)
                & (df_summary["GNN model"] == GNN_model)
                & (df_summary["regime"] == "Assisted Semi-Ind")
                & (df_summary["output"] == output)
                & (df_summary["statistic"] == "St Dev")
            ]["value"].values[0]

            def format_cell(mean, std, max_mean):
                value = f"{mean} $\\pm$ {std}"
                if output == "Accuracy" and mean == max_mean:
                    return f"\\textbf{{{value}}}"
                return value

            table_data += f"{replace_dict[method]}{GNN_model} & {format_cell(trans_acc, trans_std, max_trans_acc)} & {format_cell(semi_ind_acc, semi_ind_std, max_semi_ind_acc)} & {format_cell(temp_trans_acc, temp_trans_std, max_temp_trans_acc)} \\\\ \\hline\n"

    table_str = table_start_str + table_data + "\\end{tabular}\n\\caption{" + f"{output} for the {data_name} experiment with data split {'/'.join((prop*100).astype(int).astype(str))}." + "}\n\\end{subtable}"

    print(table_str)
    print("")

table_end_end_str = "\n\\caption{}\n\\end{table}"
print(table_end_end_str)

\begin{table}[ht]
\centering

\begin{subtable}{\textwidth}
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Embedding & \multicolumn{3}{c|}{School} \\ 
\cline{2-4}
& Trans. & Semi-ind. & Temp. Trans. \\ 
\hline
Block GCN & 0.888 $\pm$ 0.013 & 0.109 $\pm$ 0.025 & 0.112 $\pm$ 0.01 \\ \hline
UGCN & \textbf{0.935 $\pm$ 0.007} & \textbf{0.896 $\pm$ 0.01} & \textbf{0.938 $\pm$ 0.006} \\ \hline
Block GAT & 0.834 $\pm$ 0.021 & 0.108 $\pm$ 0.028 & 0.106 $\pm$ 0.018 \\ \hline
UGAT & \textbf{0.908 $\pm$ 0.019} & \textbf{0.883 $\pm$ 0.014} & \textbf{0.904 $\pm$ 0.024} \\ \hline
\end{tabular}
\caption{Accuracy for the School experiment with data split 25/25/25/25.}
\end{subtable}

\begin{subtable}{\textwidth}
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Embedding & \multicolumn{3}{c|}{School} \\ 
\cline{2-4}
& Trans. & Semi-ind. & Temp. Trans. \\ 
\hline
Block GCN & 0.911 $\pm$ 0.01 & 0.108 $\pm$ 0.035 & 0.118 $\pm$ 0.015 \\ \hline
UGCN & \textbf{0.931 $\pm$ 0.01} & \textbf{0.971 $\pm$ 0.008} & \textbf

In [37]:
# df_summary["name"] = df_summary["method"] + " " + df_summary["GNN model"]

# replace_dict = {
#     "BD GCN": "Block GCN",
#     "BD GAT": "Block GAT",
#     "UA GCN": "UGCN",
#     "UA GAT": "UGAT",
# }

# df_summary["name"] = df_summary["name"].replace(replace_dict)


# output="TSC"

# table_start_str = (
#     "\\begin{table}[h]\n\\centering\n\\begin{tabular}{|l|l|l|}\n\\hline\nEmbedding & \\multicolumn{2}{c|}{"
#     + data_name
#     + "} \\\ \n\\cline{2-3}\n& Trans. & Semi-ind.  \\\ \n\\hline\n"
# )

# # table_data_1 = "ISE & $0.505 \\pm 0.000$ & $0.248 \\pm 0.000$ \\\\ \\hline\n"


# table_data = ""
# # Loop over each unique method + regime pair
# for (GNN_model, method) in product(GNN_models, methods):

#     max_trans_acc = df_summary[
#         (df_summary["regime"] == "Trans") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
#     ]["value"].max()
#     max_semi_ind_acc = df_summary[
#         (df_summary["regime"] == "Semi-Ind") & (df_summary["output"] == output) & (df_summary["GNN model"] == GNN_model)
#     ]["value"].max()

#     trans_acc = df_summary[
#         (df_summary["method"] == method)
#         & (df_summary["GNN model"] == GNN_model)
#         & (df_summary["regime"] == "Trans")
#         & (df_summary["output"] == output)
#         & (df_summary["statistic"] == "Mean")
#     ]["value"].values[0]
#     trans_std = df_summary[
#         (df_summary["method"] == method)
#         & (df_summary["GNN model"] == GNN_model)
#         & (df_summary["regime"] == "Trans")
#         & (df_summary["output"] == output)
#         & (df_summary["statistic"] == "St Dev")
#     ]["value"].values[0]
#     semi_ind_acc = df_summary[
#         (df_summary["method"] == method)
#         & (df_summary["GNN model"] == GNN_model)
#         & (df_summary["regime"] == "Semi-Ind")
#         & (df_summary["output"] == output)
#         & (df_summary["statistic"] == "Mean")
#     ]["value"].values[0]
#     semi_ind_std = df_summary[
#         (df_summary["method"] == method)
#         & (df_summary["GNN model"] == GNN_model)
#         & (df_summary["regime"] == "Semi-Ind")
#         & (df_summary["output"] == output)
#         & (df_summary["statistic"] == "St Dev")
#     ]["value"].values[0]

#     method_name = df_summary[
#         (df_summary["method"] == method)
#         & (df_summary["GNN model"] == GNN_model)
#     ]["name"].values[0]
    

#     if trans_acc != max_trans_acc:
#         table_data += f"{method_name} & {trans_acc:.3f} $\\pm$ {trans_std:.3f}"
#     else:
#         table_data += (
#             method_name + " & \\textbf{" + f"{trans_acc:.3f} $\\pm$ {trans_std:.3f}" + "}"
#         )

#     if semi_ind_acc != max_semi_ind_acc:
#         table_data += f" & ${semi_ind_acc:.3f} \\pm {semi_ind_std:.3f}$ \\\\ \\hline\n"
#     else:
#         table_data += (
#             " & \\textbf{"
#             + f"{semi_ind_acc:.3f} $\\pm$ {semi_ind_std:.3f}"
#             + "} \\\\ \\hline\n"
#         )

# table_end_str = (
#     "\\end{tabular}\n\\caption{}\n\\label{tab:"
#     + data_name
#     + "_"+output+"}\n\\end{table}"
# )

# full_table = table_start_str + table_data + table_end_str

# print(full_table)

\begin{table}[h]
\centering
\begin{tabular}{|l|l|l|}
\hline
Embedding & \multicolumn{2}{c|}{School} \\ 
\cline{2-3}
& Trans. & Semi-ind.  \\ 
\hline
Block GCN & \textbf{0.781 $\pm$ 0.115} & \textbf{0.865 $\pm$ 0.059} \\ \hline
UGCN & 0.712 $\pm$ 0.075 & $0.707 \pm 0.060$ \\ \hline
Block GAT & 0.802 $\pm$ 0.089 & $0.779 \pm 0.078$ \\ \hline
UGAT & \textbf{0.842 $\pm$ 0.072} & \textbf{0.861 $\pm$ 0.053} \\ \hline
\end{tabular}
\caption{}
\label{tab:School_TSC}
\end{table}
