In [None]:
from IPython.display import Markdown
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datasets_torch_geometric.dataset_factory import create_dataset
import pickle
from scipy.stats import binomtest

In [None]:
# Datasets included in the analysis, as (dataset key, display name, number of classes).
# The dataset key doubles as the name of the per-dataset sub-directory under `subfolder`.
datasets_name_and_num_classes = {
    dataset_key: {"name": display_name, "num_classes": num_classes}
    for dataset_key, display_name, num_classes in (
        # ("NCARS", "N-Cars", 2),
        ("NASL", "N-ASL", 24),
        ("FAN1VS3", "Fan1vs3", 2),
        ("DVS_GESTURE_TONIC", "DVS-Gesture", 11),
        ("NCALTECH101", "N-Caltech101", 101),
    )
}
# Root folder: holds one sub-directory per dataset key and receives the generated table.
subfolder = os.path.join("images", "paper", "clustering_metric")

In [None]:
# Load the pre-computed clustering metric of every configured dataset from
# <subfolder>/<dataset key>/clustering_metric.pkl.
# NOTE: pickle can execute arbitrary code while loading; only point this at trusted files.
clustering_metric = {}
for dataset_name in datasets_name_and_num_classes:
    pickle_path = os.path.join(subfolder, dataset_name, "clustering_metric.pkl")
    with open(pickle_path, "rb") as pickle_file:
        clustering_metric[dataset_name] = pickle.load(pickle_file)
    

In [None]:
# Reduce every stored entry to (max of its first element, second element, third element),
# keeping the nesting dataset -> outer key -> inner key of `clustering_metric`.
# NOTE(review): the outer keys look like data splits ('val' / 'test') and the inner keys like
# event counts, with each entry shaped (metric_values, max_acc, mean_acc) -- this is inferred
# from how later cells consume the result; confirm against the code that wrote the pickles.
max_clustering_metric = {}
for dataset_name in datasets_name_and_num_classes.keys():
    max_clustering_metric[dataset_name] = {}
    for split, entries in clustering_metric[dataset_name].items():
        max_clustering_metric[dataset_name][split] = {}
        for num_events, entry in entries.items():
            # Check the length before indexing: on a too-short entry, `entry[0]` would raise
            # a bare IndexError and the explanatory assertion message would never be shown.
            # Three elements are required because indices 0, 1 and 2 are all read below.
            assert len(entry) >= 3, (
                f"entry for {dataset_name}/{split}/{num_events} has {len(entry)} element(s), expected at least 3"
            )
            assert isinstance(entry[0], list), (
                f"first element for {dataset_name}/{split}/{num_events} is not a list but {type(entry[0])}"
            )
            max_clustering_metric[dataset_name][split][num_events] = (np.max(entry[0]), entry[1], entry[2])

In [None]:
# Show the reduced metrics as the cell output for a quick visual sanity check.
max_clustering_metric

In [None]:
def write_p_value_md(file_path, p_values, num_events_list, datasets_name_and_num_classes):
    """Write a Markdown table of p-values, one row per dataset.

    Args:
        file_path: Destination path; the file is created or overwritten.
        p_values: Mapping from dataset key to an iterable of p-values. A ``None``
            value is rendered as ``--``; any other value is rendered in
            scientific notation with two decimals inside ``$...$``.
        num_events_list: Values used as column headers after the
            "Dataset" and "# classes" columns.
        datasets_name_and_num_classes: Mapping from dataset key to a dict with
            the entries ``"name"`` and ``"num_classes"``.

    Raises:
        KeyError: If a key of ``p_values`` is missing from
            ``datasets_name_and_num_classes``.
    """
    header_cells = ["Dataset", "# classes"] + [str(num_events) for num_events in num_events_list]
    table_lines = [
        # Close the header row with a pipe so it matches the separator and data rows.
        "| " + " | ".join(header_cells) + " |",
        "| --- " * len(header_cells) + "|",
    ]
    for dataset, values in p_values.items():
        dataset_info = datasets_name_and_num_classes[dataset]
        row_cells = [dataset_info["name"], str(dataset_info["num_classes"])]
        # p-value cells; missing values are shown as a placeholder.
        row_cells += ["--" if v is None else "${:.2e}$".format(v) for v in values]
        table_lines.append("| " + " | ".join(row_cells) + " |")

    with open(file_path, "w") as file:
        file.write("\n".join(table_lines) + "\n")


In [None]:
# Show the reduced metrics again right before they are turned into the summary table.
max_clustering_metric

In [None]:
# For every dataset, summarise the two extreme configurations:
# "sparse" = smallest number of events, "dense" = largest number of events.
# (summary key, split to read from, index into the stored tuple)
summary_fields = (
    ("val_metric", "val", 0),
    ("test_metric", "test", 0),
    ("val_max", "val", 1),
    ("test_max", "test", 1),
    ("val_mean", "val", 2),
    ("test_mean", "test", 2),
)
max_clustering_metric_table = {}
for dataset_name, values in max_clustering_metric.items():
    num_events = list(values['test'].keys())
    assert num_events == list(values['val'].keys()), f"num_events list are different for dataset {dataset_name}"

    per_density = {}
    for density, pick_num_events in (("sparse", np.min), ("dense", np.max)):
        selected_num_events = pick_num_events(num_events)
        summary = {"num_events": selected_num_events}
        for field_name, split, tuple_index in summary_fields:
            summary[field_name] = values[split][selected_num_events][tuple_index]
        # Relative gain of the maximum over the mean, computed on the test split.
        summary["max_to_mean"] = summary["test_max"] / summary["test_mean"] - 1
        per_density[density] = summary
    max_clustering_metric_table[dataset_name] = per_density
    


In [None]:
# Turn the numeric summary into LaTeX-ready strings.
# Where two values are compared, the larger one is wrapped in \textbf{};
# on a tie the sparse value is the one set in bold.
max_clustering_metric_table_string = {}
for dataset_name, values in max_clustering_metric_table.items():
    sparse_raw, dense_raw = values["sparse"], values["dense"]
    sparse_text = {"num_events": str(sparse_raw["num_events"])}
    dense_text = {"num_events": str(dense_raw["num_events"])}

    # Metrics: three decimals, larger value in bold.
    for metric_key in ("val_metric", "test_metric"):
        if sparse_raw[metric_key] >= dense_raw[metric_key]:
            sparse_template, dense_template = "\\textbf{{{:.3f}}}", "{:.3f}"
        else:
            sparse_template, dense_template = "{:.3f}", "\\textbf{{{:.3f}}}"
        sparse_text[metric_key] = sparse_template.format(sparse_raw[metric_key])
        dense_text[metric_key] = dense_template.format(dense_raw[metric_key])

    # Accuracies: scaled by 100 (shown as percentages), two decimals, never bold.
    for accuracy_key in ("test_max", "test_mean"):
        sparse_text[accuracy_key] = "{:.2f}".format(sparse_raw[accuracy_key] * 100)
        dense_text[accuracy_key] = "{:.2f}".format(dense_raw[accuracy_key] * 100)

    # Max-to-mean gain: compared on the raw ratio, printed scaled by 100, larger in bold.
    if sparse_raw["max_to_mean"] >= dense_raw["max_to_mean"]:
        sparse_template, dense_template = "\\textbf{{{:.2f}}}", "{:.2f}"
    else:
        sparse_template, dense_template = "{:.2f}", "\\textbf{{{:.2f}}}"
    sparse_text["max_to_mean"] = sparse_template.format(sparse_raw["max_to_mean"] * 100)
    dense_text["max_to_mean"] = dense_template.format(dense_raw["max_to_mean"] * 100)

    max_clustering_metric_table_string[dataset_name] = {"sparse": sparse_text, "dense": dense_text}


In [None]:
# Show the formatted strings as the cell output to check them before writing the LaTeX file.
max_clustering_metric_table_string

In [None]:
def write_metric(file_path, max_clustering_metric_table, datasets_name_and_num_classes):
    """Write a LaTeX ``tabular`` with the validation and test metric per dataset.

    Each dataset occupies two columns (dense, then sparse) under a shared
    ``\\multicolumn`` heading carrying the dataset's display name.

    Args:
        file_path: Destination path; the file is created or overwritten.
        max_clustering_metric_table: Mapping from dataset key to a dict with
            ``"dense"`` and ``"sparse"`` entries, each providing
            ``"num_events"``, ``"val_metric"`` and ``"test_metric"``; the two
            metrics are formatted with three decimals.
        datasets_name_and_num_classes: Mapping from dataset key to a dict whose
            ``"name"`` entry is used as the column heading.
    """
    dataset_keys = list(max_clustering_metric_table.keys())

    def table_row(prefix, cells):
        # One table line: leading label, cells joined by column separators, LaTeX line break.
        return prefix + " & ".join(cells) + "\\\\\n"

    with open(file_path, "w") as file:
        entries = list(max_clustering_metric_table.values())
        name_cells = [
            "\\multicolumn{2}{c}{" + datasets_name_and_num_classes[key]["name"] + "}"
            for key in dataset_keys
        ]
        density_cells = [r"\small{dense} & \small{sparse}" for _ in dataset_keys]
        event_cells = [
            f"\\small{{{entry['dense']['num_events']}}} & \\small{{{entry['sparse']['num_events']}}}"
            for entry in entries
        ]
        val_cells = [
            "{:.3f} & {:.3f}".format(entry['dense']['val_metric'], entry['sparse']['val_metric'])
            for entry in entries
        ]
        test_cells = [
            "{:.3f} & {:.3f}".format(entry['dense']['test_metric'], entry['sparse']['test_metric'])
            for entry in entries
        ]
        file.writelines([
            # One left-aligned label column plus two centred columns per dataset.
            "\\begin{tabular}{" + "l" + ("c" * (2 * len(dataset_keys))) + "}\n",
            "\\toprule\n",
            table_row("Dataset & ", name_cells),
            table_row(" & ", density_cells),
            table_row("\\small{\\# events} & ", event_cells),
            "\\midrule\n",
            table_row("val. metric & ", val_cells),
            table_row("test metric & ", test_cells),
            "\\bottomrule\n",
            "\\end{tabular}\n",
        ])

In [None]:
def write_metric_and_max_mean(file_path, max_clustering_metric_table_string, datasets_name_and_num_classes):
    """Write a LaTeX ``tabular`` with the metric rows and the test-accuracy rows.

    Each dataset occupies two columns (dense, then sparse); consecutive
    datasets are separated by one empty spacer column. The table has two
    titled sections: the validation/test metric, and the mean / maximum test
    accuracy together with the max-to-mean improvement.

    Args:
        file_path: Destination path; the file is created or overwritten.
        max_clustering_metric_table_string: Mapping from dataset key to a dict
            with ``"dense"`` and ``"sparse"`` entries. Each entry must provide
            the already formatted strings ``"num_events"``, ``"val_metric"``,
            ``"test_metric"``, ``"test_mean"``, ``"test_max"`` and
            ``"max_to_mean"``; they are inserted verbatim.
        datasets_name_and_num_classes: Mapping from dataset key to a dict whose
            ``"name"`` entry is used as the column heading.
    """
    dataset_keys = list(max_clustering_metric_table_string.keys())
    entries = list(max_clustering_metric_table_string.values())
    num_datasets = len(entries)
    # Label column + 2 columns per dataset + (num_datasets - 1) spacer columns
    # = 3 * num_datasets. Derived from the table passed in as an argument, so
    # the function does not depend on any notebook-level variable.
    total_columns = 3 * num_datasets

    def paired_row(prefix, field):
        # One line holding the dense and sparse value of `field` for every dataset.
        cells = [entry['dense'][field] + " & " + entry['sparse'][field] for entry in entries]
        return prefix + " & & ".join(cells) + "\\\\\n"

    def section_title(title):
        # Bold heading spanning the full table width.
        return f"\\multicolumn{{{total_columns}}}{{c}}{{\\textbf{{{title}}}}}\\\\\n"

    with open(file_path, "w") as file:
        # Column spec: every dataset after the first is preceded by a spacer
        # column that carries extra horizontal space.
        file.write("\\begin{tabular}{" + "l" + r"c@{\hskip 0.75in}".join(["cc" for _ in range(num_datasets)]) + "}\n")
        file.write("\\toprule\n")
        file.write("Dataset & " +
                   " & & ".join(["\\multicolumn{2}{c}{" + datasets_name_and_num_classes[k]["name"] + "}" for k in dataset_keys]) +
                   "\\\\")
        # Partial rules under each dataset heading; they skip the spacer columns.
        file.write(" ".join([f"\\cmidrule{{{3*i+2}-{3*i+3}}}" for i in range(num_datasets)]) + "\n")
        file.write(" & " +
                   " & & ".join([r"\small{dense} & \small{sparse}" for _ in dataset_keys]) +
                   "\\\\\n")
        file.write("\\small{\\# events} & " +
                   " & & ".join([r"\small{" + entry['dense']['num_events'] + "} & " + r"\small{" + entry['sparse']['num_events'] + "}" for entry in entries]) +
                   "\\\\\n")
        file.write("\\midrule\n")
        file.write(section_title("Hyperparameter sensitivity metric"))
        file.write("\\midrule\n")
        file.write(paired_row("val. metric & ", "val_metric"))
        file.write(paired_row("test metric & ", "test_metric"))
        file.write("\\midrule\n")
        file.write(section_title("Mean and maximum test acc."))
        file.write("\\midrule\n")
        file.write(paired_row("mean test acc. (\\%) & ", "test_mean"))
        file.write(paired_row("max. test acc. (\\%) & ", "test_max"))
        file.write(paired_row("max. to mean improvement (\\%) & ", "max_to_mean"))
        file.write("\\bottomrule\n")
        file.write("\\end{tabular}\n")

In [None]:
# Write the combined LaTeX table into the root output folder.
file_path = os.path.join(subfolder, "max_clustering_metric.tex")
# write_metric(file_path, max_clustering_metric_table, datasets_name_and_num_classes)
write_metric_and_max_mean(
    file_path=file_path,
    max_clustering_metric_table_string=max_clustering_metric_table_string,
    datasets_name_and_num_classes=datasets_name_and_num_classes,
)

In [None]:
# write_p_value_md(file_path_md, p_values, num_events_list, datasets_name_and_num_classes)
# write_p_value(file_path, p_values, num_events_list, datasets_name_and_num_classes)

# # Display the content of the Markdown file as a Markdown cell
# with open(file_path_md, "r") as file:
#     markdown_content = file.read()

# Markdown(markdown_content)