In [None]:
from IPython.display import Markdown
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datasets_torch_geometric.dataset_factory import create_dataset
import pickle
from scipy.stats import binomtest

In [None]:
# Identifiers of the datasets handled by this notebook.
dataset_names = [
    "DVSGESTURE_TONIC",
    "NASL",
    "NCALTECH101",
    "NCARS",
    "FAN1VS3",
]

# Per-dataset metadata keyed by dataset identifier: the display name used in
# the generated tables and the number of classes of the dataset.
datasets_name_and_num_classes = dict(
    NCARS=dict(name="NCars", num_classes=2),
    NASL=dict(name="NASL", num_classes=24),
    NCALTECH101=dict(name="NCaltech101", num_classes=101),
    DVSGESTURE_TONIC=dict(name="DVS-Gesture", num_classes=11),
    FAN1VS3=dict(name="Fan1vs3", num_classes=2),
)

In [None]:
# Directory that holds the cached inputs and the generated p-value tables.
subfolder = os.path.join("images", "paper", "sparsity_vs_acc")
# exist_ok=True keeps the cell idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(subfolder, exist_ok=True)

# Output files: LaTeX table and its Markdown counterpart.
file_path = os.path.join(subfolder, "p_values.tex")
file_path_md = os.path.join(subfolder, "p_values.md")

In [None]:
# Load the test-set labels of every dataset from the on-disk cache; on a cache
# miss, rebuild them from the datasets and write the cache for the next run.
class_labels_cache_path = os.path.join(subfolder, "class_labels.pkl")
try:
    with open(class_labels_cache_path, "rb") as f:
        # NOTE: pickle can execute arbitrary code while loading; only load
        # cache files that were produced by this notebook.
        class_labels = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError):
    # Only a missing or unreadable cache triggers the rebuild. Anything else
    # (KeyboardInterrupt, programming errors, ...) propagates instead of being
    # silently swallowed by a bare `except:`.
    class_labels = {}
    for dataset_name in dataset_names:
        dataset = create_dataset(
                        dataset_path = os.path.join("datasets_torch_geometric", dataset_name, "data"),
                        dataset_name  = dataset_name,
                        dataset_type = 'test'
                    )
        # One label per test sample (first entry of each sample's `y`).
        class_labels[dataset_name] = [d.y[0].item() for d in dataset]
        # Sanity check of the hand-written class counts against the dataset.
        assert datasets_name_and_num_classes[dataset_name]["num_classes"] == dataset.num_classes, f"Number of classes in dataset {dataset_name} is not correct"
        print(f"{dataset_name}: Number of samples in test set: {len(dataset)}. Number of classes: {dataset.num_classes}.")
    with open(class_labels_cache_path, "wb") as f:
        pickle.dump(class_labels, f)

In [None]:
# Number of test samples per class for every dataset. np.unique returns the
# counts in the order of the sorted unique labels.
class_counts_dict = {
    dataset_name: np.unique(labels, return_counts=True)[1]
    for dataset_name, labels in class_labels.items()
}


In [None]:
# Load the accuracy results together with the list of event-subsampling levels.
# The pickle is expected to hold a 2-tuple: (test_acc_results, num_events_list).
subfolder = os.path.join("images", "paper", "sparsity_vs_acc")
results_path = os.path.join(subfolder, "full_test_mean_std.pickle")
with open(results_path, "rb") as f:
    loaded_results = pickle.load(f)
test_acc_results, num_events_list = loaded_results


In [None]:
# One-sided binomial test per dataset and subsampling level:
# H0 = "the classifier performs at chance level", H1 = "better than chance".
p_values = {}
for dataset_name in dataset_names:
    per_class_counts = class_counts_dict[dataset_name]
    num_test_samples = int(per_class_counts.sum())
    # Chance level of a uniform random guess over the classes found in the test
    # labels. An alternative baseline would be the majority-class frequency:
    # per_class_counts.max() / per_class_counts.sum().
    chance_level = 1 / len(per_class_counts)

    p_values[dataset_name] = []
    for i in range(len(num_events_list)):
        # Presumably entry [0] is the mean test accuracy as a fraction in
        # [0, 1] -- TODO confirm against the code that writes the pickle.
        accuracy = test_acc_results[dataset_name][i][0]
        # Round instead of truncating: float error can put accuracy * n just
        # below the true count (0.57 * 100 == 56.99999999999999), and int()
        # alone would then drop one correct prediction.
        num_correct = int(round(accuracy * num_test_samples))
        p_values[dataset_name].append(
            binomtest(k=num_correct,
                      n=num_test_samples,
                      p=chance_level,
                      alternative='greater').pvalue
        )

    

In [None]:
# Echo the computed p-values as this cell's output for a quick visual check.
p_values

In [None]:
def write_p_value_md(file_path, p_values, num_events_list, datasets_name_and_num_classes):
    """Write the p-values as a Markdown (pipe) table to ``file_path``.

    The table has one row per dataset and the columns
    ``Dataset | # classes | <one column per entry of num_events_list>``.

    Args:
        file_path: Destination file; it is created or overwritten.
        p_values: Mapping from dataset key to a sequence of p-values. A
            ``None`` entry is rendered as ``--``.
        num_events_list: Values used as the column headers of the p-value
            columns, converted with ``str``.
        datasets_name_and_num_classes: Mapping from dataset key to a dict with
            the entries ``"name"`` and ``"num_classes"``.

    Raises:
        KeyError: If a key of ``p_values`` is missing from
            ``datasets_name_and_num_classes``.
    """
    header_cells = ["Dataset", "# classes"] + [str(num_events) for num_events in num_events_list]
    lines = [
        # The header is closed with a trailing pipe like every other row.
        "| " + " | ".join(header_cells) + " |",
        "| --- " * len(header_cells) + "|",
    ]

    for dataset, values in p_values.items():
        info = datasets_name_and_num_classes[dataset]
        cells = [info["name"], str(info["num_classes"])]
        # p-values in scientific notation, wrapped in $...$ for math rendering.
        cells.extend("--" if v is None else "${:.2e}$".format(v) for v in values)
        lines.append("| " + " | ".join(cells) + " |")

    with open(file_path, "w") as file:
        file.write("\n".join(lines) + "\n")


In [None]:
# Text form of every p-value: scientific notation with two decimals, an exact
# zero shortened to "0", and "---" as placeholder for missing (None) entries.
p_values_text = {
    dataset: [
        "---" if v is None else "{:.2e}".format(v).replace("0.00e+00","0")
        for v in values
    ]
    for dataset, values in p_values.items()
}
print(p_values_text)

# Persist the formatted strings next to the other outputs.
with open(os.path.join(subfolder,"p_values_text.pkl"), "wb") as f:
    pickle.dump(p_values_text, f)

In [None]:
def write_p_value(file_path, p_values, num_events_list, datasets_name_and_num_classes):
    """Write the p-values as a LaTeX ``tabular`` to ``file_path``.

    The table uses ``\\toprule``/``\\midrule``/``\\bottomrule`` rules, has one
    centred column for the dataset name, one for the number of classes and one
    per entry of ``num_events_list``, and one row per dataset.

    Args:
        file_path: Destination file; it is created or overwritten.
        p_values: Mapping from dataset key to a sequence of p-values. A
            ``None`` entry is rendered as ``--``.
        num_events_list: Values used as column titles, converted with ``str``.
        datasets_name_and_num_classes: Mapping from dataset key to a dict with
            the entries ``"name"`` and ``"num_classes"``.

    NOTE(review): values are formatted with ``{:.3f}``, so any p-value below
    0.0005 is printed as ``0.000``.
    """
    with open(file_path, "w") as file:
        num_value_columns = len(num_events_list)
        column_titles = " & ".join(str(num_events) for num_events in num_events_list)

        # Table preamble and the two header rows.
        file.write("\\begin{tabular}{" + "c" * (2 + num_value_columns) + "}\n")
        file.write("\\toprule\n")
        file.write(" & & \\multicolumn{" + str(num_value_columns) + "}{c}{\\# events subsampling}\\\\\n")
        file.write("Dataset & \\# classes & " + column_titles + "\\\\\n")
        file.write("\\midrule\n")

        # One row per dataset: name, number of classes, then the p-values.
        for dataset, values in p_values.items():
            info = datasets_name_and_num_classes[dataset]
            cells = [info["name"], str(info["num_classes"])]
            cells.extend("--" if v is None else "${:.3f}$".format(v) for v in values)
            file.write(" & ".join(cells) + " \\\\\n")

        # Closing rule and environment end.
        file.write("\\bottomrule\n")
        file.write("\\end{tabular}\n")

In [None]:
# Generate both table files (Markdown first, then LaTeX) from the same p-values.
write_p_value_md(file_path_md, p_values, num_events_list, datasets_name_and_num_classes)
write_p_value(file_path, p_values, num_events_list, datasets_name_and_num_classes)

# Read the Markdown file back so that what is displayed is exactly what was
# written to disk.
with open(file_path_md, "r") as file:
    markdown_content = file.read()

# Last expression of the cell: rendered by Jupyter as a Markdown table.
Markdown(markdown_content)