In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
import sys
sys.path.append('../')
from utils.analyzation_tools import read_event_for_different_seeds, box_plot_experiments, event_to_df
from utils.analyzation_tools import corrected_repeated_kFold_cv_test as cv_test

In [None]:

# Root directory that contains one sub-folder per experiment
experimient_dir = ''
# Containers for the dataframes of the different experiments:
#   first axis  -> dataset configurations (one entry per experiment folder)
#   second axis -> loss configurations
#   third axis  -> dataframe of the different seeds
region_val_datasets, country_val_datasets = [], []
region_test_datasets, country_test_datasets = [], []
other_coloumns_list = []

# Folders are visited in sorted order so the first axis is reproducible
for folder in sorted(os.listdir(experimient_dir)):
    log_dir = os.path.join(experimient_dir, folder)
    # Skip plain files; only directories are treated as experiments
    if not os.path.isdir(log_dir):
        continue
    # Not used in this cell
    save_path = log_dir + '/results/'
    # Convert the logged events of this experiment into its five result objects
    rv, cv, rt, ct, o = event_to_df(log_dir)
    region_val_datasets.append(rv)
    country_val_datasets.append(cv)
    region_test_datasets.append(rt)
    country_test_datasets.append(ct)
    other_coloumns_list.append(o)


In [None]:
# A notebook cell only echoes its last expression, so the number of loaded
# experiments is printed explicitly instead of being silently discarded.
print(len(region_val_datasets))
# Folder names in the same (sorted) order in which they were loaded
sorted(os.listdir(experimient_dir))


The result above shows that, for the loss configurations L2 and L3, the region precision/recall/F1 values are missing for some seeds.

In [None]:
# Placeholder: set `path` to the location that should be read before running this cell
# (presumably the log directory of a single experiment — TODO confirm against
# read_event_for_different_seeds).
path = ""
# Display the 'Epoch Validation List of Ignored Regions/text_summary' entry of the loaded events
read_event_for_different_seeds(path)['Epoch Validation List of Ignored Regions/text_summary']


In [None]:
def compare_loss(list_of_df, name, save_path):
    """
    Compare the metrics of different loss configurations pairwise and save one
    significance heatmap per metric.

    The i-th dataframe of ``list_of_df`` is labelled ``L{i+1}``. Columns whose name
    contains 'text' are dropped and the remaining column names are shortened to their
    last two whitespace-separated words. For every metric and every ordered pair of
    experiments ``cv_test`` is called and its p-value is compared with 0.05.
    A heatmap cell is 1 if the row experiment has the significantly higher mean,
    -1 if the column experiment has, and 0 if the difference is not significant.

    :param list_of_df: List of dataframes, one per loss configuration
    :param name: Name of the experiment; used as prefix of the saved file names
    :param save_path: Path prefix to which the plot file name is appended
    :return: None
    """
    prepared = []
    for i, df in enumerate(list_of_df):
        # assign() returns a new dataframe, so the caller's dataframes stay untouched
        df = df.assign(Experiment=f"L{i+1}")
        df = df.drop(columns=df.filter(like='text', axis=1).columns)
        # Keep only the last two words of each column name ("Experiment" stays as is)
        df.columns = df.columns.str.split().str[-2:].str.join(" ")
        prepared.append(df)
    # Derive the labels from the input so they always match the labels assigned above
    loss_config = [f"L{i+1}" for i in range(len(prepared))]

    condf = pd.concat(prepared)
    # Select the metrics by name: concat appends columns that are missing in the first
    # dataframe after "Experiment", so "Experiment" is not guaranteed to be the last column
    metrics = [col for col in condf.columns if col != "Experiment"]
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    # Values stored as lists are reduced to their first element
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if isinstance(x, list) else x)
    meltdf["Value"] = meltdf["Value"].astype(float)

    for metric in metrics:
        print(f"Metric: {metric}")
        # Filter once per metric instead of once per pair of experiments
        metric_rows = meltdf[meltdf['Metric'] == metric]
        values_by_exp = {
            exp: metric_rows[metric_rows['Experiment'] == exp]['Value']
            for exp in loss_config
        }
        matrix = []
        for exp1 in loss_config:
            values1 = values_by_exp[exp1]
            buffer = []
            for exp2 in loss_config:
                values2 = values_by_exp[exp2]
                # We assume a significance level of 0.05; due to the 10-fold validation
                # we have a 9:1 ratio of samples
                _, _, _, p_value = cv_test(values1.to_list(), values2.to_list(), 9, 1, 0.05)
                if p_value < 0.05:
                    if values1.mean() > values2.mean():
                        print(f"{exp1} is significantly better than {exp2}")
                        buffer.append(1)
                    else:
                        print(f"{exp2} is significantly better than {exp1}")
                        buffer.append(-1)
                else:
                    print(f"No significant difference between {exp1} and {exp2}")
                    buffer.append(0)
            matrix.append(buffer)
        matrix = pd.DataFrame(matrix, index=loss_config, columns=loss_config)
        plt.figure(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, cmap='coolwarm', cbar=False)
        plt.title(f"{metric} comparison")
        plt.savefig(save_path + f"{name}_{metric}_comparison.png")
        # Close the figure so repeated calls do not accumulate open figures
        plt.close()
        

In [None]:
def compare_datasets_for_l1(list_of_df, name, save_path, loss_index=3, dataset_config=None):
    """
    Compare the metrics of different dataset configurations pairwise for one loss
    configuration and save one significance heatmap per metric.

    From every entry of ``list_of_df`` the dataframe at position ``loss_index`` is
    taken and labelled with the dataset name at the same position in
    ``dataset_config``. Columns whose name contains 'text' are dropped and the
    remaining column names are shortened to their last two whitespace-separated
    words. For every metric and every ordered pair of datasets ``cv_test`` is called
    and its p-value is compared with 0.05. A heatmap cell is 1 if the row dataset has
    the significantly higher mean, -1 if the column dataset has, and 0 if the
    difference is not significant.

    :param list_of_df: List with one entry per dataset configuration; each entry is
        indexed with ``loss_index`` to obtain a dataframe
    :param name: Name of the experiment; used as prefix of the saved file names
    :param save_path: Path prefix to which the plot file name is appended
    :param loss_index: Position of the loss configuration to compare (default 3).
        NOTE(review): position 3 is the entry that compare_loss labels "L4", although
        this function's name says "l1" — confirm which one is intended.
    :param dataset_config: Dataset names, one per entry of ``list_of_df``; defaults
        to the five names originally hard-coded here
    :return: None
    :raises ValueError: If there are more datasets than dataset names
    """
    if dataset_config is None:
        dataset_config = ['Strongly Balanced', 'Unbalanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Weakly Balanced']
    if len(list_of_df) > len(dataset_config):
        # Fail early with an explanation instead of an IndexError in the labelling loop
        raise ValueError(
            f"Got {len(list_of_df)} datasets but only {len(dataset_config)} dataset names; "
            "pass a matching dataset_config"
        )
    # Only compare the labels that are actually assigned to a dataframe
    dataset_config = list(dataset_config[:len(list_of_df)])

    prepared = []
    for label, per_loss in zip(dataset_config, list_of_df):
        # assign() returns a new dataframe, so the caller's dataframes stay untouched
        df = per_loss[loss_index].assign(Experiment=label)
        df = df.drop(columns=df.filter(like='text', axis=1).columns)
        # Keep only the last two words of each column name ("Experiment" stays as is)
        df.columns = df.columns.str.split().str[-2:].str.join(" ")
        prepared.append(df)

    condf = pd.concat(prepared)
    # Select the metrics by name: concat appends columns that are missing in the first
    # dataframe after "Experiment", so "Experiment" is not guaranteed to be the last column
    metrics = [col for col in condf.columns if col != "Experiment"]
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    # Values stored as lists are reduced to their first element
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if isinstance(x, list) else x)
    meltdf["Value"] = meltdf["Value"].astype(float)

    for metric in metrics:
        print(f"Metric: {metric}")
        # Filter once per metric instead of once per pair of datasets
        metric_rows = meltdf[meltdf['Metric'] == metric]
        values_by_exp = {
            exp: metric_rows[metric_rows['Experiment'] == exp]['Value']
            for exp in dataset_config
        }
        matrix = []
        for exp1 in dataset_config:
            values1 = values_by_exp[exp1]
            buffer = []
            for exp2 in dataset_config:
                values2 = values_by_exp[exp2]
                # We assume a significance level of 0.05; due to the 10-fold validation
                # we have a 9:1 ratio of samples
                _, _, _, p_value = cv_test(values1.to_list(), values2.to_list(), 9, 1, 0.05)
                if p_value < 0.05:
                    if values1.mean() > values2.mean():
                        print(f"{exp1} is significantly better than {exp2}")
                        buffer.append(1)
                    else:
                        print(f"{exp2} is significantly better than {exp1}")
                        buffer.append(-1)
                else:
                    print(f"No significant difference between {exp1} and {exp2}")
                    buffer.append(0)
            matrix.append(buffer)
        matrix = pd.DataFrame(matrix, index=dataset_config, columns=dataset_config)
        plt.figure(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, cmap='coolwarm', cbar=False)
        plt.title(f"{metric} comparison")
        plt.savefig(save_path + f"{name}_{metric}_comparison.png")
        # Close the figure so repeated calls do not accumulate open figures
        plt.close()
    

In [None]:
import os
save_path = 'path/to/save/results/'
# Define the directory path
directory = 'path/to/data/'
# Dataset names used to label the loss comparisons, indexed by experiment position.
# NOTE(review): compare_datasets_for_l1 uses its own list of five names (without
# 'Mixed Unbalanced') while this list has six — confirm which one matches the
# experiment folders.
dataset_config = ['Strongly Balanced', 'Unbalanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Unbalanced', 'Mixed Weakly Balanced']

# Create the directory if it does not exist
os.makedirs(directory, exist_ok=True)

# One entry per result group, processed in this order; the key is used both as
# sub-folder name and as prefix of the saved file names.
result_groups = {
    'region_val': region_val_datasets,
    'region_test': region_test_datasets,
    'country_val': country_val_datasets,
    'country_test': country_test_datasets,
}

for group_name, group_datasets in result_groups.items():
    # Pairwise comparison of the loss configurations, separately for every dataset
    path = save_path + f"/{group_name}/loss_comparison/"
    os.makedirs(path, exist_ok=True)
    for i in range(len(group_datasets)):
        compare_loss(group_datasets[i], f'{group_name}_{dataset_config[i]}', path)

    # Pairwise comparison of the dataset configurations
    path = save_path + f"/{group_name}/dataset_comparison/"
    os.makedirs(path, exist_ok=True)
    compare_datasets_for_l1(group_datasets, group_name, path)
