In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [3]:
from utils.analyzation_tools import read_event_for_different_seeds, box_plot_experiments, event_to_df
from utils.analyzation_tools import corrected_repeated_kFold_cv_test as cv_test

2024-05-08 11:23:13.278707: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:

# directory of all experiments
experimient_dir= '/media/leon/Samsung_T5/Uni/good_practices_ml/runs/experiment1'
# create lists that contain the dataframes of the different experiments
# First axis contains the different dataset configurations
# Second axis contains the different Loss configurations
# Third axis contains the DataFrame of the different seeds
region_val_datasets = []
country_val_datasets = []
region_test_datasets = []
country_test_datasets = []
other_coloumns_list = []

for folder in sorted(os.listdir(experimient_dir)):
    log_dir = os.path.join(experimient_dir, folder)
    if os.path.isdir(log_dir):
        save_path = log_dir + '/results/'
        # Call the event_to_df function with the log directory 
        rv, cv, rt, ct, o = event_to_df(log_dir)
        region_val_datasets.append(rv)
        country_val_datasets.append(cv)
        region_test_datasets.append(rt)
        country_test_datasets.append(ct)
        other_coloumns_list.append(o)


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


In [5]:
len(region_val_datasets)
sorted(os.listdir(experimient_dir))


['geo_strongly_balanced',
 'geo_strongly_balancedTest Set - Country Metrics-boxplot.png',
 'geo_strongly_balancedTest Set - Regional Metrics-boxplot.png',
 'geo_strongly_balancedValidation Set - Country Metrics-boxplot.png',
 'geo_strongly_balancedValidation Set - Regional Metrics-boxplot.png',
 'geo_unbalanced',
 'geo_weakly_balanced',
 'geo_weakly_balancedTest Set - Country Metrics-boxplot.png',
 'geo_weakly_balancedTest Set - Regional Metrics-boxplot.png',
 'geo_weakly_balancedValidation Set - Country Metrics-boxplot.png',
 'geo_weakly_balancedValidation Set - Regional Metrics-boxplot.png',
 'mixed_strongly_balanced',
 'mixed_weakly_balanced',
 'weakly-balanced-loss.png']

The result above shows that for L2 and L3 in some seeds the region precision/recall/f1 are missing

In [6]:
# Call the box_plot_experiments function with the lists of dataframes
#val_region_metrics = box_plot_experiments(region_columns_val_list, 'validation-region', save_path)
#val_country_metrics = box_plot_experiments(coutnry_columns_val_list, 'validation-country', save_path)
#test_region_metric = box_plot_experiments(region_columns_test_list, 'test-region', save_path)
#test_coutnry_metric = box_plot_experiments(country_columns_test_list, 'test-country', save_path)


In [13]:
def compare_loss(list_of_df, name, save_path):
    """
    Function to compare metrics of different experiments using t-tests.
    :param list_of_df: List of dataframes
    :param name: Name of the experiment
    :param save_path: Path to save the plot
    :return: None
    """
    list_of_df = [df.copy() for df in list_of_df]
    for i in range(len(list_of_df)):
        list_of_df[i] = list_of_df[i].assign(Experiment=f"L{i+1}")
        cols_to_drop= list_of_df[i].filter(like='text', axis=1).columns
        list_of_df[i] = list_of_df[i].drop(columns=cols_to_drop)
        
        list_of_df[i].columns = list_of_df[i].columns.str.split().str[-2:].str.join(" ")

    condf = pd.concat(list_of_df)
    metrics = condf.columns[:-1]
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if type(x) == list else x)
    meltdf["Value"] = meltdf["Value"].astype(float)
    loss_config = ['L1', 'L2', 'L3', 'L4']
    
    for metric in metrics:
        matrix = []
        print(f"Metric: {metric}")
        for i in range(len(loss_config)):
            buffer = []
            for j in range(len(loss_config)):
                exp1 = loss_config[i]
                exp2 = loss_config[j]
                
                values1 = meltdf[meltdf['Experiment'] == exp1]
                values1 = values1[values1['Metric'] == metric]['Value']
                values2 = meltdf[meltdf['Experiment'] == exp2]
                values2 = values2[values2['Metric'] == metric]['Value']
                # We assume significnce level of 0.05; due to 10 flod validation we have 9:1 ration of samples
                _,_,_, p_value = cv_test(values1.to_list(), values2.to_list(), 9, 1, 0.05)
                if p_value < 0.05:
                    if values1.mean() > values2.mean():
                        print(f"{exp1} is significantly better than {exp2}")
                        buffer.append(1)
                    else:
                        print(f"{exp2} is significantly better than {exp1}")
                        buffer.append(-1)
                else:
                    print(f"No significant difference between {exp1} and {exp2}")
                    buffer.append(0)
            matrix.append(buffer)
        matrix = pd.DataFrame(matrix, index=loss_config, columns=loss_config)
        plt.figure(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, cmap='coolwarm', cbar=False)
        plt.title(f"{metric} comparison")
        plt.savefig(save_path + f"{name}_{metric}_comparison.png")
        plt.close()
        

In [24]:
def compare_datasets_for_l1(list_of_df, name, save_path):
    list_of_df = [df[3].copy() for df in list_of_df]
    dataset_config = ['Strongly Balanced', 'Unbalanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Weakly Balanced']
    for i in range(len(list_of_df)):
        list_of_df[i] = list_of_df[i].assign(Experiment=dataset_config[i])
        cols_to_drop= list_of_df[i].filter(like='text', axis=1).columns
        list_of_df[i] = list_of_df[i].drop(columns=cols_to_drop)
        
        list_of_df[i].columns = list_of_df[i].columns.str.split().str[-2:].str.join(" ")

    condf = pd.concat(list_of_df)
    metrics = condf.columns[:-1]
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if type(x) == list else x)
    meltdf["Value"] = meltdf["Value"].astype(float)
    
    
    for metric in metrics:
        matrix = []
        print(f"Metric: {metric}")
        for i in range(len(dataset_config)):
            buffer = []
            for j in range(len(dataset_config)):
                exp1 = dataset_config[i]
                exp2 = dataset_config[j]
                
                values1 = meltdf[meltdf['Experiment'] == exp1]
                values1 = values1[values1['Metric'] == metric]['Value']
                values2 = meltdf[meltdf['Experiment'] == exp2]
                values2 = values2[values2['Metric'] == metric]['Value']
                # We assume significnce level of 0.05; due to 10 flod validation we have 9:1 ration of samples
                _,_,_, p_value = cv_test(values1.to_list(), values2.to_list(), 9, 1, 0.05)
                if p_value < 0.05:
                    if values1.mean() > values2.mean():
                        print(f"{exp1} is significantly better than {exp2}")
                        buffer.append(1)
                    else:
                        print(f"{exp2} is significantly better than {exp1}")
                        buffer.append(-1)
                else:
                    print(f"No significant difference between {exp1} and {exp2}")
                    buffer.append(0)
            matrix.append(buffer)
        matrix = pd.DataFrame(matrix, index=dataset_config, columns=dataset_config)
        plt.figure(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, cmap='coolwarm', cbar=False)
        plt.title(f"{metric} comparison")
        plt.savefig(save_path + f"{name}_{metric}_comparison.png")
        plt.close()
    

In [25]:
import os
save_path = '/media/leon/Samsung_T5/Uni/good_practices_ml/results'
# Define the directory path
directory = '/media/leon/Samsung_T5/Uni/good_practices_ml/results'
dataset_config = ['Strongly Balanced', 'Unbalanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Unbalanced', 'Mixed Weakly Balanced']

# Create the directory if it does not exist
if not os.path.exists(directory):
    os.makedirs(directory)

path = save_path + f"/region_val/loss_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
for i in range(len(region_val_datasets)):
    compare_loss(region_val_datasets[i], f'region_val_{dataset_config[i]}', path)

path = save_path + f"/region_val/dataset_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
compare_datasets_for_l1(region_val_datasets, 'region_val', path)

path = save_path + f"/region_test/loss_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
for i in range(len(region_test_datasets)):
    compare_loss(region_test_datasets[i], f'region_test_{dataset_config[i]}', path)

path = save_path + f"/region_test/dataset_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
compare_datasets_for_l1(region_test_datasets, 'region_test', path)

path = save_path + f"/country_val/loss_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
for i in range(len(country_val_datasets)):
    compare_loss(country_val_datasets[i], f'country_val_{dataset_config[i]}', path)

path = save_path + f"/country_val/dataset_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
compare_datasets_for_l1(country_val_datasets, 'country_val', path)

path = save_path + f"/country_test/loss_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
for i in range(len(country_test_datasets)):
    compare_loss(country_test_datasets[i], f'country_test_{dataset_config[i]}', path)

path = save_path + f"/country_test/dataset_comparison/"
if not os.path.exists(path):
    os.makedirs(path)
compare_datasets_for_l1(country_test_datasets, 'country_test', path)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
L2 is significantly better than L3
No significant difference between L2 and L4
No significant difference between L3 and L1
L2 is significantly better than L3
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference bet

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
L3 is significantly better than L1
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
L3 is significantly better than L2
No significant difference between L2 and L4
L3 is significantly better than L1
L3 is significantly better than L2
No significant difference between L3 and L3
L3 is significantly better than L4
No significant difference between L4 and L1
No significant difference between L4 and L2
L3 is significantly better than L4
No significant difference between L4 and L4
Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
L3 is significantly better than L1
L4 is significantly better than L1
No significant difference between L2 and L1
No significant difference between L2 and L2
L3 is significantly better than L2
L4 is significantly better than L2
L3 is significantly better than L1
L3 is significantly better than L2
No significant difference between L3 and L3
No significant difference between L3 and L4
L4 is significantly better than L1
L4 is significantly better than L2
No significant difference between L4 and L3
No significant difference between L4 and L4


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L4 and L4
Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No signi

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced
No significant difference between Weakly Balanced and Mixed Weakly Balanced
No significant difference between Mixed Strongly Balanced and Strongly Balanced
Mixed Strongly Balanced is significantly better than Unbalanced
No significant difference between Mixed Strongly Balanced and Weakly Balanced
No significant difference between Mixed Strongly Balanced and Mixed Strongly Balanced
No significant difference between Mixed Strongly Balanced and Mixed Weakly Balanced
No significant difference between Mixed Weakly Balanced and Strongly Balanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Mixed Weakly Balanced and Weakly Balanced
No significant difference between Mixed 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced
No significant difference between Weakly Balanced and Mixed Weakly Balanced
No significant difference between Mixed Strongly Balanced and Strongly Balanced
Mixed Strongly Balanced is significantly better than Unbalanced
No significant difference between Mixed Strongly Balanced and Weakly Balanced
No significant difference between Mixed Strongly Balanced and Mixed Strongly Balanced
No significant difference between Mixed Strongly Balanced and Mixed Weakly Balanced
No significant difference between Mixed Weakly Balanced and Strongly Balanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Mixed Weakly Balanced and Weakly Balanced
No significant difference between Mixed 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L4 and L4
Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference bet

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Regional Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Region Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference bet

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Regions
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Regional Accuracy
No significant difference between Strongly Balanced and Strongly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Unbalanced
Weakly Balanced is significantly better than Unbalanced
Mixed Strongly Balanced is significantly better than Unbalanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced
No significant difference between We

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Region Recall
No significant difference between Strongly Balanced and Strongly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Unbalanced
Weakly Balanced is significantly better than Unbalanced
Mixed Strongly Balanced is significantly better than Unbalanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced
No significant difference between Weakly

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Regions
No significant difference between Strongly Balanced and Strongly Balanced
Unbalanced is significantly better than Strongly Balanced
Weakly Balanced is significantly better than Strongly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
Unbalanced is significantly better than Strongly Balanced
No significant difference between Unbalanced and Unbalanced
Unbalanced is significantly better than Weakly Balanced
Unbalanced is significantly better than Mixed Strongly Balanced
Unbalanced is significantly better than Mixed Weakly Balanced
Weakly Balanced is significantly better than Strongly Balanced
Unbalanced is significantly better than Weakly Balanced
No significant difference between Weakly Balanced and Weakly Balanced
Weakly Balanced is significantly better than Mixed Strongly Balanced
No significant difference between Weakly Balanced and Mixed Weakl

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Validation Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant differenc

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 a

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Classes
No significant difference between L1 and L1
L2 is significantly better than L1
L3 is significantly better than L1
No significant difference between L1 and L4
L2 is significantly better than L1
No significant difference between L2 and L2
No significant difference between L2 and L3
L2 is significantly better than L4
L3 is significantly better than L1
No significant difference between L3 and L2
No significant difference between L3 and L3
L3 is significantly better than L4
No significant difference between L4 and L1
L2 is significantly better than L4
L3 is significantly better than L4
No significant difference between L4 and L4
Metric: Validation Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No signi

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betwe

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Validation Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant differenc

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betwe

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Classes
No significant difference between L1 and L1
L2 is significantly better than L1
L3 is significantly better than L1
L4 is significantly better than L1
L2 is significantly better than L1
No significant difference between L2 and L2
L3 is significantly better 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Validation Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant differenc

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 a

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Classes
No significant difference between L1 and L1
L2 is significantly better than L1
L3 is significantly better than L1
L4 is significantly better than L1
L2 is significantly better than L1
No significant difference between L2 and L2
No significant difference between L2 and L3
L2 is significantly better than L4
L3 is significantly better than L1
No significant difference between L3 and L2
No significant difference between L3 and L3
L3 is significantly better than L4
L4 is significantly better than L1
L2 is significantly better than L4
L3 is significantly better than L4
No significant difference between L4 and L4
Metric: Validation Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference 

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betwe

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Classes


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L1 and L1
L2 is significantly better than L1
L3 is significantly better than L1
L4 is significantly better than L1
L2 is significantly better than L1
No significant difference between L2 and L2
No significant difference between L2 and L3
L2 is significantly better than L4
L3 is significantly better than L1
No significant difference between L3 and L2
No significant difference between L3 and L3
L3 is significantly better than L4
L4 is significantly better than L1
L2 is significantly better than L4
L3 is significantly better than L4
No significant difference between L4 and L4
Metric: Validation Accuracy
No significant difference between Strongly Balanced and Strongly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balan

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Strongly Balanced is significantly better than Weakly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
Mixed Strongly Balanced is significantly better than Weakly Balanced
No significant difference between Weakly Balanced and Mixed Weakly Balanced
No significant difference between Mixed Strongly Balanced and Strongly Balanced
Mixed Strongly Balanced is significantly better than Unbalanced
Mixed Strongly Balanced is significantly better than Weakly Balanced
No significant difference between Mixed Strongly Balanced and Mixed Strongly Balanced
Mixed Strongly Balanced is significantly better than Mixed Weakly Balanced
No significant difference between Mixed Weakly Balanced and Strongly Balanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Mixed Weakly Balanced and Weakly Balanced
Mixed Strongly Balanced is significantly better than Mixed Weakly Balanced
N

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Mixed Weakly Balanced and Weakly Balanced
No significant difference between Mixed Weakly Balanced and Mixed Strongly Balanced
No significant difference between Mixed Weakly Balanced and Mixed Weakly Balanced
Metric: Ignored Classes
No significant difference between Strongly Balanced and Strongly Balanced
Unbalanced is significantly better than Strongly Balanced
Weakly Balanced is significantly better than Strongly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
Mixed Weakly Balanced is significantly better than Strongly Balanced
Unbalanced is significantly better than Strongly Balanced
No significant difference between Unbalanced and Unbalanced
Unbalanced is significantly better than Weakly Balanced
Unbalanced is significantly better than Mixed Strongly Balanced
Unbalanced is significantly better than Mixed Weakly Balanced
Weakly Balanced is significa

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 a

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Test Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betw

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betwe

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Test Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betw

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 a

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
L4 is significantly better than L1
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
L4 is significantly better than L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Test Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No s

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betwe

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
L4 is significantly better than L1
No significant difference between L2 and L1
No significant difference between L2 and L2


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Test Accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class Precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference betw

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class Recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Class F1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: Ignored Classes
No significant difference between L1 and L1
No significant difference between L1 and L2
L3 is significantly better than L1
L4 is significantly better than L1
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
L3 is significantly better than L1
No significant difference between L3 and L2
No significant difference between L

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Test Accuracy
No significant difference between Strongly Balanced and Strongly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Unbalanced
Weakly Balanced is significantly better than Unbalanced
Mixed Strongly Balanced is significantly better than Unbalanced
Mixed Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced
No significant difference between Weakly

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


No significant difference between Mixed Weakly Balanced and Mixed Strongly Balanced
No significant difference between Mixed Weakly Balanced and Mixed Weakly Balanced
Metric: Class Recall
No significant difference between Strongly Balanced and Strongly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Unbalanced
Weakly Balanced is significantly better than Unbalanced
Mixed Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Mixed Weakly Balanced
No significant difference between Weakly Balanced and Strongly Balanced
Weakly Balanced is significantly better than Unbalanced
No significant

  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)
  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


Metric: Class F1
No significant difference between Strongly Balanced and Strongly Balanced
No significant difference between Strongly Balanced and Unbalanced
No significant difference between Strongly Balanced and Weakly Balanced
No significant difference between Strongly Balanced and Mixed Strongly Balanced
No significant difference between Strongly Balanced and Mixed Weakly Balanced
No significant difference between Unbalanced and Strongly Balanced
No significant difference between Unbalanced and Unbalanced
No significant difference between Unbalanced and Weakly Balanced
Mixed Strongly Balanced is significantly better than Unbalanced
No significant difference between Unbalanced and Mixed Weakly Balanced
No significant difference between Weakly Balanced and Strongly Balanced
No significant difference between Weakly Balanced and Unbalanced
No significant difference between Weakly Balanced and Weakly Balanced
No significant difference between Weakly Balanced and Mixed Strongly Balanced


  t = m / np.sqrt((1 / n + n2 / n1) * stdv_sq)


In [23]:

log_dir = '/media/leon/Samsung_T5/Uni/good_practices_ml/runs/experiment1/geo_weakly_balanced'
region_columns_val_list, coutnry_columns_val_list, region_columns_test_list, country_columns_test_list = event_to_df(log_dir)
val_region_metrics = box_plot_experiments(region_columns_val_list, 'Validation Set - Regional Metrics', log_dir)
val_country_metrics = box_plot_experiments(coutnry_columns_val_list, 'Validation Set - Country Metrics', log_dir)
test_region_metric = box_plot_experiments(region_columns_test_list, 'Test Set - Regional Metrics', log_dir)
test_coutnry_metric = box_plot_experiments(country_columns_test_list, 'Test Set - Country Metrics', log_dir)
result_df = pd.concat([val_region_metrics.drop(columns='Experiment').add_prefix('_val'), val_country_metrics.drop(columns='Experiment').add_prefix('_val'), test_region_metric.drop(columns='Experiment'), test_coutnry_metric], axis=1)
#result_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1', 'Experiment']

validation = result_df.filter(like='val')
validation['Experiment'] = result_df['Experiment']

test = result_df.filter(like='Test')
test['Experiment'] = result_df['Experiment']

#print(validation.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
#print(test.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())


KeyboardInterrupt: 

In [None]:
print(result_df.groupby('Experiment').mean().round(decimals=3))


            _valRegional Accuracy  _valRegion Precision  _valRegion Recall  \
Experiment                                                                   
L1                          0.740                 0.468              0.479   
L2                          0.830                 0.537              0.527   
L3                          0.744                 0.448              0.462   
L4                          0.779                 0.494              0.491   

            _valRegion F1  _valIgnored Regions  _valValidation Accuracy  \
Experiment                                                                
L1                  0.465                  6.4                    0.614   
L2                  0.514                  6.2                    0.554   
L3                  0.445                  7.9                    0.487   
L4                  0.473                  6.5                    0.590   

            _valClass Precision  _valClass Recall  _valClass F1  \
Experiment   

In [None]:
strongly_l1 = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_strongly_balanced/starting_regional_loss_portion-0.0'
strongly_l3 = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_strongly_balanced/starting_regional_loss_portion-0.5'

weakly_l1 = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_weakly_balanced/starting_regional_loss_portion-0.0'
weakly_l2 = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_weakly_balanced/starting_regional_loss_portion-0.25'

# Call the read_event_for_different_seeds function for each folder
rval, cval, rtest, ctest = read_event_for_different_seeds(strongly_l1)
strongly_l1_df = pd.concat([rval, cval, rtest, ctest], axis=1)
x = strongly_l1_df["Epoch Validation Number of Ignored Classes"].mean()
y = strongly_l1_df["Test Number of Ignored Classes"].mean()
print(f"Strongly l1 ignotrf conutries validation: {x}")
print(f"Strongly l1 ignotrf conutries test: {y}")
strongly_l1_df = strongly_l1_df.drop(strongly_l1_df.filter(like='Ignored').columns, axis=1)
strongly_l1_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_strongly_l1 = strongly_l1_df.filter(like='val').assign(Experiment=f'Strongly Balanced L1')
test_strongly_l1 = strongly_l1_df.filter(like='Test').assign(Experiment=f'Strongly Balanced L1')

rval, cval, rtest, ctest = read_event_for_different_seeds(strongly_l3)
strongly_l3_df = pd.concat([rval, cval, rtest, ctest], axis=1)
x = strongly_l3_df["Epoch Validation Number of Ignored Classes"].mean()
y = strongly_l3_df["Test Number of Ignored Classes"].mean()
print(f"Strongly l3 ignotrf conutries validation: {x}")
print(f"Strongly l3 ignotrf conutries test: {y}")
strongly_l3_df = strongly_l3_df.drop(strongly_l3_df.filter(like='Ignored').columns, axis=1)
strongly_l3_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_strongly_l3 = strongly_l3_df.filter(like='val').assign(Experiment=f'Strongly Balanced L3')
test_strongly_l3 = strongly_l3_df.filter(like='Test').assign(Experiment=f'Strongly Balanced L3')

rval, cval, rtest, ctest = read_event_for_different_seeds(weakly_l1)
weakly_l1_df = pd.concat([rval, cval, rtest, ctest], axis=1)
x = weakly_l1_df["Epoch Validation Number of Ignored Classes"].mean()
y = weakly_l1_df["Test Number of Ignored Classes"].mean()
print(f"Weakly l1 ignotrf conutries validation: {x}")
print(f"Weakly l1 ignotrf conutries test: {y}")
weakly_l1_df = weakly_l1_df.drop(weakly_l1_df.filter(like='Ignored').columns, axis=1)
weakly_l1_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_weakly_l1 = weakly_l1_df.filter(like='val').assign(Experiment=f'Weakly Balanced L1')
test_weakly_l1 = weakly_l1_df.filter(like='Test').assign(Experiment=f'Weakly Balanced L1')

rval, cval, rtest, ctest = read_event_for_different_seeds(weakly_l2)
weakly_l2_df = pd.concat([rval, cval, rtest, ctest], axis=1)
x = weakly_l2_df["Epoch Validation Number of Ignored Classes"].mean()
y = weakly_l2_df["Test Number of Ignored Regions"].mean()
print(f"Weakly l2 ignotrf conutries validation: {x}")
print(f"Weakly l2 ignotrf conutries test: {y}")
weakly_l2_df = weakly_l2_df.drop(weakly_l2_df.filter(like='Ignored').columns, axis=1)
weakly_l2_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_weakly_l2 = weakly_l2_df.filter(like='val').assign(Experiment=f'Weakly Balanced L2')
test_weakly_l2 = weakly_l2_df.filter(like='Test').assign(Experiment=f'Weakly Balanced L2')

dataset_config=[strongly_l1_df, strongly_l3_df, weakly_l1_df, weakly_l2_df]
exp_names = ['Strongly L1', 'Strongly L3', 'Weakly L1', 'Weakly L2']


for i in range(0,2):
    for metrics in strongly_l1_df.columns:
        values1 = dataset_config[i][metrics]
        values2 = dataset_config[i+2][metrics]
        t_statistic, p_value = ttest_rel(values1, values2)
        print(f"Metric: {metrics}")
        if p_value < 0.05:
            if values1.mean() > values2.mean():
                print(f"{exp_names[i]} is significantly better than {exp_names[i+2]}\n")
            else:
                print(f"{exp_names[i+2]} is significantly better than {exp_names[i]}\n")
        else:
            print(f"No significant difference between {exp_names[i]} and {exp_names[i+2]}\n")
    print('------------------------------------\n\n')

country_compare_val = pd.concat([val_strongly_l1, val_weakly_l1])
country_compare_val = country_compare_val.loc[:, ~country_compare_val.columns.str.contains('Test')]
region_compare_val = pd.concat([val_strongly_l3, val_weakly_l2])
region_compare_val = region_compare_val.loc[:, ~region_compare_val.columns.str.contains('Test')]
country_compare_test = pd.concat([test_strongly_l1, test_weakly_l1])
country_compare_test = country_compare_test.loc[:, ~country_compare_test.columns.str.contains('val')]
region_compare_test = pd.concat([test_strongly_l3, test_weakly_l2])
region_compare_test = region_compare_test.loc[:, ~region_compare_test.columns.str.contains('val')]

print(country_compare_val.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(country_compare_test.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(region_compare_val.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(region_compare_test.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())

Strongly l1 ignotrf conutries validation: 49.7
Strongly l1 ignotrf conutries test: 60.9
Strongly l3 ignotrf conutries validation: 56.4
Strongly l3 ignotrf conutries test: 63.857142857142854
Weakly l1 ignotrf conutries validation: 67.2
Weakly l1 ignotrf conutries test: 75.2
Weakly l2 ignotrf conutries validation: 72.0
Weakly l2 ignotrf conutries test: 6.666666666666667
Metric: val rAcc 
No significant difference between Strongly L1 and Weakly L1

Metric: val rPrec
No significant difference between Strongly L1 and Weakly L1

Metric: val rRec
No significant difference between Strongly L1 and Weakly L1

Metric: val rF1
No significant difference between Strongly L1 and Weakly L1

Metric: val Acc
No significant difference between Strongly L1 and Weakly L1

Metric: val Prec
Strongly L1 is significantly better than Weakly L1

Metric: val Rec
Strongly L1 is significantly better than Weakly L1

Metric: val F1
Strongly L1 is significantly better than Weakly L1

Metric: Test rAcc
No significant di

In [None]:
geo_strong = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_strongly_balanced/starting_regional_loss_portion-0.0'
mixed_geo_strong = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/mixed_strongly_balanced/starting_regional_loss_portion-0.0'

geo_weak = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/geo_weakly_balanced/starting_regional_loss_portion-0.0'
mixed_geo_weak = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/mixed_weakly_balanced/starting_regional_loss_portion-0.0'

# Call the read_event_for_different_seeds function for each folder
rval, cval, rtest, ctest = read_event_for_different_seeds(geo_strong)
geo_strong_df = pd.concat([rval, cval, rtest, ctest], axis=1)
x = geo_strong_df["Epoch Validation Number of Ignored Classes"].mean()
y = geo_strong_df["Test Number of Ignored Classes"].mean()
print(f"Strongly ignotrf conutries validation: {x}")
print(f"Strongly ignotrf conutries test: {y}")
geo_strong_df = geo_strong_df.drop(geo_strong_df.filter(like='Ignored').columns, axis=1)
geo_strong_df.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

geo_strong_val = geo_strong_df.filter(like='val').assign(Experiment=f'Strongly\n Balanced')
geo_strong_test = geo_strong_df.filter(like='Test').assign(Experiment=f'Strongly\n Balanced')

rval, cval, rtest, ctest = read_event_for_different_seeds(mixed_geo_strong)
mixed_geo_strong= pd.concat([rval, cval, rtest, ctest], axis=1)
x = mixed_geo_strong["Epoch Validation Number of Ignored Classes"].mean()
y = mixed_geo_strong["Test Number of Ignored Classes"].mean()
print(f"mixed Strongly ignotrf conutries validation: {x}")
print(f"mixed Strongly ignotrf conutries test: {y}")
mixed_geo_strong = mixed_geo_strong.drop(mixed_geo_strong.filter(like='Ignored').columns, axis=1)
mixed_geo_strong.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_mixed_geo_strong = mixed_geo_strong.filter(like='val').assign(Experiment=f'Mixed\n Strongly Balanced')
test_mixed_geo_strong = mixed_geo_strong.filter(like='Test').assign(Experiment=f'Mixed\n Strongly Balanced')

rval, cval, rtest, ctest = read_event_for_different_seeds(geo_weak)
geo_weak = pd.concat([rval, cval, rtest, ctest], axis=1)
x = geo_weak["Epoch Validation Number of Ignored Classes"].mean()
y = geo_weak["Test Number of Ignored Classes"].mean()
print(f"Weakly ignotrf conutries validation: {x}")
print(f"Weakly ignotrf conutries test: {y}")
geo_weak = geo_weak.drop(geo_weak.filter(like='Ignored').columns, axis=1)
geo_weak.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_geo_weak = geo_weak.filter(like='val').assign(Experiment=f'Weakly\n Balanced')
test_geo_weak = geo_weak.filter(like='Test').assign(Experiment=f'Weakly\n Balanced')

rval, cval, rtest, ctest = read_event_for_different_seeds(mixed_geo_weak)
mixed_geo_weak = pd.concat([rval, cval, rtest, ctest], axis=1)
x = mixed_geo_weak["Epoch Validation Number of Ignored Classes"].mean()
y = mixed_geo_weak["Test Number of Ignored Classes"].mean()
print(f"mixed Weakly ignotrf conutries validation: {x}")
print(f"mixed Weakly ignotrf conutries test: {y}")
mixed_geo_weak = mixed_geo_weak.drop(mixed_geo_weak.filter(like='Ignored').columns, axis=1)
mixed_geo_weak.columns = ['val rAcc ','val rPrec' ,'val rRec' ,'val rF1' ,'val Acc' ,'val Prec' ,'val Rec' ,'val F1' ,'Test rAcc' ,'Test rPrec' ,'Test rRec' ,'Test rF1' ,'Test Acc' ,'Test Prec' ,'Test Rec' ,'Test F1']

val_mixed_geo_weak = mixed_geo_weak.filter(like='val').assign(Experiment=f'Mixed\n Weakly Balanced')
test_mixed_geo_weak = mixed_geo_weak.filter(like='Test').assign(Experiment=f'Mixed\n Weakly Balanced')

dataset_config=[geo_strong_test, test_geo_weak, test_mixed_geo_strong, test_mixed_geo_weak]
exp_names = ['Strongly Balanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Weakly Balanced']


for i in range(0,2):
    for metrics in test_mixed_geo_weak.columns[:-1]:
        values1 = dataset_config[i][metrics]
        values2 = dataset_config[i+2][metrics]
        t_statistic, p_value = ttest_rel(values1.to_list(), values2.to_list())
        print(f"Metric: {metrics}")
        if p_value < 0.05:
            if values1.mean() > values2.mean():
                print(f"{exp_names[i]} is significantly better than {exp_names[i+2]}\n")
            else:
                print(f"{exp_names[i+2]} is significantly better than {exp_names[i]}\n")
        else:
            print(f"No significant difference between {exp_names[i]} and {exp_names[i+2]}\n")
    print('------------------------------------\n\n')

val_strong = pd.concat([geo_strong_val, val_mixed_geo_strong])
#val_strong = val_strong.loc[:, ~val_strong.columns.str.contains('val')]
test_strong = pd.concat([geo_strong_test, test_mixed_geo_strong])
#test_strong = test_strong.loc[:, ~test_strong.columns.str.contains('Test')]
val_weak = pd.concat([val_geo_weak, val_mixed_geo_weak])
#val_weak = val_weak.loc[:, ~val_weak.columns.str.contains('val')]
test_weak = pd.concat([test_geo_weak, test_mixed_geo_weak])
#test_weak = test_weak.loc[:, ~test_weak.columns.str.contains('Test')]

print(val_strong.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(test_strong.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(val_weak.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
print(test_weak.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())

save_path = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/'
condf = pd.concat([val_strong, val_weak])
condf.columns = condf.columns.str.replace('val ', '')
meltdf = condf.melt(id_vars=['Experiment'], var_name='Metric', value_name='Value')
ax = sns.boxplot(x="Experiment", y="Value", hue="Metric", data=meltdf, showfliers=False) 
ax.set_title('Validation Set')
ax.set_xlabel('')
ax.set_ylabel('')
lgd = plt.legend(bbox_to_anchor=(0.95, 0.95), loc=2, borderaxespad=0.)
plt.savefig(save_path+f'Val-Experiment-boxplot.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.clf()
plt.close()

save_path = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/'
condf = pd.concat([test_strong, test_weak])
condf.columns = condf.columns.str.replace('Test ', '')
meltdf = condf.melt(id_vars=['Experiment'], var_name='Metric', value_name='Value')
ax = sns.boxplot(x="Experiment", y="Value", hue="Metric", data=meltdf, showfliers=False) 
ax.set_title('Test Set')
lgd = plt.legend(bbox_to_anchor=(0.95, 0.95), loc=2, borderaxespad=0.)
ax.set_xlabel('')
ax.set_ylabel('')
plt.savefig(save_path+f'Test-Experiment-boxplot.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.clf()
plt.close()

save_path = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/'
condf = pd.concat([geo_strong_val, val_geo_weak])
condf.columns = condf.columns.str.replace('val ', '')
meltdf = condf.melt(id_vars=['Experiment'], var_name='Metric', value_name='Value')
ax = sns.boxplot(x="Experiment", y="Value", hue="Metric", data=meltdf, showfliers=False) 
ax.set_title('Validation Set')
lgd = plt.legend(bbox_to_anchor=(0.95, 0.95), loc=2, borderaxespad=0.)
ax.set_xlabel('')
ax.set_ylabel('')
plt.savefig(save_path+f'Val-Balancing-boxplot.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.clf()
plt.close()

save_path = '/home/lbrenig/Documents/Uni/GPML/good_practices_ml/runs/experiment1/'
condf = pd.concat([geo_strong_test, test_geo_weak])
condf.columns = condf.columns.str.replace('Test ', '')
meltdf = condf.melt(id_vars=['Experiment'], var_name='Metric', value_name='Value')
ax = sns.boxplot(x="Experiment", y="Value", hue="Metric", data=meltdf, showfliers=False) 
ax.set_title('Test Set')
lgd = plt.legend(bbox_to_anchor=(0.95, 0.95), loc=2, borderaxespad=0.)
ax.set_xlabel('')
ax.set_ylabel('')
plt.savefig(save_path+f'Test-Balancing-boxplot.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
plt.clf()
plt.close()

Strongly ignotrf conutries validation: 49.7
Strongly ignotrf conutries test: 60.9
mixed Strongly ignotrf conutries validation: 49.9
mixed Strongly ignotrf conutries test: 56.4
Weakly ignotrf conutries validation: 67.2
Weakly ignotrf conutries test: 75.2
mixed Weakly ignotrf conutries validation: 71.3
mixed Weakly ignotrf conutries test: 74.2
Metric: Test rAcc
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test rPrec
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test rRec
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test rF1
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test Acc
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test Prec
No significant difference between Strongly Balanced and Mixed Strongly Balanced

Metric: Test Rec
No significant difference between Strongly Bal

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

l1_weakly_df = "/home/leon/Downloads/geo_weakly_balanced_starting_regional_loss_portion-0-0.csv"
l2_weakly_df = "/home/leon/Downloads/geo_weakly_balanced_starting_regional_loss_portion-0-25.csv"
l3_weakly_df = "/home/leon/Downloads/geo_weakly_balanced_starting_regional_loss_portion-0-5.csv"
l4_weakly_df = "/home/leon/Downloads/geo_weakly_balanced_starting_regional_loss_portion-0-8.csv"

# Read the CSV files into DataFrames
df1 = pd.read_csv(l1_weakly_df)
df2 = pd.read_csv(l2_weakly_df)
df3 = pd.read_csv(l3_weakly_df)
df4 = pd.read_csv(l4_weakly_df)

# Plot the DataFrames
plt.plot(df1['x'], df1['y'], label='l1_weakly_df')
plt.plot(df2['x'], df2['y'], label='l2_weakly_df')
plt.plot(df3['x'], df3['y'], label='l3_weakly_df')
plt.plot(df4['x'], df4['y'], label='l4_weakly_df')

# Add labels and legend
plt.xlabel('x')
plt.ylabel('y')
plt.legend()

# Show the plot
plt.savefig('/home/leon/Downloads/weakly_balanced.png')


OSError: [Errno 5] Input/output error

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Directory path containing the CSV files
directory = '/media/leon/Samsung_T5/Uni/good_practices_ml/accuracy_data'

# List to store the dataframes
dataframes = []

# Iterate over the files in the directory
for i, filename in enumerate(os.listdir(directory)):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path, index_col='Step')
        df = df.rename(columns={"Value": f"L{i+1}"})
        df = df.drop(columns=["Wall time"])
        dataframes.append(df)

# Combine all dataframes into one
combined_df = pd.concat(dataframes)

# Plot the combined dataframe
combined_df.plot()
plt.ylabel('Accuracy')
# Show the plot

plt.show()
#
plt.savefig('accuracy_plot.png')