In [44]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import ast
import torch
import numpy as np
import seaborn as sns
from scipy.stats import ttest_ind
from sklearn.metrics import confusion_matrix


In [45]:
import sys
sys.path.append('../')
from utils.analyzation_tools import corrected_repeated_kFold_cv_test as cv_test
from finetuning.model.region_loss import Regional_Loss


## Helpers


In [46]:

def compare_loss(list_of_df, name, save_path):
    """
    Function to compare metrics of different experiments using t-tests.
    :param list_of_df: List of dataframes
    :param name: Name of the experiment
    :param save_path: Path to save the plot
    :return: None
    """
    list_of_df = [df.copy() for df in list_of_df]
    for i in range(len(list_of_df)):
        list_of_df[i] = list_of_df[i].assign(Experiment=f"L{i+1}")
        cols_to_drop= list_of_df[i].filter(like='text', axis=1).columns
        list_of_df[i] = list_of_df[i].drop(columns=cols_to_drop)
        
        list_of_df[i].columns = list_of_df[i].columns.str.split().str[-2:].str.join(" ")

    condf = pd.concat(list_of_df)
    metrics = condf.columns[:-1]
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if type(x) == list else x)
    meltdf["Value"] = meltdf["Value"].astype(float)
    loss_config = ['L1', 'L2', 'L3', 'L4']
    
    for metric in metrics:
        matrix = []
        print(f"Metric: {metric}")
        for i in range(len(loss_config)):
            buffer = []
            for j in range(len(loss_config)):
                exp1 = loss_config[i]
                exp2 = loss_config[j]
                
                values1 = meltdf[meltdf['Experiment'] == exp1]
                values1 = values1[values1['Metric'] == metric]['Value']
                values2 = meltdf[meltdf['Experiment'] == exp2]
                values2 = values2[values2['Metric'] == metric]['Value']
                # We assume significnce level of 0.05; due to 10 flod validation we have 9:1 ration of samples
                t_stat, p_value = ttest_ind(values1.to_list(), values2.to_list())
                if p_value < 0.05:
                    if values1.mean() > values2.mean():
                        print(f"{exp1} is significantly better than {exp2}")
                        buffer.append(1)
                    else:
                        print(f"{exp2} is significantly better than {exp1}")
                        buffer.append(-1)
                else:
                    print(f"No significant difference between {exp1} and {exp2}")
                    buffer.append(0)
            matrix.append(buffer)
        matrix = pd.DataFrame(matrix, index=loss_config, columns=loss_config)
        plt.figure(figsize=(10, 7))
        sns.heatmap(matrix, annot=True, cmap='coolwarm', cbar=False)
        plt.title(f"{metric} comparison")
        plt.savefig(save_path + f"{name}_{metric}_comparison.png")
        plt.close()
      

In [47]:

def calculate_metrics(df, data_type, REPO_PATH):
    """
    Calculate the metrics for region and country columns.

    Args:
        df (pd.DataFrame): The DataFrame containing the metrics.
        data_type (str): The type of data (validation or test).

    Returns:
        pd.DataFrame: The DataFrame containing the calculated metrics.
    """
    country_list = f'{REPO_PATH}/utils/country_list/country_list_region_and_continent.csv'
    country_list = pd.read_csv(country_list)
    metrics_calculator = Regional_Loss(country_list=country_list)
    # Convert the 'Output' column to a list of tensors
    df['Output'] = df['Output'].apply(lambda x: torch.tensor(x))
    if data_type == 'validation':
        dfs = np.array_split(df, 10)
        for df in dfs:
            # Stack the list of tensors into a single tensor
            outputs = torch.stack(df['Output'].tolist())
            c_ac = metrics_calculator.calculate_country_accuracy(outputs, df['Label'])
            c_prec, c_rec, c_f1,_,_ = metrics_calculator.calculate_metrics_per_class(outputs, df['Label'])
            r_ac = metrics_calculator.claculate_region_accuracy(outputs, df['Label'])
            r_prec, r_rec, r_f1,_,_ = metrics_calculator.calculate_metrics_per_region(outputs, df['Label'])
            ignored_class = len(df['Label'].unique())  - len(df['Prediction'].unique())
            ignored_region= sum(1 for x, y in zip(r_prec, r_rec) if x == 0 and y == 0)

            metrics = {
                'country_accuracy': [c_ac],
                'country_precision': [c_prec.mean()],
                'country_recall': [c_rec.mean()],
                'country_f1': [c_f1.mean()],
                'region_accuracy': [r_ac],
                'region_precision': [r_prec.mean()],
                'region_recall': [r_rec.mean()],
                'region_f1': [r_f1.mean()],
                'ignored_classes': [ignored_class],
                'ignored_regions': [ignored_region],
                'prediction': [df['Prediction']],
                'label': [df['Label']]
            }
            if 'all_metrics' in locals():
                all_metrics = pd.concat([all_metrics, pd.DataFrame(metrics)])
            else:
                all_metrics = pd.DataFrame(metrics)
    else:
        # Stack the list of tensors into a single tensor
        outputs = torch.stack(df['Output'].tolist())
        c_ac = metrics_calculator.calculate_country_accuracy(outputs, df['Label'])
        c_prec, c_rec, c_f1,_,_ = metrics_calculator.calculate_metrics_per_class(outputs, df['Label'])
        r_ac = metrics_calculator.claculate_region_accuracy(outputs, df['Label'])
        r_prec, r_rec, r_f1,_,_ = metrics_calculator.calculate_metrics_per_region(outputs, df['Label'])
        ignored_class = len(df['Label'].unique())  - len(df['Prediction'].unique())
        ignored_region= sum(1 for x, y in zip(r_prec, r_rec) if x == 0 and y == 0)

        metrics = {
            'country_accuracy': [c_ac],
            'country_precision': [c_prec.mean()],
            'country_recall': [c_rec.mean()],
            'country_f1': [c_f1.mean()],
            'region_accuracy': [r_ac],
            'region_precision': [r_prec.mean()],
            'region_recall': [r_rec.mean()],
            'region_f1': [r_f1.mean()],
            'ignored_classes': [ignored_class],
            'ignored_regions': [ignored_region],
            'prediction': [df['Prediction']],
            'label': [df['Label']]
        }
        all_metrics = pd.DataFrame(metrics)

    return all_metrics


In [48]:
def read_csv_from_dir(log_dir, REPO_PATH):
    """

    """
    # Create empty lists to store the dataframes
    validation_dfs = []
    test_dfs = []
    zero_shot_dfs = []

    # Iterate over the folders in the log directory
    for folder in sorted(os.listdir(log_dir)):
        folder_path = os.path.join(log_dir, folder)
        if os.path.isdir(folder_path):
            # calculate the metrics for all seeds in the folder
            log_files = glob.glob(folder_path + "/*")
            validation_buffer = []
            test_buffer = []
            zero_shot_buffer = []
            for file_path in log_files:
                if '.csv' not in file_path:
                    continue
                df = pd.read_csv(file_path,converters={"Output": ast.literal_eval})

                # Split the data into validation and test data
                if 'validation' in file_path:
                    df = calculate_metrics(df, 'validation', REPO_PATH=REPO_PATH)
                    validation_buffer.append(df)
                elif 'zero' in file_path:
                    df = calculate_metrics(df, 'zero', REPO_PATH=REPO_PATH)
                    zero_shot_buffer.append(df)
                elif 'test' in file_path:
                    df = calculate_metrics(df, 'test', REPO_PATH=REPO_PATH)
                    test_buffer.append(df)
            #validation_dfs.append(pd.concat(validation_buffer))
            test_dfs.append(pd.concat(test_buffer))
            zero_shot_dfs.append(pd.concat(zero_shot_buffer))
    return validation_dfs, test_dfs, zero_shot_dfs


In [49]:
def box_plot_experiments(list_of_df, name, save_path, loss_number=0, dataset_names=None, metric_names=None):
    """
    Genreates box plots for all metrics contained in the dataframes.
    Compares these metrics for each dataframe in the list.

    Parameters:
    list_of_df (list): A list of dataframes.
    name (str): The name of the experiment.
    save_path (str): The path to save the plot.

    Returns:
    pd.DataFrame: A concatenated dataframe containing all data with a coloumn tagging the used Loss.
    """
    dataset_to_indices = {'geo_strongly_balanced':0, 'geo_unbalanced':1, 'geo_weakly_balanced':2, 'mixed_strongly_balanced':3, 'mixed_weakly_balanced':4}
    if dataset_names is not None:
        indices = [dataset_to_indices[name] for name in dataset_names]
        list_of_df = [list_of_df[i] for i in indices]


    sns.set_theme(style="whitegrid")
    list_of_df = [df[loss_number].copy() for df in list_of_df]

    cols_to_use = metric_names.copy()
    cols_to_use.append('Experiment')

    for i in range(len(list_of_df)):
        list_of_df[i] = list_of_df[i].assign(Experiment=list(dataset_to_indices.keys())[indices[i]])
        list_of_df[i] = list_of_df[i][cols_to_use]
        #cols_to_drop = list_of_df[i].filter(like='text', axis=1).columns
        #list_of_df[i] = list_of_df[i].drop(columns=cols_to_drop)

        list_of_df[i].columns = list_of_df[i].columns.str.split().str[-2:].str.join(" ")

    condf = pd.concat(list_of_df)
    meltdf = condf.melt(id_vars=["Experiment"], var_name="Metric", value_name="Value")
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x[0]) if isinstance(x,list) else x) 
    meltdf["Value"] = meltdf["Value"].apply(lambda x: float(x.item()) if isinstance(x,torch.Tensor) else x) 

    ax = sns.boxplot(
        x="Metric", y="Value", hue="Experiment", data=meltdf, showfliers=False
    )
    ax.set_title(name)
    lgd = plt.legend(loc='upper left', bbox_to_anchor=(1,1), fontsize='small', borderaxespad=0.0)
    ax.set_xticklabels(ax.get_xticklabels(), fontsize=10)
    plt.savefig(
        save_path + f"{name}-boxplot.png",
        bbox_extra_artists=(lgd,),
        bbox_inches="tight",
    )
    plt.clf()
    plt.close()
    return condf


## Run eval

In [50]:
REPO_PATH = '/home/leon/Documents/GPML/good_practices_ml/'

# directory of all experiments
experimient_dir= '/media/leon/Samsung_T5/Uni/good_practices_ml/runs/merged_seeds/'
# create lists that contain the dataframes of the different experiments
# First axis contains the different dataset configurations
# Second axis contains the different Loss configurations
# Third axis contains the DataFrame of the different seeds
validation_sets = []
test_sets = []
zeros_shot_datasets = []


for folder in sorted(os.listdir(experimient_dir)):
    log_dir = os.path.join(experimient_dir, folder)
    if os.path.isdir(log_dir):
        save_path = log_dir + '/results/'
        # Call the event_to_df function with the log directory 
        val, test, zero= read_csv_from_dir(log_dir,REPO_PATH)
        validation_sets.append(val)
        test_sets.append(test)
        zeros_shot_datasets.append(zero)



ValueError: array length 1 does not match index length 5729

In [None]:
print([len(val) for val in test_sets])
print(len(test_sets))

[4, 4, 4, 4, 4]
5


In [None]:
dataset_config = ['Strongly Balanced', 'Unbalanced', 'Weakly Balanced', 'Mixed Strongly Balanced', 'Mixed Unbalanced', 'Mixed Weakly Balanced']
save_path = '/media/leon/Samsung_T5/Uni/good_practices_ml/figures/'
for i, experiment in enumerate(test_sets):
    name = dataset_config[i]
    compare_loss(experiment, name, save_path)

Metric: country_accuracy
No significant difference between L1 and L1
L1 is significantly better than L2
L1 is significantly better than L3
L1 is significantly better than L4
L1 is significantly better than L2
No significant difference between L2 and L2
L2 is significantly better than L3
No significant difference between L2 and L4
L1 is significantly better than L3
L2 is significantly better than L3
No significant difference between L3 and L3
L4 is significantly better than L3
L1 is significantly better than L4
No significant difference between L4 and L2
L4 is significantly better than L3
No significant difference between L4 and L4
Metric: country_precision
No significant difference between L1 and L1
L1 is significantly better than L2
L1 is significantly better than L3
L1 is significantly better than L4
L1 is significantly better than L2
No significant difference between L2 and L2
L2 is significantly better than L3
No significant difference between L2 and L4
L1 is significantly better t

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Metric: country_recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: country_f1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between 

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Metric: region_accuracy
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: region_precision
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference b

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Metric: region_recall
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: region_f1
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


Metric: ignored_classes
No significant difference between L1 and L1
No significant difference between L1 and L2
No significant difference between L1 and L3
No significant difference between L1 and L4
No significant difference between L2 and L1
No significant difference between L2 and L2
No significant difference between L2 and L3
No significant difference between L2 and L4
No significant difference between L3 and L1
No significant difference between L3 and L2
No significant difference between L3 and L3
No significant difference between L3 and L4
No significant difference between L4 and L1
No significant difference between L4 and L2
No significant difference between L4 and L3
No significant difference between L4 and L4
Metric: country_accuracy
No significant difference between L1 and L1
L1 is significantly better than L2
L1 is significantly better than L3
No significant difference between L1 and L4
L1 is significantly better than L2
No significant difference between L2 and L2
L2 is sign

  res = hypotest_fun_out(*samples, **kwds)


Metric: country_precision
No significant difference between L1 and L1
L1 is significantly better than L2
L1 is significantly better than L3
L1 is significantly better than L4
L1 is significantly better than L2
No significant difference between L2 and L2
L2 is significantly better than L3
L4 is significantly better than L2
L1 is significantly better than L3
L2 is significantly better than L3
No significant difference between L3 and L3
L4 is significantly better than L3
L1 is significantly better than L4
L4 is significantly better than L2
L4 is significantly better than L3
No significant difference between L4 and L4
Metric: country_recall
No significant difference between L1 and L1
L1 is significantly better than L2
L1 is significantly better than L3
L1 is significantly better than L4
L1 is significantly better than L2
No significant difference between L2 and L2
L2 is significantly better than L3
L4 is significantly better than L2
L1 is significantly better than L3
L2 is significantly be

In [None]:
#validation_sets_with_region = [df.filter(like='region') for df in validation_sets]
#validation_sets_with_country = [df.filter(like='country') for df in validation_sets]

test_sets_with_region = [[df.filter(like='region') for df in sub_array] for sub_array in test_sets]
test_sets_with_country = [[df.filter(like='country') for df in sub_array] for sub_array in test_sets]

zero_shot_sets_with_region = [[df.filter(like='region') for df in sub_array] for sub_array in zeros_shot_datasets]
zero_shot_sets_with_country = [[df.filter(like='country') for df in sub_array] for sub_array in zeros_shot_datasets]

In [None]:
dataset_names = ['geo_strongly_balanced', 'geo_weakly_balanced', 'mixed_strongly_balanced', 'mixed_weakly_balanced','geo_unbalanced']
#box_plot_experiments(validation_sets_with_region, 'Validation Region', save_path, loss_number=0, dataset_names=['geo_strongly_balanced', 'geo_unbalanced', 'geo_weakly_balanced'], metric_names=['region_accuracy', 'region_precision', 'region_recall', 'region_f1'])
#box_plot_experiments(validation_sets_with_country, 'Validation Country', save_path, loss_number=0, dataset_names=['geo_strongly_balanced', 'geo_unbalanced', 'geo_weakly_balanced'], metric_names=['country_accuracy', 'country_precision', 'country_recall', 'country_f1'])

box_plot_experiments(test_sets_with_region, 'Test Region', save_path, loss_number=0, dataset_names=dataset_names, metric_names=['region_accuracy', 'region_precision', 'region_recall', 'region_f1'])
box_plot_experiments(test_sets_with_country, 'Test Country', save_path, loss_number=0, dataset_names=dataset_names, metric_names=['country_accuracy', 'country_precision', 'country_recall', 'country_f1'])

box_plot_experiments(zero_shot_sets_with_region, 'Zero Shot Region', save_path, loss_number=0, dataset_names=dataset_names, metric_names=['region_accuracy', 'region_precision', 'region_recall', 'region_f1'])
box_plot_experiments(zero_shot_sets_with_country, 'Zero Shot Country', save_path, loss_number=0, dataset_names=dataset_names, metric_names=['country_accuracy', 'country_precision', 'country_recall', 'country_f1'])



Unnamed: 0,country_accuracy,country_precision,country_recall,country_f1,Experiment
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced
0,tensor(0.),0.0,0.0,0.0,geo_strongly_balanced


In [None]:
len(test_sets)

5

In [None]:

for experiment in test_sets:
    buffer = []
    for i, df in enumerate(experiment):
        buffer.append(df.assign(Experiment=f'L{i+1}').drop(columns=['prediction', 'label', 'ignored_classes', 'ignored_regions']))
    buffer = pd.concat(buffer)
    print(buffer.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())



KeyError: "['output', 'label', 'ignored_regions'] not found in axis"

In [None]:
for experiment in test_sets:
    buffer = []
    for i, df in enumerate(experiment):
        buffer.append(df.assign(Experiment=f'L{i+1}')['ignored_classes', 'ignored_regions'])
    buffer = pd.concat(buffer)
    print(buffer.groupby('Experiment').mean().round(decimals=3).style.highlight_max(props='textbf:--rwrap;').format(precision=3).to_latex())
box_plot_experiments(test_sets, 'Test', save_path, loss_number=0, metric_names=['ignored_classes', 'ignored_regions'])


KeyError: ('ignored_classes', 'ignored_regions')

In [None]:
def createConfusionMatrix(true_countries, predicted_countries, figure_label, REPO_PATH):
        """
        Creates and visualizes the confusion matrix for country and region predictions.

        Args:
            true_countries (list): List of true country labels.
            predicted_countries (list): List of predicted country labels.
            figure_label (str): Label for the generated figures.

        Returns:
            None
        """
        country_list = f'{REPO_PATH}/utils/country_list/country_list_region_and_continent.csv'
        country_list = pd.read_csv(country_list)
        regional_ordering_index = [8, 11, 144, 3, 4, 12, 16, 26, 28, 44, 46, 51, 52, 66, 74, 83, 95, 101, 105, 109, 121, 128, 153, 180, 191, 201, 202, 32, 43, 77, 81, 134, 140, 146, 179, 99, 106, 185, 187, 198, 58, 98, 122, 131, 133, 136, 159, 163, 166, 177, 178, 193, 195, 209, 210, 41, 80, 97, 102, 103, 126, 127, 192, 20, 31, 48, 84, 119, 152, 160, 162, 173, 194, 60, 137, 149, 165, 204, 78, 156, 7, 34, 35, 40, 64, 53, 56, 116, 117, 167, 188, 23, 33, 72, 196, 13, 50, 55, 59, 62, 65, 69,
                                        86, 88, 92, 94, 113, 115, 142, 168, 172, 38, 148, 189, 205, 9, 25, 27, 39, 42, 54, 61, 68, 76, 79, 147, 157, 197, 200, 24, 85, 100, 107, 125, 135, 150, 169, 184, 186, 203, 30, 138, 182, 208, 2, 17, 29, 89, 91, 111, 132, 143, 151, 0, 5, 15, 57, 71, 75, 82, 93, 120, 123, 130, 155, 161, 171, 175, 199, 206, 19, 22, 37, 45, 70, 73, 112, 124, 129, 139, 170, 174, 176, 183, 1, 6, 14, 21, 47, 67, 87, 90, 96, 104, 108, 145, 154, 158, 164, 181, 190, 207, 10, 18, 36, 49, 63, 110, 114, 118, 141]
        # constant for classes
        classes = country_list['Country']
        np_classes = np.array(classes)

        # Build country confusion matrix
        cf_matrix = confusion_matrix(
            true_countries, predicted_countries, labels=range(0, 211))
        ordered_index = np.argsort(-cf_matrix.diagonal())
        ordered_matrix = cf_matrix[ordered_index][:, ordered_index]

        regionally_ordered_matrix = cf_matrix[regional_ordering_index][:,
                                                                            regional_ordering_index]

        ordered_classes = np_classes[ordered_index]
        regionally_ordered_classes = np_classes[regional_ordering_index]

        df_cm = pd.DataFrame(cf_matrix, index=classes, columns=classes)
        ordered_df_cm = pd.DataFrame(
            ordered_matrix, index=ordered_classes, columns=ordered_classes)
        regionally_ordered_df_cm = pd.DataFrame(
            regionally_ordered_matrix, index=regionally_ordered_classes, columns=regionally_ordered_classes)

        np_regions = np.sort(
            np.array(list(set(country_list['Intermediate Region Name']))))

        # Build region confusion matrix
        true_regions = []
        predicted_regions = []
        for i in range(0, len(true_countries)):
            true_regions.append(ast.literal_eval(
                country_list.iloc[true_countries[i]]["One Hot Region"]).index(1))
            predicted_regions.append(ast.literal_eval(
                country_list.iloc[predicted_countries[i]]["One Hot Region"]).index(1))

        regions_cf_matrix = confusion_matrix(
            true_regions, predicted_regions, labels=range(0, len(np_regions)))
        regions_ordered_index = np.argsort(-regions_cf_matrix.diagonal())
        regions_ordered_matrix = regions_cf_matrix[regions_ordered_index][:,
                                                                          regions_ordered_index]

        ordered_regions = np_regions[regions_ordered_index]

        regions_df_cm = pd.DataFrame(
            regions_cf_matrix, index=np_regions, columns=np_regions)
        regions_ordered_df_cm = pd.DataFrame(
            regions_ordered_matrix, index=ordered_regions, columns=ordered_regions)
        plt.figure(1, figsize=(120, 70))
        figure = sn.heatmap(df_cm, cmap=sn.cubehelix_palette(
            as_cmap=True)).get_figure()
        plt.figure(2, figsize=(120, 70))
        ordered_figure = sn.heatmap(
            ordered_df_cm, cmap=sn.cubehelix_palette(as_cmap=True)).get_figure()
        plt.figure(3, figsize=(120, 70))
        regionally_ordered_figure = sn.heatmap(
            regionally_ordered_df_cm, cmap=sn.cubehelix_palette(as_cmap=True)).get_figure()
        plt.figure(4, figsize=(120, 70))
        regions_figure = sn.heatmap(
            regions_df_cm, cmap=sn.cubehelix_palette(as_cmap=True)).get_figure()
        plt.figure(5, figsize=(120, 70))
        regions_ordered_figure = sn.heatmap(
            regions_ordered_df_cm, cmap=sn.cubehelix_palette(as_cmap=True)).get_figure()
        figure.savefig(f'{REPO_PATH}/figures/{figure_label}_country.png')
        ordered_figure.savefig(save_path)
        regionally_ordered_figure.savefig(save_path)
        regions_figure.savefig(save_path)
        regions_ordered_figure.savefig(save_path)
        plt.close(figure)
        plt.close(ordered_figure)
        plt.close(regionally_ordered_figure)
        plt.close(regions_figure)
