In [1]:
import os
import pandas as pd
import numpy as np
import scipy.stats as stats
from itertools import product
import scikit_posthocs as sp
import matplotlib.pyplot as plt
import seaborn as sns

def read_csv(folder_path, dataset_name, reduction_mode):
    """Load and concatenate the per-run result CSVs of one dataset.

    A file inside ``folder_path`` is selected when its name contains
    ``dataset_name``, does not contain ``'all'`` and, if ``reduction_mode``
    is truthy, also contains ``reduction_mode``.

    Parameters
    ----------
    folder_path : str
        Directory that holds the result files.
    dataset_name : str
        Substring identifying the dataset in the file names.
    reduction_mode : str or falsy value
        Substring identifying the reduction technique; pass a falsy value
        (e.g. ``False``) to skip this filter.

    Returns
    -------
    pandas.DataFrame
        Rows of every selected file stacked in file-name order, with a
        fresh ``0..n-1`` index.

    Raises
    ------
    ValueError
        If no file in ``folder_path`` matches the filters.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore any position-based pairing of runs done by the
    # callers) reproducible across machines and executions.
    csv_files = sorted(
        f for f in os.listdir(folder_path)
        if dataset_name in f
        and 'all' not in f
        and (not reduction_mode or reduction_mode in f)
    )

    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No result files found in {folder_path!r} for dataset "
            f"{dataset_name!r} (reduction_mode={reduction_mode!r})"
        )

    df_list = [pd.read_csv(os.path.join(folder_path, f)) for f in csv_files]
    return pd.concat(df_list, ignore_index=True)

In [2]:
def aggregate_metrics(df_combined, method):
    """Summarise run-level metrics per hyperparameter combination.

    Rows of ``df_combined`` are grouped by the hyperparameter columns of
    ``method``; mean and standard deviation of each metric are computed per
    group and the groups are sorted by mean accuracy, best first. The best
    combination and its mean accuracy are printed.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        One row per run. Must contain the hyperparameter columns of
        ``method`` and the metric columns listed in ``metric_names`` below.
    method : str
        ``'knn'`` or ``'svm'``.

    Returns
    -------
    metrics_summary : pandas.DataFrame
        One row per hyperparameter combination with ``<metric>_mean`` and
        ``<metric>_std`` columns, sorted by ``Accuracy_mean`` descending.
    best_hyperparams : dict
        Hyperparameter values of the first row of ``metrics_summary``.

    Raises
    ------
    ValueError
        If ``method`` is not supported or no group could be formed.
    """
    hyperparams_by_method = {
        'knn': ['K', 'Distance', 'Voting scheme', 'Weight scheme'],
        'svm': ['Kernel'],
    }
    if method not in hyperparams_by_method:
        # Previously fell through and failed later with UnboundLocalError.
        raise ValueError(
            f"Unknown method {method!r}; expected one of {sorted(hyperparams_by_method)}"
        )
    columns = hyperparams_by_method[method]

    metric_names = [
        'Accuracy',
        'Precision_Class_0', 'Recall_Class_0', 'F1_Class_0',
        'Precision_Class_1', 'Recall_Class_1', 'F1_Class_1',
        'Solving Time',
    ]

    # Compute mean and standard deviation of the relevant metrics
    metrics_summary = (
        df_combined
        .groupby(columns)
        .agg({name: ['mean', 'std'] for name in metric_names})
        .reset_index()
    )

    # Flatten the two-level header: ('Accuracy', 'mean') -> 'Accuracy_mean';
    # the group keys carry an empty second level and keep their plain name.
    metrics_summary.columns = [
        f'{name}_{stat}' if stat else name
        for name, stat in metrics_summary.columns
    ]

    metrics_summary = metrics_summary.sort_values(by='Accuracy_mean', ascending=False)

    if metrics_summary.empty:
        raise ValueError("No hyperparameter combination found in the provided results")

    # Get the best hyperparameters (the first row after sorting)
    best_row = metrics_summary.iloc[0]
    best_hyperparams = best_row[columns].to_dict()
    best_accuracy = best_row['Accuracy_mean']

    # Print the best hyperparameters and their accuracy
    print("Best Hyperparameters:")
    print(best_hyperparams)
    print(f"Best Accuracy: {best_accuracy:.4f}")

    return metrics_summary, best_hyperparams


In [3]:
def get_metrics_knn(best_models, df_combined, metric='Accuracy'):
    """Collect the per-run values of ``metric`` for each selected KNN model.

    Parameters
    ----------
    best_models : pandas.DataFrame
        One row per model; must contain the columns ``'Voting scheme'``,
        ``'Weight scheme'``, ``'Distance'`` and ``'K'``.
    df_combined : pandas.DataFrame
        Run-level results containing the same four columns plus ``metric``.
    metric : str, optional
        Column of ``df_combined`` to extract. Defaults to ``'Accuracy'``.

    Returns
    -------
    list of list
        One inner list per row of ``best_models`` (same order) holding the
        ``metric`` values of the runs whose hyperparameters equal that row's,
        in ``df_combined`` row order.
    """
    hyperparameters = ['Voting scheme', 'Weight scheme', 'Distance', 'K']
    metric_values = []
    for _, row in best_models.iterrows():
        # Keep only the runs that used exactly this hyperparameter combination
        first, *rest = hyperparameters
        mask = df_combined[first] == row[first]
        for name in rest:
            mask = mask & (df_combined[name] == row[name])

        # Collect metric values for this combination
        metric_values.append(list(df_combined.loc[mask, metric].values))

    return metric_values

def get_metrics_svm(best_models, df_combined, metric='Accuracy'):
    """Collect the per-run values of ``metric`` for each selected SVM model.

    Parameters
    ----------
    best_models : pandas.DataFrame
        One row per model; must contain the column ``'Kernel'``.
    df_combined : pandas.DataFrame
        Run-level results containing ``'Kernel'`` and ``metric``.
    metric : str, optional
        Column of ``df_combined`` to extract. Defaults to ``'Accuracy'``.

    Returns
    -------
    list of list
        One inner list per row of ``best_models`` (same order) holding the
        ``metric`` values of the runs that used that row's kernel, in
        ``df_combined`` row order.
    """
    metric_values = []
    for _, row in best_models.iterrows():
        # Keep only the runs that used this kernel
        mask = df_combined['Kernel'] == row['Kernel']

        # Collect metric values for this kernel
        metric_values.append(list(df_combined.loc[mask, metric].values))

    return metric_values

In [4]:
def evaluation_test(metric_values, alpha=0.05):
    """Test whether several models differ significantly and print the outcome.

    With three or more models a Friedman test is run, followed by a Nemenyi
    post-hoc test (via ``scikit_posthocs``) when the Friedman p-value is
    below ``alpha``. With exactly two models a Wilcoxon signed-rank test is
    run. With fewer than two models only a notice is printed.

    Parameters
    ----------
    metric_values : list of sequences
        One sequence of metric values per model. The tests are paired, so
        position ``i`` of every sequence is compared against position ``i``
        of the others.
    alpha : float, optional
        Significance level used to interpret the p-value. Defaults to 0.05.

    Returns
    -------
    None
        Results are reported through ``print`` only.
    """
    n_models = len(metric_values)

    # Perform the Friedman test if there are three or more models
    if n_models >= 3:

        stat, p_value = stats.friedmanchisquare(*metric_values)
        print(f"Friedman test statistic: {stat}, p-value: {p_value}")

        if p_value < alpha:
            print("Significant differences found, conducting Nemenyi post-hoc test")
            data = pd.DataFrame(metric_values).T  # Transpose so each column is a model
            nemenyi_result = sp.posthoc_nemenyi_friedman(data)
            print(nemenyi_result)
        else:
            print("No significant differences found between the models.")

    # Perform the Wilcoxon signed-rank test if there are exactly two models
    elif n_models == 2:

        stat, p_value = stats.wilcoxon(metric_values[0], metric_values[1])
        print(f"Wilcoxon signed-rank test statistic: {stat}, p-value: {p_value}")

        if p_value < alpha:
            print("Significant difference between the two models.")
        else:
            print("No significant differences found between the two models.")
    else:
        print("Not enough data to perform the test.")

In [5]:
def evaluation_t_test(metric_values1, metric_values2, alpha=0.05):
    """Run a paired t-test between two models and print the outcome.

    Parameters
    ----------
    metric_values1, metric_values2 : sequence of float
        Metric values of the two models. The test is paired
        (``scipy.stats.ttest_rel``), so position ``i`` of one sequence is
        compared against position ``i`` of the other.
    alpha : float, optional
        Significance level used to interpret the p-value. Defaults to 0.05.

    Returns
    -------
    None
        Results are reported through ``print`` only.
    """
    stat, p_value = stats.ttest_rel(metric_values1, metric_values2)
    print(f"Paired t-test statistic: {stat}, p-value: {p_value}")

    if p_value < alpha:
        verdict = "Significant difference found between the two models."
    else:
        verdict = "No significant differences found between the two models."
    print(verdict)

In [7]:
# Read the per-run CSVs of every dataset/method pair, aggregate them per
# hyperparameter combination, persist the summary and test whether the top
# configurations differ significantly.

dataset_names = ['grid','sick']
methods = ['knn','svm']
best_params = {}

for dataset_name in dataset_names:
    best_params[dataset_name] = {}
    for method in methods:
        print(f'Evaluating method {method} on dataset: {dataset_name}')
        df_combined = read_csv(f'results_{method}', dataset_name, False)
        metrics_summary, best_hyperparams = aggregate_metrics(df_combined, method)
        # The '_all' suffix keeps this summary out of later read_csv calls,
        # which skip any file whose name contains 'all'.
        metrics_summary.to_csv(f'results_{method}/results_{dataset_name}_all.csv', index=False)
        best_params[dataset_name][method] = best_hyperparams
        # Compare the five best configurations (summary is sorted best-first)
        best_models = metrics_summary.head(5)
        if method == 'knn':
            metric_values = get_metrics_knn(best_models, df_combined)
        elif method == 'svm':
            metric_values = get_metrics_svm(best_models, df_combined)
        evaluation_test(metric_values)
        print('\n')
 

Evaluating method knn on dataset: grid
Best Hyperparameters:
{'K': 7, 'Distance': 'minkowski2', 'Voting scheme': 'Majority_class', 'Weight scheme': 'Mutual_classifier'}
Best Accuracy: 0.9672
Friedman test statistic: 3.857988165680463, p-value: 0.4255660834829863
No significant differences found between the models.


Evaluating method svm on dataset: grid
Best Hyperparameters:
{'Kernel': 'rbf'}
Best Accuracy: 0.9142
Wilcoxon signed-rank test statistic: 0.0, p-value: 0.001953125
Significant difference between the two models.


Evaluating method knn on dataset: sick
Best Hyperparameters:
{'K': 7, 'Distance': 'HEOM', 'Voting scheme': 'Majority_class', 'Weight scheme': 'Mutual_classifier'}
Best Accuracy: 0.9626
Friedman test statistic: 0.3773584905660699, p-value: 0.9842883045217616
No significant differences found between the models.


Evaluating method svm on dataset: sick
Best Hyperparameters:
{'Kernel': 'rbf'}
Best Accuracy: 0.9594
Wilcoxon signed-rank test statistic: 1.0, p-value: 0.01

In [48]:
# For every dataset and method, compare the best configuration trained on the
# full data against the same method trained on instance-reduced data, and
# gather the best row of each variant into one summary frame per dataset.

dataframe_best_results_together = {}

# NOTE(review): hard-coded sizes of the unreduced datasets — confirm against the data.
num_samples = {'grid': 1700, 'sick': 3395}

for dataset_name in dataset_names:
    # Collect the one-row frames and concatenate once per dataset instead of
    # growing a DataFrame (starting from an empty one) inside the loop.
    best_rows = []
    for method in methods:
        df_combined = read_csv(f'results_{method}', dataset_name, False)
        metrics_summary, best_hyperparams = aggregate_metrics(df_combined, method)

        # Make a copy of the best_models DataFrame to avoid SettingWithCopyWarning
        best_models = metrics_summary.head(5).copy()

        # Per-run accuracies of the single best configuration (first entry)
        if method == 'knn':
            metric_values = get_metrics_knn(best_models, df_combined)[0]
        elif method == 'svm':
            metric_values = get_metrics_svm(best_models, df_combined)[0]

        best_models.loc[:, 'Method'] = method
        best_models.loc[:, 'Num samples'] = num_samples[dataset_name]
        best_rows.append(best_models.head(1))

        for reduction in ['CNN', 'DROP', 'EENTh']:

            print(f'\nComparing method {method} on dataset {dataset_name} with reduction {reduction}')
            results_reduced = read_csv(f'results_{method}_reduced', dataset_name, reduction)
            metrics_summary, best_hyperparams = aggregate_metrics(results_reduced, method)

            # Make a copy of the best_models DataFrame to avoid SettingWithCopyWarning
            best_models = metrics_summary.head(5).copy()

            best_models.loc[:, 'Method'] = method
            best_models.loc[:, 'reduction'] = reduction
            # Assigning the whole Series aligned summary row i with raw run i
            # by index label; take the first run's value explicitly instead.
            # NOTE(review): presumes 'Num samples' is the same for every run of
            # a reduction — confirm, or aggregate it (e.g. mean) if it varies.
            best_models.loc[:, 'Num samples'] = results_reduced['Num samples'].iloc[0]

            best_rows.append(best_models.head(1))

            metrics_summary.to_csv(f'results_{method}_reduced/results_{dataset_name}_{reduction}_all.csv', index=False)

            # NOTE(review): this uses every row of the reduced results, so the
            # paired test presumes those files hold a single configuration
            # with runs in the same order as the full-data runs — confirm.
            metric_values_reduced = results_reduced['Accuracy'].values

            evaluation_t_test(metric_values, metric_values_reduced)

    dataframe_best_results_together[dataset_name] = (
        pd.concat(best_rows) if best_rows else pd.DataFrame()
    )

Best Hyperparameters:
{'K': 7, 'Distance': 'minkowski2', 'Voting scheme': 'Majority_class', 'Weight scheme': 'Mutual_classifier'}
Best Accuracy: 0.9672

Comparing method knn on dataset grid with reduction CNN
Best Hyperparameters:
{'K': 7, 'Distance': 'minkowski2', 'Voting scheme': 'Majority_class', 'Weight scheme': 'Mutual_classifier'}
Best Accuracy: 0.6166
Paired t-test statistic: 10.148441037251782, p-value: 3.1647943713369135e-06
Significant difference found between the two models.

Comparing method knn on dataset grid with reduction DROP
Best Hyperparameters:
{'K': 7, 'Distance': 'minkowski2', 'Voting scheme': 'Majority_class', 'Weight scheme': 'Mutual_classifier'}
Best Accuracy: 0.9295
Paired t-test statistic: 13.417601525242189, p-value: 2.9586843076967304e-07
Significant difference found between the two models.

Comparing method knn on dataset grid with reduction EENTh
Best Hyperparameters:
{'K': 7, 'Distance': 'minkowski2', 'Voting scheme': 'Majority_class', 'Weight scheme': '

In [46]:
# Columns shown in the summary of best configurations for the 'grid' dataset
columns = [
    'Accuracy_mean',
    'Num samples',
    'Solving Time_mean',
    'reduction',
    'Method',
]
dataframe_best_results_together['grid'].loc[:, columns]

Unnamed: 0,Accuracy_mean,Num samples,Solving Time_mean,reduction,Method
103,0.967162,1700,3.240735,,knn
0,0.616602,130,0.365052,CNN,knn
0,0.929545,341,0.843815,DROP,knn
0,0.97191,1552,3.357328,EENTh,knn
0,0.914167,1700,0.284618,,svm
0,0.854294,130,0.011683,CNN,svm
0,0.889282,341,0.017874,DROP,svm
0,0.914696,1552,0.160981,EENTh,svm
