# functions

In [None]:
import csv
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

In [None]:
def load_global_perplexities(base_dir, group, epochs, transcription_type, batch_size):
    """Collect per-epoch global perplexity scores for every patient folder.

    Every immediate sub-directory of ``base_dir`` is treated as one patient.
    Patients are visited in sorted name order and, for each epoch, the score
    is read from the file
    ``<patient>_modello_<group>_<transcription_type>_<batch_size>b_<epoch>ep_global_ppl_score.txt``
    inside the patient folder and parsed as a float.

    Args:
        base_dir: Directory whose sub-directories are the patient folders.
        group: Model tag embedded in the score file name.
        epochs: Epoch identifiers; they become the keys of the result.
        transcription_type: Transcription tag embedded in the file name.
        batch_size: Batch size embedded in the file name.

    Returns:
        dict mapping each epoch to the list of scores found for it. Missing
        score files are skipped silently, so lists belonging to different
        epochs can have different lengths and a given position is not
        guaranteed to refer to the same patient in every list.

    Raises:
        ValueError: If a score file does not contain a parsable float.
    """
    subject_names = sorted(
        entry for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )

    scores_by_epoch = {}
    for epoch in epochs:
        scores_by_epoch[epoch] = []

    for subject in subject_names:
        subject_folder = os.path.join(base_dir, subject)
        for epoch in epochs:
            score_name = f"{subject}_modello_{group}_{transcription_type}_{batch_size}b_{epoch}ep_global_ppl_score.txt"
            try:
                handle = open(os.path.join(subject_folder, score_name), 'r')
            except FileNotFoundError:
                # No score stored for this subject/epoch pair: skip it.
                continue
            with handle:
                scores_by_epoch[epoch].append(float(handle.read().strip()))

    return scores_by_epoch

In [None]:
def process_and_plot_global(w, epochs, fold, dataset, transcription_type, batch_size, disease_class, control_class):
    """Plot dev-set perplexity per epoch for both models and save summary CSVs.

    Scores are loaded with ``load_global_perplexities`` from
    ``{dataset}/{dataset}_fold_{fold}/{dataset}_fold_{fold}_w{w}_l0/dev/{transcription_type}``
    once for ``disease_class`` and once for ``control_class``.

    Outputs (directories are created when missing):
        * ``fig/{dataset}/perplexity_evolution_dev_set_fold_{fold}_w{w}.png``
        * ``{dataset}/{dataset}_matrices/matrix_diff_ad_cn_fold_{fold}_w{w}.csv``:
          entry (i, j) is mean disease-model score at ``epochs[i]`` minus
          mean control-model score at ``epochs[j]``; NaN when either epoch
          has no scores.
        * ``{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv`` and
          ``mean_cn_fold_{fold}_w{w}.csv``: the per-epoch means.

    Args:
        w: Window identifier used in directory and file names.
        epochs: Sequence of epoch identifiers (x axis, CSV index/columns).
        fold: Fold identifier used in directory and file names.
        dataset: Dataset name; also the root directory of inputs/outputs.
        transcription_type: Transcription tag (sub-directory and file name).
        batch_size: Batch size embedded in the score file names.
        disease_class: Model tag of the disease model.
        control_class: Model tag of the control model.

    Returns:
        None.
    """
    base_dir = f"{dataset}/{dataset}_fold_{fold}/{dataset}_fold_{fold}_w{w}_l0/dev/{transcription_type}"

    dict_ad = load_global_perplexities(base_dir, disease_class, epochs, transcription_type, batch_size)
    dict_cn = load_global_perplexities(base_dir, control_class, epochs, transcription_type, batch_size)

    # Empty lists map straight to NaN so np.nanmean never warns about an
    # empty slice; the resulting value is the same.
    mean_ad = [np.nanmean(dict_ad[e]) if dict_ad[e] else np.nan for e in epochs]
    mean_cn = [np.nanmean(dict_cn[e]) if dict_cn[e] else np.nan for e in epochs]

    # Plotting
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, mean_ad, marker='o', label='AD Model on all dev patients', color='red')
    plt.plot(epochs, mean_cn, marker='o', label='CN Model on all dev patients', color='blue')
    plt.xlabel('Epochs')
    plt.ylabel('Perplexity')
    plt.grid(True, linestyle='--', alpha=0.7)

    # Fixed tick positions and y-range (not derived from `epochs`).
    plt.xticks(ticks=[5,10,15])
    plt.ylim(10,50)

    plt.legend()
    # Annotate every point with its value, slightly above the marker.
    for means, color in zip([mean_ad, mean_cn], ['red', 'blue']):
        for x, y in zip(epochs, means):
            plt.text(x, y + 0.02, f"{y:.2f}", ha='center', color=color)
    plt.tight_layout()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(f"fig/{dataset}", exist_ok=True)

    # Save the plot
    plot_name = f"fig/{dataset}/perplexity_evolution_dev_set_fold_{fold}_w{w}"
    plt.savefig(f"{plot_name}.png")

    plt.close()

    # Difference matrix (AD - CN). Each epoch mean is computed once and the
    # matrix is built by broadcasting; an empty epoch yields NaN, which
    # propagates to its whole row/column exactly like the explicit check did.
    plain_ad = np.array([np.mean(dict_ad[e]) if dict_ad[e] else np.nan for e in epochs], dtype=float)
    plain_cn = np.array([np.mean(dict_cn[e]) if dict_cn[e] else np.nan for e in epochs], dtype=float)
    matrix = plain_ad[:, None] - plain_cn[None, :]

    matrices_dir = f"{dataset}/{dataset}_matrices"
    os.makedirs(matrices_dir, exist_ok=True)

    # Save the matrix to a CSV file
    matrix_df = pd.DataFrame(matrix, index=epochs, columns=epochs)
    matrix_df.to_csv(f"{dataset}/{dataset}_matrices/matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index=True, header=True)

    # Save the mean_ad and mean_cn to CSV files
    mean_ad_df = pd.DataFrame(mean_ad, index=epochs, columns=['Mean AD'])
    mean_ad_df.to_csv(f"{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv", index=True, header=True)
    mean_cn_df = pd.DataFrame(mean_cn, index=epochs, columns=['Mean CN'])
    mean_cn_df.to_csv(f"{dataset}/{dataset}_matrices/mean_cn_fold_{fold}_w{w}.csv", index=True, header=True)

In [None]:
def _f1_from_counts(true_pos, false_pos, false_neg):
    """Return the F1 score of one class, using 0 wherever a ratio is undefined."""
    predicted = true_pos + false_pos
    actual = true_pos + false_neg
    precision = true_pos / predicted if predicted > 0 else 0
    recall = true_pos / actual if actual > 0 else 0
    if precision + recall > 0:
        return 2 * (precision * recall) / (precision + recall)
    return 0


def classification_test(base_dir_test, epochs, fold, w, dataset, transcription_type, batch_size, disease_class, control_class):
    """Score every (disease epoch, control epoch) pair on the test subjects.

    A subject is predicted as control when the disease model's perplexity is
    strictly higher than the control model's, otherwise as disease. The
    predicted label string is ``'Control'`` (or ``'human'`` when
    ``disease_class != 'ad'``) for control and ``'Non Healthy'`` (or
    ``'machine'`` when ``control_class != 'cn'``) for disease, and is
    compared with the ``label`` column of ``{dataset}/labels.csv``
    (``;``-separated, with a ``patient_id`` column).

    One row per epoch pair (errors, accuracy, confusion counts, per-class and
    macro F1) is written to
    ``{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv``.

    NOTE(review): scores and labels are matched purely by position: the k-th
    score of each list is paired with the k-th row of labels.csv. The score
    lists come from ``load_global_perplexities`` (sorted folder order, missing
    files skipped), so this presumably relies on labels.csv being in the same
    order and on no score file being missing. Confirm against the data.

    Args:
        base_dir_test: Directory holding one sub-directory per test subject.
        epochs: Sequence of epoch identifiers to combine pairwise.
        fold: Fold identifier (output file name and log message).
        w: Window identifier (output file name and log message).
        dataset: Dataset name; root directory of labels.csv and the results.
        transcription_type: Transcription tag embedded in score file names.
        batch_size: Batch size embedded in score file names.
        disease_class: Model tag of the disease model (e.g. ``'ad'``).
        control_class: Model tag of the control model (e.g. ``'cn'``).

    Returns:
        None.
    """
    dict_ad = load_global_perplexities(base_dir_test, disease_class, epochs, transcription_type, batch_size)
    dict_cn = load_global_perplexities(base_dir_test, control_class, epochs, transcription_type, batch_size)

    label_df = pd.read_csv(f"{dataset}/labels.csv", sep=';')
    # Built once; rebuilding this list for every subject of every epoch pair
    # was pure overhead.
    patient_ids = list(label_df['patient_id'])
    labels = dict(zip(patient_ids, label_df['label']))

    control_label = 'Control' if disease_class == 'ad' else 'human'
    disease_label = 'Non Healthy' if control_class == 'cn' else 'machine'

    results = []

    # Loop over all epoch pairs
    for ad_epoch in epochs:
        ad_scores = dict_ad[ad_epoch]
        for cn_epoch in epochs:
            cn_scores = dict_cn[cn_epoch]
            errors = 0
            total = 0
            count_ad_pred_cn = 0
            count_cn_pred_ad = 0
            count_ad_pred_ad = 0
            count_cn_pred_cn = 0

            # zip stops at the shortest input, i.e. only positions present in
            # the label table and in both score lists are evaluated.
            for patient_id, ppl_ad, ppl_cn in zip(patient_ids, ad_scores, cn_scores):
                subj_label = labels[patient_id]

                predicted_control = (ppl_ad - ppl_cn) > 0
                pred = control_label if predicted_control else disease_label

                # Branch on the decision itself, not on the literal
                # 'Control': with the 'human'/'machine' label set the string
                # comparison never matched and the confusion counts were wrong.
                if pred != subj_label:
                    errors += 1
                    if predicted_control:
                        count_ad_pred_cn += 1
                    else:
                        count_cn_pred_ad += 1
                elif predicted_control:
                    count_cn_pred_cn += 1
                else:
                    count_ad_pred_ad += 1

                total += 1

            # accuracy
            acc = 1 - (errors / total) if total else 0

            f1_per_class = {
                disease_class: _f1_from_counts(count_ad_pred_ad, count_cn_pred_ad, count_ad_pred_cn),
            }
            f1_per_class[control_class] = _f1_from_counts(count_cn_pred_cn, count_ad_pred_cn, count_cn_pred_ad)

            macro_f1 = (f1_per_class[disease_class] + f1_per_class[control_class]) / 2

            results.append({
                'ad_epoch': ad_epoch,
                'cn_epoch': cn_epoch,
                'errors': errors,
                'accuracy': acc,
                'ad_predicted_cn': count_ad_pred_cn,
                'cn_predicted_ad': count_cn_pred_ad,
                f'f1_{disease_class}': f1_per_class[disease_class],
                f'f1_{control_class}': f1_per_class[control_class],
                'macro_f1': macro_f1
            })

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(f"{dataset}/{dataset}_results", exist_ok=True)

    results_df = pd.DataFrame(results)
    results_df.to_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv", index=False)
    print(f"\nClassification results for Fold {fold} - Window {w}:")

In [None]:
def print_correlation_ppl_valid_classific_test_not_overfitting(epochs, fold, w, dataset):
    """Correlate |perplexity gap| with accuracy on pre-minimum epoch pairs.

    Loads the classification results, the difference matrix and the two
    per-epoch mean CSVs of one fold/window. Only rows whose ``ad_epoch`` and
    ``cn_epoch`` do not exceed the epoch at which the respective mean
    perplexity is lowest are kept. For those rows ``delta`` is the absolute
    matrix entry of the epoch pair; Pearson and Spearman correlations between
    ``delta`` and ``accuracy`` are printed and a scatter plot with a linear
    fit is saved to
    ``fig/{dataset}/not_overfitting_accuracy_vs_delta_fold_{fold}_w{w}.png``.

    Args:
        epochs: Sequence of epoch identifiers, in the order of the rows and
            columns of the saved matrices.
        fold: Fold identifier used in the file names.
        w: Window identifier used in the file names.
        dataset: Dataset name; root directory of the inputs.

    Returns:
        None. Returns early (after printing a message) when inputs cannot be
        loaded, when no mean is valid, or when fewer than two rows remain.
    """
    try:
        # Load results
        results_df = pd.read_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv")

        # Load matrix as NumPy array
        matrix_df = pd.read_csv(f"{dataset}/{dataset}_matrices/matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index_col=0)
        matrix = matrix_df.values  # convert to NumPy array

        mean_ad_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv", index_col=0)
        mean_cn_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_cn_fold_{fold}_w{w}.csv", index_col=0)
    except Exception as e:
        print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
        return

    try:
        # nanargmin, not argmin: argmin returns the position of the first
        # NaN, so an epoch without scores would be taken as the minimum.
        min_ad_epoch_val = epochs[np.nanargmin(mean_ad_df.values)]
        min_cn_epoch_val = epochs[np.nanargmin(mean_cn_df.values)]
    except ValueError:
        # Raised when a mean table is empty or entirely NaN.
        print(f"⚠️ No valid mean perplexity for fold={fold}, w={w}")
        return

    # Filter using actual epoch values. The explicit copy makes the frame
    # independent of the loaded one, so adding a column below is well defined
    # and does not trigger SettingWithCopyWarning.
    results_df = results_df[
        (results_df['ad_epoch'] <= min_ad_epoch_val) & (results_df['cn_epoch'] <= min_cn_epoch_val)
    ].copy()

    # Position of the first occurrence of each epoch (same as list.index,
    # but also usable when `epochs` is not a list).
    epoch_pos = {}
    for pos, epoch in enumerate(epochs):
        epoch_pos.setdefault(epoch, pos)

    # Compute deltas *after* filtering; unknown epochs or positions outside
    # the matrix give NaN and are dropped below.
    deltas = []
    for ad_epoch, cn_epoch in zip(results_df['ad_epoch'], results_df['cn_epoch']):
        i = epoch_pos.get(ad_epoch)
        j = epoch_pos.get(cn_epoch)
        if i is None or j is None:
            deltas.append(np.nan)
            continue
        try:
            deltas.append(abs(matrix[i, j]))
        except IndexError:
            deltas.append(np.nan)

    results_df['delta'] = deltas
    results_df = results_df.dropna(subset=['accuracy', 'delta'])

    if len(results_df) < 2:
        print(f"⚠️ Not enough valid data to compute correlation for fold={fold}, w={w}")
        return

    # Correlation computations
    pearson_corr, pearson_p = pearsonr(results_df['delta'], results_df['accuracy'])
    spearman_corr, spearman_p = spearmanr(results_df['delta'], results_df['accuracy'])

    print(f"📊 Pearson correlation: {pearson_corr:.4f} (p={pearson_p:.4e})")
    print(f"📈 Spearman correlation: {spearman_corr:.4f} (p={spearman_p:.4e})")

    # Plotting
    plt.figure(figsize=(8, 6))
    plt.scatter(results_df['delta'], results_df['accuracy'], alpha=0.7)

    # Regression line
    z = np.polyfit(results_df['delta'], results_df['accuracy'], 1)
    p = np.poly1d(z)
    x_vals = np.sort(results_df['delta'])
    plt.plot(x_vals, p(x_vals), "r--")

    # Text annotation
    plt.text(
        0.05, 0.05,
        f"Pearson r: {pearson_corr:.4f} (p={pearson_p:.4e})\n"
        f"Spearman r: {spearman_corr:.4f} (p={spearman_p:.4e})",
        transform=plt.gca().transAxes,
        bbox=dict(facecolor='white', alpha=0.8),
        fontsize=20
    )

    plt.yticks([0,0.25,0.5,0.75,1])
    plt.yticks(fontsize=20)
    plt.xticks(fontsize=20)

    plt.xlim(left=0)
    max_val = results_df['delta'].max()
    plt.xticks(np.arange(0, max_val + 2, 5))

    plt.grid(True)
    plt.tight_layout()

    # Save plot (make sure the target directory exists first)
    os.makedirs(f"fig/{dataset}", exist_ok=True)
    plot_path = f"fig/{dataset}/not_overfitting_accuracy_vs_delta_fold_{fold}_w{w}.png"
    plt.savefig(plot_path)
    plt.close()
    print(f"✅ Plot saved: {plot_path}")


def print_correlation_ppl_valid_classific_test(epochs, fold, w, dataset):
    """Correlate |perplexity gap| with accuracy over all epoch pairs of a fold.

    Reads the classification results and the difference matrix of the given
    fold/window, adds a ``delta`` column holding the absolute matrix entry of
    each row's (``ad_epoch``, ``cn_epoch``) pair, drops rows lacking
    ``accuracy`` or ``delta``, and then:

    * writes the augmented table to
      ``{dataset}/{dataset}_results/with_delta_classification_results_fold_{fold}_w{w}.csv``;
    * saves a scatter plot with linear fit and Pearson/Spearman annotation to
      ``fig/{dataset}/accuracy_vs_delta_fold_{fold}_w{w}.png``.

    Args:
        epochs: List of epoch identifiers in matrix row/column order
            (``epochs.index`` is used for the lookup).
        fold: Fold identifier used in the file names and the plot title.
        w: Window identifier used in the file names and the plot title.
        dataset: Dataset name; root directory of the inputs and outputs.

    Returns:
        None. Returns early (after printing a message) when the inputs cannot
        be loaded or fewer than two usable rows remain.
    """
    try:
        results_df = pd.read_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv")
        matrix = pd.read_csv(
            f"{dataset}/{dataset}_matrices/matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index_col=0
        ).values
    except Exception as e:
        print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
        return

    def _abs_gap(record):
        # Any lookup problem (unknown epoch, position outside the matrix, ...)
        # yields NaN so that the row is discarded by the dropna below.
        try:
            row_pos = epochs.index(record['ad_epoch'])
            col_pos = epochs.index(record['cn_epoch'])
            return abs(matrix[row_pos, col_pos])
        except Exception:
            return np.nan

    results_df['delta'] = [_abs_gap(record) for _, record in results_df.iterrows()]
    results_df = results_df.dropna(subset=['accuracy', 'delta'])

    if len(results_df) < 2:
        print(f"⚠️ Not enough valid data to compute correlation for fold={fold}, w={w}")
        return

    gap = results_df['delta']
    acc = results_df['accuracy']

    # Correlations come first so nothing is written if they fail.
    pearson_corr, pearson_p = pearsonr(gap, acc)
    spearman_corr, spearman_p = spearmanr(gap, acc)

    # Persist the table together with its 'delta' column.
    results_df.to_csv(f"{dataset}/{dataset}_results/with_delta_classification_results_fold_{fold}_w{w}.csv", index=False)

    # Scatter plot
    plt.figure(figsize=(8, 6))
    plt.scatter(gap, acc, alpha=0.7)

    # Least-squares line drawn over the sorted deltas
    coeffs = np.polyfit(gap, acc, 1)
    fitted = np.poly1d(coeffs)
    sorted_gap = np.sort(gap)
    plt.plot(sorted_gap, fitted(sorted_gap), "r--", label=f"y={coeffs[0]:.4f}x + {coeffs[1]:.4f}")

    # Correlation summary in the lower-left corner (axes coordinates)
    summary = (
        f"Pearson r: {pearson_corr:.4f} (p={pearson_p:.4e})\n"
        f"Spearman r: {spearman_corr:.4f} (p={spearman_p:.4e})"
    )
    plt.text(0.05, 0.05, summary,
             transform=plt.gca().transAxes,
             bbox=dict(facecolor='white', alpha=0.8))

    plt.xlabel("Delta Perplexity")
    plt.ylabel("Accuracy")
    plt.title(f"Accuracy vs. Delta Perplexity – Fold {fold}, Window {w}")
    plt.yticks(np.arange(0, 1.01, 0.05))
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    # Save plot
    plot_path = f"fig/{dataset}/accuracy_vs_delta_fold_{fold}_w{w}.png"
    plt.savefig(plot_path)
    plt.close()
    print(f"✅ Plot saved: {plot_path}")


In [None]:
def print_average_accuracy_not_overfitting_delta(epochs, folds, w, dataset, disease_class="ad", control_class="cn"):
    """Average the accuracy of the smallest-delta, pre-minimum config per fold.

    For each fold the ``with_delta`` classification results and the per-epoch
    mean perplexities are loaded. Rows whose ``ad_epoch``/``cn_epoch`` do not
    exceed the epoch with the lowest mean perplexity of the respective model
    are kept, and among them the first row with the smallest ``delta`` is
    selected. Mean/std of the selected accuracies and F1 values are printed.

    Args:
        epochs: Sequence of epoch identifiers, in the row order of the mean
            CSV files.
        folds: Iterable of fold identifiers.
        w: Window identifier used in the file names.
        dataset: Dataset name; root directory of the inputs.
        disease_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 AD". Defaults to ``"ad"``.
        control_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 CN". Defaults to ``"cn"``.

    Returns:
        List with the selected accuracy of every usable fold (possibly
        empty), or None when the files of a fold cannot be loaded. Folds
        without a valid mean or with fewer than two candidate rows are
        skipped instead of aborting the whole computation.
    """
    f1_disease_col = f"f1_{disease_class}"
    f1_control_col = f"f1_{control_class}"

    def _stat(func, values):
        # Ignore missing (None) entries; NaN instead of a warning when empty.
        present = [v for v in values if v is not None]
        return func(present) if present else np.nan

    list_of_accuracies = []
    list_of_f1_ad = []
    list_of_f1_cn = []
    list_of_macro_f1 = []
    for fold in folds:
        try:
            # Load results
            results_df = pd.read_csv(f"{dataset}/{dataset}_results/with_delta_classification_results_fold_{fold}_w{w}.csv")

            mean_ad_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv", index_col=0)
            mean_cn_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_cn_fold_{fold}_w{w}.csv", index_col=0)
        except Exception as e:
            print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
            return

        try:
            # nanargmin, not argmin: argmin returns the position of the first
            # NaN, so an epoch without scores would be taken as the minimum.
            min_ad_epoch_val = epochs[np.nanargmin(mean_ad_df.values)]
            min_cn_epoch_val = epochs[np.nanargmin(mean_cn_df.values)]
        except ValueError:
            # Raised when a mean table is empty or entirely NaN.
            print(f"⚠️ No valid mean perplexity for fold={fold}, w={w}")
            continue

        # Filter using actual epoch values; dropna returns a new frame, so
        # the filtered slice is never modified in place.
        candidates = results_df[
            (results_df['ad_epoch'] <= min_ad_epoch_val) & (results_df['cn_epoch'] <= min_cn_epoch_val)
        ].dropna(subset=['accuracy'])

        if len(candidates) < 2:
            print(f"⚠️ Not enough valid data to compute average accuracy for fold={fold}, w={w}")
            continue

        # Take the first configuration with the smallest delta.
        smallest_delta = candidates['delta'].min()
        best_config = candidates[candidates['delta'] == smallest_delta].iloc[0]

        list_of_accuracies.append(best_config['accuracy'])
        # Series.get yields None when the column is absent from the CSV.
        list_of_f1_ad.append(best_config.get(f1_disease_col))
        list_of_f1_cn.append(best_config.get(f1_control_col))
        list_of_macro_f1.append(best_config.get('macro_f1'))

    # Calculate the average accuracy across all folds
    if list_of_accuracies:
        average_accuracy = np.mean(list_of_accuracies)
        standard_deviation = np.std(list_of_accuracies)
        average_f1_ad = _stat(np.mean, list_of_f1_ad)
        average_f1_cn = _stat(np.mean, list_of_f1_cn)
        average_macro_f1 = _stat(np.mean, list_of_macro_f1)
        std_f1_macro_f1 = _stat(np.std, list_of_macro_f1)
        print(f"Average accuracy across folds - Dataset {dataset}: {average_accuracy:.4f} ± {standard_deviation:.4f}")
        print(f"Average F1 AD across folds - Dataset {dataset}: {average_f1_ad:.4f}")
        print(f"Average F1 CN across folds - Dataset {dataset}: {average_f1_cn:.4f}")
        print(f"Average Macro F1 across folds - Dataset {dataset}: {average_macro_f1:.4f}, ± {std_f1_macro_f1:.4f}")
    else:
        print(f"No valid accuracies found for- Window {w} - Dataset {dataset}.")
    return list_of_accuracies

In [None]:
def print_average_accuracy_not_overfitting_baseline(epochs, folds, w, dataset, disease_class="ad", control_class="cn"):
    """Average the accuracy obtained at each model's lowest-perplexity epoch.

    For each fold the classification results and the per-epoch mean
    perplexities are loaded; the row whose ``ad_epoch`` and ``cn_epoch`` equal
    the epochs with the lowest mean perplexity of the respective model is
    selected. Mean/std of the selected accuracies and F1 values are printed.

    Args:
        epochs: Sequence of epoch identifiers, in the row order of the mean
            CSV files.
        folds: Iterable of fold identifiers.
        w: Window identifier used in the file names.
        dataset: Dataset name; root directory of the inputs.
        disease_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 AD". Defaults to ``"ad"``.
        control_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 CN". Defaults to ``"cn"``.

    Returns:
        List with the selected accuracy of every usable fold, or None when no
        fold was usable or the files of a fold cannot be loaded.
    """
    f1_disease_col = f"f1_{disease_class}"
    f1_control_col = f"f1_{control_class}"

    def _stat(func, values):
        # Ignore missing (None) entries; NaN instead of a warning when empty.
        present = [v for v in values if v is not None]
        return func(present) if present else np.nan

    list_of_accuracies = []
    list_of_f1_ad = []
    list_of_f1_cn = []
    list_of_macro_f1 = []
    for fold in folds:
        try:
            # Load results
            results_df = pd.read_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv")

            # Load per-epoch means
            mean_ad_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv", index_col=0)
            mean_cn_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_cn_fold_{fold}_w{w}.csv", index_col=0)
        except Exception as e:
            print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
            return

        try:
            # nanargmin, not argmin: argmin returns the position of the first
            # NaN, so an epoch without scores would be taken as the minimum.
            min_ad_epoch_val = epochs[np.nanargmin(mean_ad_df.values)]
            min_cn_epoch_val = epochs[np.nanargmin(mean_cn_df.values)]
        except ValueError:
            # Raised when a mean table is empty or entirely NaN.
            print(f"⚠️ No valid mean perplexity for fold={fold}, w={w}")
            continue

        # Select the row that exactly matches both minimum epochs
        matching_row = results_df[
            (results_df['ad_epoch'] == min_ad_epoch_val) &
            (results_df['cn_epoch'] == min_cn_epoch_val)
        ]

        if matching_row.empty:
            print(f"⚠️ No matching configuration found for min_ad={min_ad_epoch_val}, min_cn={min_cn_epoch_val} in fold={fold}")
            continue

        chosen = matching_row.iloc[0]
        list_of_accuracies.append(chosen['accuracy'])
        # Series.get yields None when the column is absent from the CSV.
        list_of_f1_ad.append(chosen.get(f1_disease_col))
        list_of_f1_cn.append(chosen.get(f1_control_col))
        list_of_macro_f1.append(chosen.get('macro_f1'))

    # Compute and report overall average and standard deviation
    if not list_of_accuracies:
        print(f"No valid accuracies found for any fold in Window {w} - Dataset {dataset}.")
        return None

    average_accuracy = np.mean(list_of_accuracies)
    standard_deviation = np.std(list_of_accuracies)
    average_f1_ad = _stat(np.mean, list_of_f1_ad)
    average_f1_cn = _stat(np.mean, list_of_f1_cn)
    average_macro_f1 = _stat(np.mean, list_of_macro_f1)
    std_macro_f1 = _stat(np.std, list_of_macro_f1)
    print(f"Average accuracy across folds - Dataset {dataset}: {average_accuracy:.4f} ± {standard_deviation:.4f}")
    print(f"Average F1 AD across folds - Dataset {dataset}: {average_f1_ad:.4f}")
    print(f"Average F1 CN across folds - Dataset {dataset}: {average_f1_cn:.4f}")
    print(f"Average Macro F1 across folds - Dataset {dataset}: {average_macro_f1:.4f}, ± {std_macro_f1:.4f}")
    return list_of_accuracies


In [None]:
def print_oracle_classification_without_overfitting(epochs, folds, w, dataset, disease_class="ad", control_class="cn"):
    """Average the best achievable accuracy among pre-minimum configurations.

    For each fold the ``with_delta`` classification results and the per-epoch
    mean perplexities are loaded. Rows whose ``ad_epoch``/``cn_epoch`` do not
    exceed the epoch with the lowest mean perplexity of the respective model
    are kept and the one with the highest accuracy is selected (first such
    row on ties). Mean/std of the selected accuracies and F1 values are
    printed.

    Args:
        epochs: Sequence of epoch identifiers, in the row order of the mean
            CSV files.
        folds: Iterable of fold identifiers.
        w: Window identifier used in the file names.
        dataset: Dataset name; root directory of the inputs.
        disease_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 AD". Defaults to ``"ad"``.
        control_class: Class tag of the ``f1_<tag>`` column reported as
            "F1 CN". Defaults to ``"cn"``.

    Returns:
        List with the selected accuracy of every usable fold, or None when no
        fold was usable or the files of a fold cannot be loaded.
    """
    f1_disease_col = f"f1_{disease_class}"
    f1_control_col = f"f1_{control_class}"

    def _stat(func, values):
        # Ignore missing (None) entries; NaN instead of a warning when empty.
        present = [v for v in values if v is not None]
        return func(present) if present else np.nan

    list_of_avg_accuracies = []
    list_of_f1_ad = []
    list_of_f1_cn = []
    list_of_macro_f1 = []

    for fold in folds:
        try:
            # Load results
            results_df = pd.read_csv(f"{dataset}/{dataset}_results/with_delta_classification_results_fold_{fold}_w{w}.csv")

            # Load per-epoch means
            mean_ad_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_ad_fold_{fold}_w{w}.csv", index_col=0)
            mean_cn_df = pd.read_csv(f"{dataset}/{dataset}_matrices/mean_cn_fold_{fold}_w{w}.csv", index_col=0)
        except Exception as e:
            print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
            return

        try:
            # nanargmin, not argmin: argmin returns the position of the first
            # NaN, so an epoch without scores would be taken as the minimum.
            min_ad_epoch_val = epochs[np.nanargmin(mean_ad_df.values)]
            min_cn_epoch_val = epochs[np.nanargmin(mean_cn_df.values)]
        except ValueError:
            # Raised when a mean table is empty or entirely NaN.
            print(f"⚠️ No valid mean perplexity for fold={fold}, w={w}")
            continue

        # Keep only rows matching the no-overfitting condition
        filtered_df = results_df[
            (results_df['ad_epoch'] <= min_ad_epoch_val) &
            (results_df['cn_epoch'] <= min_cn_epoch_val)
        ]

        if filtered_df.empty:
            print(f"⚠️ No matching configuration found for min_ad={min_ad_epoch_val}, min_cn={min_cn_epoch_val} in fold={fold}")
            continue

        # Highest accuracy first; a stable sort keeps file order among ties,
        # which makes the selection deterministic.
        best_row = filtered_df.sort_values(by='accuracy', ascending=False, kind='mergesort').iloc[0]

        list_of_avg_accuracies.append(best_row['accuracy'])
        # Series.get yields None when the column is absent from the CSV.
        list_of_f1_ad.append(best_row.get(f1_disease_col))
        list_of_f1_cn.append(best_row.get(f1_control_col))
        list_of_macro_f1.append(best_row.get('macro_f1'))

    if not list_of_avg_accuracies:
        print(f"No valid accuracies found for any fold in Window {w} - Dataset {dataset}.")
        # A single None (not a truthy `(None, None)` tuple) so the failure
        # value has the same shape as the list returned on success.
        return None

    average_accuracy = np.mean(list_of_avg_accuracies)
    standard_deviation = np.std(list_of_avg_accuracies)
    average_f1_ad = _stat(np.mean, list_of_f1_ad)
    average_f1_cn = _stat(np.mean, list_of_f1_cn)
    average_macro_f1 = _stat(np.mean, list_of_macro_f1)
    std_f1_macro_f1 = _stat(np.std, list_of_macro_f1)
    print(f"Average accuracy across folds - Dataset {dataset}: {average_accuracy:.4f} ± {standard_deviation:.4f}")
    print(f"Average F1 AD across folds - Dataset {dataset}: {average_f1_ad:.4f}")
    print(f"Average F1 CN across folds - Dataset {dataset}: {average_f1_cn:.4f}")
    print(f"Average Macro F1 across folds - Dataset {dataset}: {average_macro_f1:.4f}, ± {std_f1_macro_f1:.4f}")
    return list_of_avg_accuracies


In [None]:
def load_global_perplexities_only_same_class(base_dir, group, epochs, subj_classes, transcription_type, batch_size):
    """Collect per-epoch scores of the ``group`` model on subjects of that class.

    Every immediate sub-directory of ``base_dir`` is treated as one patient.
    Only patients for which ``subj_classes[patient]`` equals ``group`` are
    read; for each of them and each epoch the score comes from the file
    ``<patient>_modello_<group>_<transcription_type>_<batch_size>b_<epoch>ep_global_ppl_score.txt``
    inside the patient folder.

    Args:
        base_dir: Directory whose sub-directories are the patient folders.
        group: Model tag; also the class a patient must have to be included.
        epochs: Epoch identifiers; they become the keys of the result.
        subj_classes: Mapping from patient folder name to class tag.
        transcription_type: Transcription tag embedded in the file name.
        batch_size: Batch size embedded in the file name.

    Returns:
        dict mapping each epoch to the list of scores found (sorted patient
        order; missing score files are skipped silently).

    Raises:
        KeyError: If a patient folder has no entry in ``subj_classes``.
        ValueError: If a score file does not contain a parsable float.
    """
    subject_names = sorted(
        entry for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )

    scores_by_epoch = {}
    for epoch in epochs:
        scores_by_epoch[epoch] = []

    for subject in subject_names:
        if subj_classes[subject] != group:
            # Subject belongs to another class: not part of this selection.
            continue
        subject_folder = os.path.join(base_dir, subject)
        for epoch in epochs:
            score_name = f"{subject}_modello_{group}_{transcription_type}_{batch_size}b_{epoch}ep_global_ppl_score.txt"
            try:
                handle = open(os.path.join(subject_folder, score_name), 'r')
            except FileNotFoundError:
                continue
            with handle:
                scores_by_epoch[epoch].append(float(handle.read().strip()))

    return scores_by_epoch

def load_global_perplexities_only_inverse_class(base_dir, group, epochs, subj_classes, transcription_type, batch_size):
    """Collect per-epoch scores of the ``group`` model on subjects of other classes.

    Every immediate sub-directory of ``base_dir`` is treated as one patient.
    Only patients for which ``subj_classes[patient]`` differs from ``group``
    are read; for each of them and each epoch the score comes from the file
    ``<patient>_modello_<group>_<transcription_type>_<batch_size>b_<epoch>ep_global_ppl_score.txt``
    inside the patient folder.

    Args:
        base_dir: Directory whose sub-directories are the patient folders.
        group: Model tag; patients of this class are excluded.
        epochs: Epoch identifiers; they become the keys of the result.
        subj_classes: Mapping from patient folder name to class tag.
        transcription_type: Transcription tag embedded in the file name.
        batch_size: Batch size embedded in the file name.

    Returns:
        dict mapping each epoch to the list of scores found (sorted patient
        order; missing score files are skipped silently).

    Raises:
        KeyError: If a patient folder has no entry in ``subj_classes``.
        ValueError: If a score file does not contain a parsable float.
    """
    subject_names = sorted(
        entry for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )

    scores_by_epoch = {}
    for epoch in epochs:
        scores_by_epoch[epoch] = []

    for subject in subject_names:
        if subj_classes[subject] == group:
            # Subject belongs to the model's own class: excluded here.
            continue
        subject_folder = os.path.join(base_dir, subject)
        for epoch in epochs:
            score_name = f"{subject}_modello_{group}_{transcription_type}_{batch_size}b_{epoch}ep_global_ppl_score.txt"
            try:
                handle = open(os.path.join(subject_folder, score_name), 'r')
            except FileNotFoundError:
                continue
            with handle:
                scores_by_epoch[epoch].append(float(handle.read().strip()))

    return scores_by_epoch

In [None]:
def process_and_plot_global_same_class(w, epochs, fold, subj_classes, dataset, transcription_type, batch_size):
    """Plot each model's dev-set perplexity on its own class and save CSVs.

    Scores are loaded with ``load_global_perplexities_only_same_class`` from
    ``{dataset}/{dataset}_fold_{fold}/{dataset}_fold_{fold}_w{w}_l0/dev/{transcription_type}``
    for the ``"ad"`` model on ``"ad"`` subjects and the ``"cn"`` model on
    ``"cn"`` subjects.

    Outputs (directories are created when missing):
        * ``fig/{dataset}/same_class_perplexity_evolution_dev_set_fold_{fold}_w{w}.png``
        * ``{dataset}/{dataset}_matrices/same_class_matrix_diff_ad_cn_fold_{fold}_w{w}.csv``:
          entry (i, j) is the mean "ad" score at ``epochs[i]`` minus the mean
          "cn" score at ``epochs[j]``; NaN when either epoch has no scores.
        * ``{dataset}/{dataset}_matrices/same_class_mean_ad_fold_{fold}_w{w}.csv``
          and ``same_class_mean_cn_fold_{fold}_w{w}.csv``: the per-epoch means.

    The cross-class perplexities (each model on the other class's subjects)
    are not loaded: no output of this function uses them.

    Args:
        w: Window identifier used in directory and file names.
        epochs: Sequence of epoch identifiers (x axis, CSV index/columns).
        fold: Fold identifier used in directory and file names.
        subj_classes: Mapping from patient folder name to class tag.
        dataset: Dataset name; also the root directory of inputs/outputs.
        transcription_type: Transcription tag (sub-directory and file name).
        batch_size: Batch size embedded in the score file names.

    Returns:
        None.
    """
    base_dir = f"{dataset}/{dataset}_fold_{fold}/{dataset}_fold_{fold}_w{w}_l0/dev/{transcription_type}"

    dict_ad = load_global_perplexities_only_same_class(base_dir, "ad", epochs, subj_classes, transcription_type, batch_size)
    dict_cn = load_global_perplexities_only_same_class(base_dir, "cn", epochs, subj_classes, transcription_type, batch_size)

    # Empty lists map straight to NaN so np.nanmean never warns about an
    # empty slice; the resulting value is the same.
    mean_ad = [np.nanmean(dict_ad[e]) if dict_ad[e] else np.nan for e in epochs]
    mean_cn = [np.nanmean(dict_cn[e]) if dict_cn[e] else np.nan for e in epochs]

    # Plotting: the curves carry no labels, so no legend is requested
    # (calling plt.legend() without labelled artists only emits a warning).
    plt.figure(figsize=(10, 6))
    plt.plot(epochs, mean_ad, marker='o', color='red', markersize=4)
    plt.plot(epochs, mean_cn, marker='o', color='blue', markersize=4)

    plt.grid(True, linestyle='--', alpha=0.7)

    # Fixed tick positions and y-range (not derived from `epochs`).
    plt.xticks(ticks=[5,10,15], fontsize=20)
    plt.yticks(fontsize=20)

    plt.ylim(10,70)

    plt.tight_layout()

    # Create the figure directory if needed so saving does not depend on
    # another function having run first.
    os.makedirs(f"fig/{dataset}", exist_ok=True)
    plot_name = f"fig/{dataset}/same_class_perplexity_evolution_dev_set_fold_{fold}_w{w}"
    plt.savefig(f"{plot_name}.png")

    plt.close()

    # Difference matrix (AD - CN). Each epoch mean is computed once and the
    # matrix is built by broadcasting; an empty epoch yields NaN, which
    # propagates to its whole row/column exactly like the explicit check did.
    plain_ad = np.array([np.mean(dict_ad[e]) if dict_ad[e] else np.nan for e in epochs], dtype=float)
    plain_cn = np.array([np.mean(dict_cn[e]) if dict_cn[e] else np.nan for e in epochs], dtype=float)
    matrix = plain_ad[:, None] - plain_cn[None, :]

    os.makedirs(f"{dataset}/{dataset}_matrices", exist_ok=True)

    # Save the matrix to a CSV file
    matrix_df = pd.DataFrame(matrix, index=epochs, columns=epochs)
    matrix_df.to_csv(f"{dataset}/{dataset}_matrices/same_class_matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index=True, header=True)

    # Save the per-epoch means
    mean_ad_df = pd.DataFrame(mean_ad, index=epochs, columns=['Mean AD'])
    mean_ad_df.to_csv(f"{dataset}/{dataset}_matrices/same_class_mean_ad_fold_{fold}_w{w}.csv", index=True, header=True)
    mean_cn_df = pd.DataFrame(mean_cn, index=epochs, columns=['Mean CN'])
    mean_cn_df.to_csv(f"{dataset}/{dataset}_matrices/same_class_mean_cn_fold_{fold}_w{w}.csv", index=True, header=True)

In [None]:
def print_correlation_ppl_valid_classific_test_same_class(epochs, fold, w, dataset):
    """Correlate same-class |perplexity gap| with accuracy for one fold.

    Reads the classification results and the ``same_class`` difference
    matrix of the given fold/window, adds a ``delta`` column holding the
    absolute matrix entry of each row's (``ad_epoch``, ``cn_epoch``) pair,
    drops rows lacking ``accuracy`` or ``delta``, and then:

    * writes the augmented table to
      ``{dataset}/{dataset}_results/same_class_with_delta_classification_results_fold_{fold}_w{w}.csv``;
    * saves a scatter plot with linear fit and Pearson/Spearman annotation to
      ``fig/{dataset}/same_class_accuracy_vs_delta_fold_{fold}_w{w}.png``.

    Args:
        epochs: List of epoch identifiers in matrix row/column order
            (``epochs.index`` is used for the lookup).
        fold: Fold identifier used in the file names and the plot title.
        w: Window identifier used in the file names and the plot title.
        dataset: Dataset name; root directory of the inputs and outputs.

    Returns:
        None. Returns early (after printing a message) when the inputs cannot
        be loaded or fewer than two usable rows remain.
    """
    try:
        results_df = pd.read_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv")
        matrix = pd.read_csv(
            f"{dataset}/{dataset}_matrices/same_class_matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index_col=0
        ).values
    except Exception as e:
        print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
        return

    def _abs_gap(record):
        # Any lookup problem (unknown epoch, position outside the matrix, ...)
        # yields NaN so that the row is discarded by the dropna below.
        try:
            row_pos = epochs.index(record['ad_epoch'])
            col_pos = epochs.index(record['cn_epoch'])
            return abs(matrix[row_pos, col_pos])
        except Exception:
            return np.nan

    results_df['delta'] = [_abs_gap(record) for _, record in results_df.iterrows()]
    results_df = results_df.dropna(subset=['accuracy', 'delta'])

    if len(results_df) < 2:
        print(f"⚠️ Not enough valid data to compute correlation for fold={fold}, w={w}")
        return

    gap = results_df['delta']
    acc = results_df['accuracy']

    # Correlations come first so nothing is written if they fail.
    pearson_corr, pearson_p = pearsonr(gap, acc)
    spearman_corr, spearman_p = spearmanr(gap, acc)

    # Persist the table together with its 'delta' column.
    results_df.to_csv(f"{dataset}/{dataset}_results/same_class_with_delta_classification_results_fold_{fold}_w{w}.csv", index=False)

    # Scatter plot
    plt.figure(figsize=(8, 6))
    plt.scatter(gap, acc, alpha=0.7)

    # Least-squares line drawn over the sorted deltas
    coeffs = np.polyfit(gap, acc, 1)
    fitted = np.poly1d(coeffs)
    sorted_gap = np.sort(gap)
    plt.plot(sorted_gap, fitted(sorted_gap), "r--", label=f"y={coeffs[0]:.4f}x + {coeffs[1]:.4f}")

    # Correlation summary in the lower-left corner (axes coordinates)
    summary = (
        f"Pearson r: {pearson_corr:.4f} (p={pearson_p:.4e})\n"
        f"Spearman r: {spearman_corr:.4f} (p={spearman_p:.4e})"
    )
    plt.text(0.05, 0.05, summary,
             transform=plt.gca().transAxes,
             bbox=dict(facecolor='white', alpha=0.8))

    plt.xlabel("Delta Perplexity")
    plt.ylabel("Accuracy")
    plt.title(f"Accuracy vs. Delta Perplexity – Fold {fold}, Window {w}")
    plt.yticks(np.arange(0, 1.01, 0.05))
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    # Save plot
    plot_path = f"fig/{dataset}/same_class_accuracy_vs_delta_fold_{fold}_w{w}.png"
    plt.savefig(plot_path)
    plt.close()
    print(f"✅ Plot saved: {plot_path}")


In [None]:
def print_correlation_ppl_valid_classific_test_not_overfitting_same_class(epochs, fold, w, dataset):
    """Plot test accuracy against |delta perplexity| for non-overfitting epoch pairs.

    Loads the classification results of one fold together with the
    "same class" matrices (AD-CN difference matrix, mean AD and mean CN
    perplexities).  Only the ``(ad_epoch, cn_epoch)`` rows whose epochs are not
    later than the epochs of minimum mean AD / mean CN perplexity are kept.
    Each kept row receives a ``delta`` column holding the absolute value of the
    difference-matrix entry for its epoch pair; a scatter plot with a linear
    fit and Pearson/Spearman coefficients is then written to
    ``fig/{dataset}/``.

    Args:
        epochs: List of epoch values.  Position ``k`` in this list is used as
            row/column ``k`` of the matrices (assumes the matrices were written
            in the same epoch order -- TODO confirm against the producer).
        fold: Fold identifier, used only to build file names.
        w: Window size, used only to build file names.
        dataset: Dataset name; doubles as the root directory of the inputs.

    Returns:
        None.  The function returns early (after printing a message) when the
        inputs cannot be loaded or fewer than two valid rows remain.
    """
    matrices_dir = f"{dataset}/{dataset}_matrices"
    try:
        # Load results
        results_df = pd.read_csv(f"{dataset}/{dataset}_results/classification_results_fold_{fold}_w{w}.csv")

        # Difference matrix as a plain NumPy array (positional indexing below)
        matrix = pd.read_csv(f"{matrices_dir}/same_class_matrix_diff_ad_cn_fold_{fold}_w{w}.csv", index_col=0).values

        mean_ad_df = pd.read_csv(f"{matrices_dir}/same_class_mean_ad_fold_{fold}_w{w}.csv", index_col=0)
        mean_cn_df = pd.read_csv(f"{matrices_dir}/same_class_mean_cn_fold_{fold}_w{w}.csv", index_col=0)
    except Exception as e:
        print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
        return

    # argmin() works on the flattened array; the resulting position is mapped
    # back to an epoch value through `epochs`.
    min_ad_epoch_val = epochs[mean_ad_df.values.argmin()]
    min_cn_epoch_val = epochs[mean_cn_df.values.argmin()]

    # Keep only epoch pairs up to the perplexity minima.  The explicit copy
    # makes the later column assignment operate on an independent frame
    # instead of a slice of the loaded one (no chained-assignment ambiguity).
    not_overfitting = (
        (results_df['ad_epoch'] <= min_ad_epoch_val)
        & (results_df['cn_epoch'] <= min_cn_epoch_val)
    )
    results_df = results_df[not_overfitting].copy()

    # Compute deltas *after* filtering
    deltas = []
    for ad_epoch, cn_epoch in zip(results_df['ad_epoch'], results_df['cn_epoch']):
        try:
            i = epochs.index(ad_epoch)
            j = epochs.index(cn_epoch)
            deltas.append(abs(matrix[i, j]))
        except (ValueError, IndexError):
            # Epoch not listed in `epochs`, or matrix smaller than `epochs`:
            # mark the row as invalid so it is dropped below.
            deltas.append(np.nan)

    results_df['delta'] = deltas
    results_df = results_df.dropna(subset=['accuracy', 'delta'])

    if len(results_df) < 2:
        print(f"⚠️ Not enough valid data to compute correlation for fold={fold}, w={w}")
        return

    # Correlation computations
    pearson_corr, pearson_p = pearsonr(results_df['delta'], results_df['accuracy'])
    spearman_corr, spearman_p = spearmanr(results_df['delta'], results_df['accuracy'])

    plot_path = f"fig/{dataset}/same_class_not_overfitting_accuracy_vs_delta_fold_{fold}_w{w}.png"

    # Plotting
    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(results_df['delta'], results_df['accuracy'], alpha=0.7)

        # Regression line (degree-1 least-squares fit)
        z = np.polyfit(results_df['delta'], results_df['accuracy'], 1)
        p = np.poly1d(z)
        x_vals = np.sort(results_df['delta'])
        plt.plot(x_vals, p(x_vals), "r--", label=f"y={z[0]:.4f}x + {z[1]:.4f}")

        # Text annotation, placed in axes coordinates (bottom-left corner)
        plt.text(0.05, 0.05,
                 f"Pearson r: {pearson_corr:.4f} (p={pearson_p:.4e})\n"
                 f"Spearman r: {spearman_corr:.4f} (p={spearman_p:.4e})",
                 transform=plt.gca().transAxes,
                 bbox=dict(facecolor='white', alpha=0.8))

        plt.xlabel("Delta Perplexity")
        plt.ylabel("Accuracy")
        plt.title(f"Accuracy vs. Delta Perplexity – Fold {fold}, Window {w}")
        plt.yticks(np.arange(0, 1.01, 0.05))
        plt.grid(True)
        plt.legend()
        plt.tight_layout()

        # Save plot; create the target directory first so a fresh checkout
        # does not fail with FileNotFoundError.
        os.makedirs(os.path.dirname(plot_path), exist_ok=True)
        plt.savefig(plot_path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    print(f"✅ Plot saved: {plot_path}")


In [None]:
def print_average_accuracy_not_overfitting_baseline_same_class(epochs, folds, w, dataset):
    """Report the baseline metrics averaged over folds (same-class matrices).

    For every fold the baseline configuration is the results row whose
    ``ad_epoch`` / ``cn_epoch`` equal the epochs at which the mean AD and mean
    CN perplexities are minimal.  Accuracy (and, when the columns exist,
    ``f1_ad``, ``f1_cn``, ``macro_f1``) of that row are collected and their
    means / standard deviations printed.

    Args:
        epochs: List of epoch values; the argmin position of each mean
            perplexity table is looked up in this list.
        folds: Iterable of fold identifiers.
        w: Window size used in the file names.
        dataset: Dataset name; also the root directory of the input files.

    Returns:
        The mean accuracy over the folds that had a matching row, or ``None``
        when nothing was collected or an input file could not be loaded (the
        latter aborts the whole computation, not just the fold).
    """
    optional_metrics = ('f1_ad', 'f1_cn', 'macro_f1')
    accuracies = []
    optional_values = {metric: [] for metric in optional_metrics}

    for fold in folds:
        matrices_dir = f"{dataset}/{dataset}_matrices"
        try:
            # Per-fold results table followed by the two mean-perplexity tables
            results_df = pd.read_csv(f"{dataset}/{dataset}_results/same_class_with_delta_classification_results_fold_{fold}_w{w}.csv")
            mean_ad_df = pd.read_csv(f"{matrices_dir}/same_class_mean_ad_fold_{fold}_w{w}.csv", index_col=0)
            mean_cn_df = pd.read_csv(f"{matrices_dir}/same_class_mean_cn_fold_{fold}_w{w}.csv", index_col=0)
        except Exception as e:
            print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
            return

        # Positions of the minima, translated to epoch values
        ad_position = mean_ad_df.values.argmin()
        cn_position = mean_cn_df.values.argmin()
        min_ad_epoch_val = epochs[ad_position]
        min_cn_epoch_val = epochs[cn_position]

        # Rows matching exactly the (min AD epoch, min CN epoch) pair
        ad_hit = results_df['ad_epoch'] == min_ad_epoch_val
        cn_hit = results_df['cn_epoch'] == min_cn_epoch_val
        candidates = results_df[ad_hit & cn_hit]

        if candidates.empty:
            print(f"⚠️ No matching configuration found for min_ad={min_ad_epoch_val}, min_cn={min_cn_epoch_val} in fold={fold}")
            continue

        chosen = candidates.iloc[0]
        accuracies.append(chosen['accuracy'])
        for metric in optional_metrics:
            # Missing metric columns are recorded as None and skipped later
            value = chosen[metric] if metric in candidates.columns else None
            optional_values[metric].append(value)

    if not accuracies:
        print(f"No valid accuracies found for any fold in Window {w} - Dataset {dataset}.")
        return None

    def _available(values):
        # Drop the None placeholders of folds lacking the metric column
        return [v for v in values if v is not None]

    # Overall average and standard deviation across folds
    average_accuracy = np.mean(accuracies)
    standard_deviation = np.std(accuracies)
    average_f1_ad = np.mean(_available(optional_values['f1_ad']))
    average_f1_cn = np.mean(_available(optional_values['f1_cn']))
    macro_values = _available(optional_values['macro_f1'])
    average_macro_f1 = np.mean(macro_values)
    std_macro_f1 = np.std(macro_values)

    print(f"Average accuracy across folds for dataset {dataset}: {average_accuracy:.4f} ± {standard_deviation:.4f}")
    print(f"Average F1 AD across folds - Dataset {dataset}: {average_f1_ad:.4f}")
    print(f"Average F1 CN across folds - Dataset {dataset}: {average_f1_cn:.4f}")
    print(f"Average Macro F1 across folds - Dataset {dataset}: {average_macro_f1:.4f}, ± {std_macro_f1:.4f}")
    return average_accuracy


In [None]:
def print_average_accuracy_not_overfitting_delta_same_class(epochs, folds, w, dataset):
    """Report metrics of the minimum-delta configuration averaged over folds.

    For every fold, the results table (which already carries a ``delta``
    column) is restricted to the ``(ad_epoch, cn_epoch)`` pairs that are not
    later than the epochs of minimum mean AD / mean CN perplexity.  Among the
    remaining rows the one with the smallest ``delta`` is selected, and its
    accuracy (plus ``f1_ad``, ``f1_cn``, ``macro_f1`` when present) is
    collected.  Means and standard deviations across folds are printed.

    Args:
        epochs: List of epoch values; the argmin position of each mean
            perplexity table is looked up in this list.
        folds: Iterable of fold identifiers.
        w: Window size used in the file names.
        dataset: Dataset name; also the root directory of the input files.

    Returns:
        The mean accuracy across folds, or ``None`` when an input file cannot
        be loaded, a fold has fewer than two usable rows, or no accuracy was
        collected at all (e.g. ``folds`` is empty).
    """
    list_of_accuracies = []
    list_of_f1_ad = []
    list_of_f1_cn = []
    list_of_macro_f1 = []

    for fold in folds:
        try:
            # Load results (already augmented with the `delta` column)
            results_df = pd.read_csv(f"{dataset}/{dataset}_results/same_class_with_delta_classification_results_fold_{fold}_w{w}.csv")

            mean_ad_df = pd.read_csv(f"{dataset}/{dataset}_matrices/same_class_mean_ad_fold_{fold}_w{w}.csv", index_col=0)
            mean_cn_df = pd.read_csv(f"{dataset}/{dataset}_matrices/same_class_mean_cn_fold_{fold}_w{w}.csv", index_col=0)
        except Exception as e:
            print(f"❌ Failed to load data for fold {fold}, w {w}: {e}")
            return None

        min_ad_epoch_val = epochs[mean_ad_df.values.argmin()]
        min_cn_epoch_val = epochs[mean_cn_df.values.argmin()]

        # Filter using actual epoch values; dropna() returns a new frame, so
        # nothing is mutated in place on a slice of the loaded table.
        not_overfitting = (
            (results_df['ad_epoch'] <= min_ad_epoch_val)
            & (results_df['cn_epoch'] <= min_cn_epoch_val)
        )
        results_df = results_df[not_overfitting].dropna(subset=['accuracy'])

        # NOTE(review): a single surviving row would be enough to pick a
        # minimum; the "< 2" threshold is kept as in the original -- confirm
        # whether it is intentional.
        if len(results_df) < 2:
            print(f"⚠️ Not enough valid data to compute average accuracy for fold={fold}, w={w}")
            return None

        # Take the configuration with the minimum delta (first one on ties)
        best_config = results_df.loc[results_df['delta'].idxmin()]
        list_of_accuracies.append(best_config['accuracy'])
        list_of_f1_ad.append(best_config['f1_ad'] if 'f1_ad' in best_config else None)
        list_of_f1_cn.append(best_config['f1_cn'] if 'f1_cn' in best_config else None)
        list_of_macro_f1.append(best_config['macro_f1'] if 'macro_f1' in best_config else None)

    if not list_of_accuracies:
        # Nothing collected: report it and return None instead of touching an
        # unassigned `average_accuracy`.
        print(f"No valid accuracies found for- Window {w} - Dataset {dataset}.")
        return None

    # Calculate the averages across all folds
    average_accuracy = np.mean(list_of_accuracies)
    standard_deviation = np.std(list_of_accuracies)
    average_f1_ad = np.mean([f for f in list_of_f1_ad if f is not None])
    average_f1_cn = np.mean([f for f in list_of_f1_cn if f is not None])
    valid_macro_f1 = [f for f in list_of_macro_f1 if f is not None]
    average_macro_f1 = np.mean(valid_macro_f1)
    std_macro_f1 = np.std(valid_macro_f1)
    print(f"Average accuracy across folds for dataset {dataset}: {average_accuracy:.4f} ± {standard_deviation:.4f}")
    print(f"Average F1 AD across folds - Dataset {dataset}: {average_f1_ad:.4f}")
    print(f"Average F1 CN across folds - Dataset {dataset}: {average_f1_cn:.4f}")
    print(f"Average Macro F1 across folds - Dataset {dataset}: {average_macro_f1:.4f}, ± {std_macro_f1:.4f}")
    return average_accuracy

# main both classes

The possible datasets are:
- adress
- adresso
- adress_imb_60
- adresso_imb_60
- adress_imb_40
- adresso_imb_40
- adress_imb_20
- adresso_imb_20

In [None]:
# --- Experiment configuration used by the driver loops that follow ---
folds = [2]                  # fold identifiers to process
batch_size = 12              # appears in the perplexity file names
w = 20                       # window size, appears in directory/file names
epochs = list(range(1, 16))  # epochs 1..15 inclusive
disease_class = 'ad'
control_class = 'cn'

# Full set of dataset names.
list_datasets = [
    "adress",
    "adresso",
    "adress_imb_60",
    "adresso_imb_60",
    "adress_imb_40",
    "adresso_imb_40",
    "adress_imb_20",
    "adresso_imb_20",
]

# Active selection: this assignment replaces the full list above.
list_datasets = [
    "adress",
    "adress_imb_60",
    "adress_imb_40",
    "adress_imb_20",
]

In [None]:
# Driver: for every dataset and fold run (1) dev-set perplexity processing,
# (2) test classification and (3) the correlation analyses.  A failure in
# step 1 or 2 skips the remaining steps of that fold.
for dataset in list_datasets:
    # Names whose first "_"-separated token is "adress" use the "manual"
    # transcription type; every other dataset uses "whisper-large-v3-turbo".
    transcription_type = "manual" if dataset.split("_")[0] == "adress" else "whisper-large-v3-turbo"

    subj_classes = {}

    for fold in folds:
        print(f"\n=== Processing Fold {fold} ===")

        # Build a patient_id -> label dictionary from the fold's label file
        labels_df = pd.read_csv(f"{dataset}/labels_fold_{fold}.csv", sep=';')
        subj_classes[fold] = dict(zip(labels_df['patient_id'], labels_df['label']))

        # Step 1: global perplexity on the dev set
        try:
            process_and_plot_global(w=w, epochs=epochs, fold=fold, dataset=dataset, transcription_type=transcription_type, batch_size=batch_size, disease_class=disease_class, control_class=control_class)
            process_and_plot_global_same_class(w=w, epochs=epochs, fold=fold, subj_classes = subj_classes[fold], dataset=dataset, transcription_type=transcription_type, batch_size=batch_size)

            print(f"✅ Global perplexity processing completed for fold {fold}")
        except Exception as e:
            print(f"❌ Error during dev processing (fold={fold}): {e}")
            continue

        # Step 2: Test classification
        try:
            base_dir_test = f"{dataset}/{dataset}_fold_{fold}/{dataset}_fold_{fold}_w{w}_l0/test/{transcription_type}"
            classification_test(base_dir_test, epochs, fold, w, dataset, transcription_type, batch_size, disease_class, control_class)
            print(f"✅ Classification test completed for fold {fold}, window {w}")
        except Exception as e:
            print(f"❌ Error during classification (fold={fold}, w={w}): {e}")
            continue

        # Step 3: Correlation analysis (all four variants share one handler,
        # so the error message does not single out one of them)
        try:
            print_correlation_ppl_valid_classific_test(epochs, fold, w, dataset)
            print_correlation_ppl_valid_classific_test_not_overfitting(epochs, fold, w, dataset)
            print_correlation_ppl_valid_classific_test_same_class(epochs, fold, w, dataset)
            print_correlation_ppl_valid_classific_test_not_overfitting_same_class(epochs, fold, w, dataset)
            print(f"✅ Correlation analysis completed for fold {fold}, window {w}")
        except Exception as e:
            print(f"❌ Error during correlation analysis (fold={fold}, w={w}): {e}")
            
        

In [None]:
# Print the final summary of every selection strategy, one dataset at a time.
# "SC" sections call the *_same_class helpers, "BC" sections the plain ones.
for dataset in list_datasets:
    print(f"\n\n================== FINAL RESULTS DATASET {dataset} ==================")

    # Baseline (same class)
    print("BASELINE SC: Using the pair composed by min_AD and min_CN perplexity for each fold:")
    average_accuracy_min_ad_min_cn = print_average_accuracy_not_overfitting_baseline_same_class(
        epochs, folds, w, dataset
    )
    print("***************************************************************************************")

    # Baseline; reuses the variable assigned by the previous section
    print("BASELINE BC:")
    average_accuracy_min_ad_min_cn = print_average_accuracy_not_overfitting_baseline(
        epochs, folds, w, dataset
    )
    print("***************************************************************************************")

    # Minimum-delta selection (same class)
    print("DELTA SC:")
    average_accuracy = print_average_accuracy_not_overfitting_delta_same_class(
        epochs, folds, w, dataset
    )
    print("***************************************************************************************")

    # Minimum-delta selection; reuses the variable assigned by the previous section
    print("DELTA BC:")
    average_accuracy = print_average_accuracy_not_overfitting_delta(
        epochs, folds, w, dataset
    )
    print("***************************************************************************************")

    # Oracle
    print("ORACLE:")
    average_accuracy_top = print_oracle_classification_without_overfitting(
        epochs, folds, w, dataset
    )
    print("---------------------------------------------------------------------------")
    print("---------------------------------------------------------------------------")