In [1]:
import os
import pandas as pd

# Folder that holds the grid-search result files
folder_path = 'results'

# Collect the grid-search CSV files.
# - os.listdir returns names in arbitrary order, so the list is sorted to make
#   the row order of the combined dataframe reproducible between runs/machines.
# - Requiring the .csv suffix keeps sub-directories and other artefacts whose
#   name merely contains 'grid' away from pd.read_csv.
csv_files = sorted(
    f for f in os.listdir(folder_path)
    if 'grid' in f and f.lower().endswith('.csv')
)
if not csv_files:
    # Without this guard pd.concat fails with "No objects to concatenate",
    # which says nothing about which folder or filter came up empty.
    raise ValueError(
        f"No CSV files containing 'grid' were found in folder {folder_path!r}"
    )

# Read each file into its own dataframe
df_list = []
for file in csv_files:
    file_path = os.path.join(folder_path, file)  # Full file path
    df = pd.read_csv(file_path)  # Read the CSV file
    df_list.append(df)  # Add the dataframe to the list

# Stack all per-file dataframes into one, renumbering the rows from 0
df_combined = pd.concat(df_list, ignore_index=True)

# df_combined now holds the rows of every matched file
# (per the original note: the results of the 10 folds)
print(df_combined.head())


   K    Distance   Voting scheme      Weight scheme  Accuracy  Precision  \
0  1  minkowski1  Majority_class  Mutual_classifier  0.946809   0.947619   
1  3  minkowski1  Majority_class  Mutual_classifier  0.946809   0.947011   
2  5  minkowski1  Majority_class  Mutual_classifier  0.962766   0.964079   
3  7  minkowski1  Majority_class  Mutual_classifier  0.968085   0.968297   
4  1  minkowski1  Majority_class             Relief  0.728723   0.728957   

     Recall        F1  Solving Time  
0  0.946809  0.946784      1.418221  
1  0.946809  0.946802      1.395949  
2  0.962766  0.962740      1.406002  
3  0.968085  0.968081      1.746111  
4  0.728723  0.728654      6.448211  


In [2]:
# Columns that together identify one hyper-parameter configuration
group_keys = ['K', 'Distance', 'Voting scheme', 'Weight scheme']
# Metric columns to summarise, and the statistics computed for each of them
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1', 'Solving Time']
statistics = ['mean', 'std']

grouped_df = df_combined.groupby(group_keys)

# Compute mean and standard deviation of the relevant metrics.
# Named aggregation declares every output column '<metric>_<stat>' right next
# to the (column, function) pair that produces it, so the labels cannot drift
# out of step with the data the way a hand-written positional rename list can.
# The dict is unpacked with ** because names such as 'Solving Time_mean' are
# not valid Python identifiers and so cannot be written as keyword arguments.
# Note: 'std' is the sample standard deviation, so a configuration that occurs
# only once in df_combined gets NaN in its *_std columns.
metrics_summary = grouped_df.agg(**{
    f'{metric}_{stat}': (metric, stat)
    for metric in metric_columns
    for stat in statistics
}).reset_index()

# Show the metrics summary
print(metrics_summary)

     K    Distance             Voting scheme      Weight scheme  \
0    1        HEOM  Inverse_Distance_Weights              ANOVA   
1    1        HEOM  Inverse_Distance_Weights  Mutual_classifier   
2    1        HEOM  Inverse_Distance_Weights             Relief   
3    1        HEOM            Majority_class              ANOVA   
4    1        HEOM            Majority_class  Mutual_classifier   
..  ..         ...                       ...                ...   
103  7  minkowski2            Majority_class  Mutual_classifier   
104  7  minkowski2            Majority_class             Relief   
105  7  minkowski2            Sheppards_Work              ANOVA   
106  7  minkowski2            Sheppards_Work  Mutual_classifier   
107  7  minkowski2            Sheppards_Work             Relief   

     Accuracy_mean  Accuracy_std  Precision_mean  Precision_std  Recall_mean  \
0         0.713431      0.030017        0.720178       0.036691     0.713431   
1         0.952848      0.012636   