In [None]:
import os
import pandas as pd

# Directory holding the CSV result files that this cell aggregates; the
# summary file ('subject_averages.csv') is written into this same directory.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another machine.
directory_path = '/home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_classification_50'

# Function to calculate the maximums of the metrics
def calculate_maximums(file_path):
    """Return the column-wise maximum of each tracked metric in a results CSV.

    Parameters
    ----------
    file_path
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    dict
        Maps each tracked column name to the largest value found in that
        column, in the order the columns are listed below.

    Raises
    ------
    KeyError
        If the CSV lacks one of the tracked columns.
    """
    tracked_columns = (
        'v2_acc',
        'v4_acc',
        'v10_acc',
        'test_accuracy',  # top-1 accuracy
        'top5_acc',       # top-5 accuracy
    )
    table = pd.read_csv(file_path)
    return {column: table[column].max() for column in tracked_columns}

# Per-method lists of per-file maxima: {method: {metric: [max, max, ...]}}
results = {}

# Name of the summary file this cell writes into `directory_path`
output_file_name = 'subject_averages.csv'

# Delete a summary left over from a previous run so it is rebuilt from scratch.
# (The loop below additionally skips a file with this name.)
output_file_path = os.path.join(directory_path, output_file_name)
if os.path.exists(output_file_path):
    os.remove(output_file_path)

# Visit the CSV files in sorted order: os.listdir() returns entries in
# arbitrary order, and a fixed order keeps the float sums reproducible.
for filename in sorted(os.listdir(directory_path)):
    if filename.endswith('.csv') and filename != output_file_name:
        # The method name is the part of the filename before the first '_'
        method_name = filename.split('_')[0]
        file_path = os.path.join(directory_path, filename)
        maximums = calculate_maximums(file_path)

        # Group the per-file maxima by method name
        if method_name not in results:
            results[method_name] = {metric: [] for metric in maximums}
        for metric in maximums:
            results[method_name][metric].append(maximums[metric])

# Fail with a clear message instead of a column-length mismatch further down
if not results:
    raise FileNotFoundError(f"No input CSV files found in {directory_path}")

# Average the collected maxima for each method and metric
averages = {method: {metric: sum(values) / len(values) for metric, values in metrics.items()}
            for method, metrics in results.items()}

# Sort the results by method name
sorted_averages = dict(sorted(averages.items()))

# One row per method, one column per metric
averages_df = pd.DataFrame.from_dict(sorted_averages, orient='index')

# Rename columns by key rather than by position, so the labels cannot be
# attached to the wrong metric and extra metrics returned by
# calculate_maximums simply keep their raw names instead of raising.
column_labels = {
    'v2_acc': 'V2 Accuracy',
    'v4_acc': 'V4 Accuracy',
    'v10_acc': 'V10 Accuracy',
    'test_accuracy': 'Test Accuracy',
    'top5_acc': 'Top5 Accuracy',
}
averages_df = averages_df.rename(columns=column_labels)

# Save the DataFrame to a CSV file
averages_df.to_csv(output_file_path, index_label='Method')

print(f"Averages saved to {output_file_path}")

In [5]:
# Spot-check: compute the metric maxima for one subject's result file.
# NOTE: this cell relies on `calculate_maximums` already being defined in the
# kernel. The recorded output lists nine metrics, which matches the
# nine-metric definition of `calculate_maximums` in this notebook, not the
# five-metric one -- re-running on a fresh kernel may print a different dict.
file_path = '/home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_classification_50/sub-01/03-07_13-04/ATMS_classification_50_sub-01.csv'

maximums = calculate_maximums(file_path)
print(f"Maximums from {file_path}: {maximums}")

Maximums from /home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_classification_50/sub-01/03-07_13-04/ATMS_classification_50_sub-01.csv: {'v2_acc': 0.775, 'v4_acc': 0.555, 'v10_acc': 0.36, 'test_accuracy': 0.065, 'top5_acc': 0.175, 'v50_acc': 0.15, 'v100_acc': 0.1, 'v50_top5_acc': 0.385, 'v100_top5_acc': 0.265}


In [1]:
import os
import pandas as pd
from datetime import datetime

def calculate_maximums(file_path):
    """Read a results CSV and report the peak value of every tracked metric.

    Parameters
    ----------
    file_path
        Location of the CSV file; handed directly to ``pd.read_csv``.

    Returns
    -------
    dict
        One entry per tracked column (nine in total), mapping the column
        name to the maximum value in that column.

    Raises
    ------
    KeyError
        If any tracked column is absent from the CSV.
    """
    frame = pd.read_csv(file_path)
    best = {}
    for name in (
        'v2_acc',
        'v4_acc',
        'v10_acc',
        'test_accuracy',
        'top5_acc',
        'v50_acc',
        'v100_acc',
        'v50_top5_acc',
        'v100_top5_acc',
    ):
        best[name] = frame[name].max()
    return best

def get_latest_directory(directories):
    """Return the directory name carrying the most recent 'MM-DD_HH-MM' stamp.

    Parameters
    ----------
    directories : iterable of str
        Candidate directory names (names only, not full paths).

    Returns
    -------
    str or None
        The name whose timestamp is latest, or ``None`` when no name matches
        the 'MM-DD_HH-MM' format. On equal timestamps the first one wins.

    Notes
    -----
    The names carry no year, so ordering is by position within a calendar
    year only; runs that straddle New Year cannot be ordered correctly.
    """
    latest_dir = None
    latest_time = None
    for directory in directories:
        try:
            # Parse against a fixed leap year. With no year in the format,
            # strptime assumes 1900 (not a leap year), so '02-29_*' would
            # raise ValueError and the run would be silently skipped.
            directory_time = datetime.strptime("2000-" + directory, "%Y-%m-%d_%H-%M")
        except ValueError:
            continue  # Ignore directories that do not match the expected format
        if latest_time is None or directory_time > latest_time:
            latest_time = directory_time
            latest_dir = directory
    return latest_dir

# Root directory with one sub-folder per subject; each subject folder holds
# run directories named 'MM-DD_HH-MM'.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
base_directory_path = '/home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_50'

# Per-method lists of per-file maxima: {method: {metric: [max, max, ...]}}
results = {}

# Sorted traversal: os.listdir() order is arbitrary, and a fixed order keeps
# the float sums (and therefore the written averages) reproducible.
for subject_folder in sorted(os.listdir(base_directory_path)):
    subject_folder_path = os.path.join(base_directory_path, subject_folder)
    if not os.path.isdir(subject_folder_path):
        continue

    # Only the most recent run of each subject contributes to the averages
    time_directories = [d for d in os.listdir(subject_folder_path) if os.path.isdir(os.path.join(subject_folder_path, d))]
    latest_directory = get_latest_directory(time_directories)
    if not latest_directory:
        continue

    latest_directory_path = os.path.join(subject_folder_path, latest_directory)
    for filename in sorted(os.listdir(latest_directory_path)):
        if filename.endswith('.csv'):
            # The method name is the part of the filename before the first '_'
            method_name = filename.split('_')[0]
            file_path = os.path.join(latest_directory_path, filename)
            maximums = calculate_maximums(file_path)
            if method_name not in results:
                results[method_name] = {metric: [] for metric in maximums}
            for metric in maximums:
                results[method_name][metric].append(maximums[metric])

# Fail with a clear message instead of a column-length mismatch further down
if not results:
    raise FileNotFoundError(f"No result CSV files found under {base_directory_path}")

# Average the collected maxima for each method and metric, ordered by method name
averages = {method: {metric: sum(values) / len(values) for metric, values in metrics.items()} for method, metrics in results.items()}
sorted_averages = dict(sorted(averages.items()))

averages_df = pd.DataFrame.from_dict(sorted_averages, orient='index')

# Rename columns by key rather than by position, so a label can never be
# attached to the wrong metric and a differing metric set does not raise.
column_labels = {
    'v2_acc': 'V2 Accuracy',
    'v4_acc': 'V4 Accuracy',
    'v10_acc': 'V10 Accuracy',
    'test_accuracy': 'Test Accuracy',
    'top5_acc': 'Top5 Accuracy',
    'v50_acc': 'V50 Accuracy',
    'v100_acc': 'V100 Accuracy',
    'v50_top5_acc': 'V50 Top5 Accuracy',
    'v100_top5_acc': 'V100 Top5 Accuracy',
}
averages_df = averages_df.rename(columns=column_labels)

output_file_name = 'subject_averages.csv'
output_file_path = os.path.join(base_directory_path, output_file_name)
averages_df.to_csv(output_file_path, index_label='Method')

print(f"Averages saved to {output_file_path}")

Averages saved to /home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_50/subject_averages.csv


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Summary CSV produced by the aggregation cell (one row per method).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
output_file_path = "/home/ldy/Workspace/BrainAligning_retrieval/outputs/contrast/ATMS_50/subject_averages.csv"

df = pd.read_csv(output_file_path)

# Descriptive statistics across methods, expressed in percent.
# - iloc[1:] drops the 'count' row.
# - Scale to percent first and round afterwards: rounding the fractions to
#   5 places and then multiplying by 100 gives the same precision (3 decimals
#   in percent) but can leave float noise such as 97.39999999999999.
# - 'std' is NaN when the file holds a single method row.
metrics = (df.describe().iloc[1:] * 100).round(3)
print(metrics)

      V2 Accuracy  V4 Accuracy  V10 Accuracy  Test Accuracy  Top5 Accuracy  \
mean         97.4        91.95         80.35          30.75          63.15   
std           NaN          NaN           NaN            NaN            NaN   
min          97.4        91.95         80.35          30.75          63.15   
25%          97.4        91.95         80.35          30.75          63.15   
50%          97.4        91.95         80.35          30.75          63.15   
75%          97.4        91.95         80.35          30.75          63.15   
max          97.4        91.95         80.35          30.75          63.15   

      V50 Accuracy  V100 Accuracy  V50 Top5 Accuracy  V100 Top5 Accuracy  
mean         54.15          41.25              87.05               75.95  
std            NaN            NaN                NaN                 NaN  
min          54.15          41.25              87.05               75.95  
25%          54.15          41.25              87.05               75.95  
