# In [1]:
import os
import pandas as pd

# Directory containing the score files.
# NOTE: this is a hard-coded absolute path into one user's Desktop folder;
# change it before running the script on another machine.
score_files_dir = "C:/Users/aim2r/Desktop/GUI/V1_code/metrics/"

# Function to extract metrics from each file
def extract_metrics(file_path):
    """Parse one score file into per-encoding lists of averaged metrics.

    The file is read line by line. A line containing ``Encoding method:``
    starts a new section; the section name is everything after the first
    colon on that line, stripped of surrounding whitespace and with every
    occurrence of ``_encoded.csv`` removed. Inside a section, lines of the
    form ``<metric name>: <number>`` are collected when the metric name is
    one of the recognised ``Average ...`` metrics. Metric lines that appear
    before the first section header (or under an empty section name) are
    ignored, as are lines without a colon.

    Args:
        file_path: Path to a score file, e.g. ``.../scores_<ml_method>.txt``.

    Returns:
        A tuple ``(ml_method, metrics_values)``. ``ml_method`` is the file
        name with ``"scores_"`` and ``".txt"`` removed. ``metrics_values``
        maps encoding name -> metric name -> list of float values, in the
        order they appear in the file.

    Raises:
        ValueError: If a recognised metric line carries a non-numeric value.
        OSError: If the file cannot be opened.
    """
    relevant_metrics = (
        'Average Accuracy',
        'Average Precision',
        'Average Recall',
        'Average F1 score',
        'Average MCC',
    )

    # Derive the machine learning method name from the file name.
    file_name = os.path.basename(file_path)
    ml_method = file_name.replace("scores_", "").replace(".txt", "")

    metrics_values = {}
    current_method = None
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            if "Encoding method:" in line:
                # Split only on the first colon so an encoding name that
                # itself contains ':' is kept whole instead of truncated.
                encoding_text = line.split(':', 1)[1]
                current_method = encoding_text.strip().replace("_encoded.csv", "")
                continue

            if not current_method:
                # No section has been opened yet; nothing to attach to.
                continue

            parts = line.split(':')
            metric_name = parts[0].strip()
            if len(parts) < 2 or metric_name not in relevant_metrics:
                # Not a "<relevant metric>: <value>" line.
                continue

            raw_value = parts[1].strip()
            try:
                metric_value = float(raw_value)
            except ValueError as err:
                raise ValueError(
                    f"{file_path}, line {line_number}: cannot parse "
                    f"{metric_name!r} value {raw_value!r} as a number"
                ) from err

            section = metrics_values.setdefault(current_method, {})
            section.setdefault(metric_name, []).append(metric_value)

    return ml_method, metrics_values

# List every regular file in the directory whose name starts with "scores".
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the row order of the output CSV reproducible between runs. Directories
# are skipped because extract_metrics() would fail trying to open them.
score_files = sorted(
    file
    for file in os.listdir(score_files_dir)
    if file.startswith("scores")
    and os.path.isfile(os.path.join(score_files_dir, file))
)

# Group the parsed values as:
#   encoding method -> metric name -> [(ml_method, value), ...]
all_metrics = {}
for file in score_files:
    score_file_path = os.path.join(score_files_dir, file)
    ml_method, metrics_values = extract_metrics(score_file_path)
    for method, metrics in metrics_values.items():
        per_encoding = all_metrics.setdefault(method, {})
        for metric_name, values in metrics.items():
            per_encoding.setdefault(metric_name, []).extend(
                (ml_method, value) for value in values
            )

# Flatten the nested dictionary into one record per measured value
# (long format), ready for plotting.
data = []
for method, metrics in all_metrics.items():
    for metric_name, values in metrics.items():
        for ml_method, value in values:
            data.append({
                'ML Method': ml_method,
                'Encoding Method': method,
                'Metric': metric_name,
                'Value': value,
            })

# Passing the columns explicitly keeps the header row in the CSV even when no
# metrics were found (an empty list would otherwise give a column-less frame).
df = pd.DataFrame(
    data,
    columns=['ML Method', 'Encoding Method', 'Metric', 'Value'],
)

# Save the DataFrame to a CSV file in the current working directory.
df.to_csv('performance_all.csv', index=False)
