In [1]:
import os
import pandas as pd

In [4]:
def extract_metrics(base_dir, epochs, output_file, metrics=None):
    """Aggregate selected per-epoch metrics from every ``results.csv`` under ``base_dir``.

    The directory tree rooted at ``base_dir`` is walked in sorted order so the
    row order of the output is reproducible. For each ``results.csv`` found,
    the rows whose ``epoch`` value is in ``epochs`` are kept, reduced to the
    requested metric columns, and tagged with a leading ``run`` column holding
    the name of the directory that contains the file. All per-file frames are
    concatenated and written to ``output_file`` as CSV (without the index).

    Files that lack a required column, or that fail to load or process, are
    reported on stdout and skipped; they do not abort the aggregation.

    Args:
        base_dir: Root directory searched recursively for ``results.csv`` files.
        epochs: Collection of epoch values to keep (matched with ``Series.isin``).
        output_file: Destination of the aggregated CSV. When it is a path, its
            parent directory is created if it does not exist yet.
        metrics: Optional iterable of column names to extract. Defaults to
            epoch, the three training losses, precision, recall and mAP50-95.
            The ``epoch`` column must exist in each file even when it is not
            listed here, because it is used to select rows.

    Returns:
        None. The result is written to ``output_file``; progress and problems
        are printed.
    """
    if metrics is None:
        metrics = [
            "epoch",
            "train/box_loss",
            "train/cls_loss",
            "train/dfl_loss",
            "metrics/precision(B)",
            "metrics/recall(B)",
            "metrics/mAP50-95(B)",
        ]
    else:
        metrics = list(metrics)

    # "epoch" drives the row filter, so it is required even when the caller
    # did not request it as an output column. dict.fromkeys keeps the order
    # and removes duplicates.
    required_columns = list(dict.fromkeys(["epoch", *metrics]))

    # One filtered frame per successfully processed run
    aggregated_results = []

    for root, dirs, files in os.walk(base_dir):
        # os.walk visits sub-directories in arbitrary OS order; sorting the
        # list in place makes the traversal (and the output rows) deterministic.
        dirs.sort()

        if "results.csv" not in files:
            continue

        file_path = os.path.join(root, "results.csv")
        print(f"Processing: {file_path}")

        try:
            df = pd.read_csv(file_path)

            # Header names may be padded with spaces; normalise before lookup
            df.columns = df.columns.str.strip()

            # Debug: Print column names
            print(f"Columns in {file_path}: {df.columns.tolist()}")

            missing_columns = [col for col in required_columns if col not in df.columns]
            if missing_columns:
                print(f"Missing columns in {file_path}: {missing_columns}")
                continue

            # Select rows and columns in one step; .copy() guarantees that the
            # insert below works on an independent frame.
            df_filtered = df.loc[df["epoch"].isin(epochs), metrics].copy()

            # The run is identified by the directory holding the file
            run_name = os.path.basename(os.path.dirname(file_path))
            df_filtered.insert(0, "run", run_name)

            aggregated_results.append(df_filtered)
        except Exception as e:
            # Best effort: report the failing file and keep processing the rest
            print(f"Error processing {file_path}: {e}")

    if not aggregated_results:
        print("No results found.")
        return

    final_df = pd.concat(aggregated_results, ignore_index=True)

    # Create the destination directory up front so a missing folder does not
    # discard the aggregated result. Non-path targets (e.g. buffers) are left
    # for to_csv to handle.
    if isinstance(output_file, (str, os.PathLike)):
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    final_df.to_csv(output_file, index=False)
    print(f"Metrics saved to {output_file}")


# Configuration for this aggregation run
base_directory = "runs/train"  # root folder searched for results.csv files
epochs_to_extract = [1, *range(10, 101, 10)]  # epoch 1, then every tenth epoch up to 100
output_csv = "aggregated_metrics.csv"  # destination of the combined table

# Collect the selected epochs from every run into a single CSV
extract_metrics(
    base_dir=base_directory,
    epochs=epochs_to_extract,
    output_file=output_csv,
)

Processing: runs/train\testing3\results.csv
Columns in runs/train\testing3\results.csv: ['epoch', 'train/box_loss', 'train/cls_loss', 'train/dfl_loss', 'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)', 'val/box_loss', 'val/cls_loss', 'val/dfl_loss', 'lr/pg0', 'lr/pg1', 'lr/pg2']
Processing: runs/train\testing4\results.csv
Columns in runs/train\testing4\results.csv: ['epoch', 'train/box_loss', 'train/cls_loss', 'train/dfl_loss', 'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)', 'val/box_loss', 'val/cls_loss', 'val/dfl_loss', 'lr/pg0', 'lr/pg1', 'lr/pg2']
Processing: runs/train\train\results.csv
Columns in runs/train\train\results.csv: ['epoch', 'train/box_loss', 'train/cls_loss', 'train/dfl_loss', 'metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)', 'val/box_loss', 'val/cls_loss', 'val/dfl_loss', 'lr/pg0', 'lr/pg1', 'lr/pg2']
Processing: runs/train\train10\results.csv
Columns i