In [3]:
import json
import pandas as pd
import numpy as np
from pathlib import Path

def _first_present(mapping, keys, default=0.0):
    """Return the value of the first key in `keys` that is present and not None in `mapping`."""
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return default


def extract_experiment_data(json_file):
    """
    Read one experiment JSON file and return one row per variant found in it.

    Parameters
    ----------
    json_file : str or path-like
        Path to a JSON file. Recognised top-level keys are 'model_family',
        'peft', 'task' (each defaulting to 'unknown') and the per-variant
        result objects 'teacher_fft_results', 'teacher_lora_results' and
        'student_lora_results'.

    Returns
    -------
    pandas.DataFrame
        One row per variant present in the file, with the columns
        "Model Family", "PEFT Method", "Task", "Variant", "Accuracy/Metric",
        "Trainable Params (M)", "Train Runtime (s)", "Eval Runtime (s)" and
        "Avg GPU Memory (MB)". The frame is empty when the file holds no
        recognised variant or its top level is not a JSON object.

    Notes
    -----
    Metrics that are missing (or null) are reported as 0.0. Values are only
    rounded here; no unit conversion is applied, so the units in the column
    names are whatever the file already stores.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A JSON array/scalar cannot be an experiment record; treat it like a
    # file without any recognised variant instead of failing on `.get`.
    if not isinstance(data, dict):
        return pd.DataFrame()

    # Extract metadata
    model_family = data.get('model_family', 'unknown')
    peft_method = data.get('peft', 'unknown')
    task = data.get('task', 'unknown')

    # Mapping of JSON keys to requested Variant names
    variant_mapping = {
        "teacher_fft_results": "fft",
        "teacher_lora_results": "lora",
        "student_lora_results": "kd-lora"
    }

    rows = []

    for json_key, variant_name in variant_mapping.items():
        res = data.get(json_key)
        if not res or not isinstance(res, dict):
            continue

        # Some tasks use eval_accuracy, others eval_matthews_correlation.
        # Pick the first key that is actually present: a truthiness test
        # would discard a genuine score of 0.0 and fall through to the
        # other metric.
        accuracy = _first_present(res, ('eval_accuracy', 'eval_matthews_correlation'))
        eval_runtime = _first_present(res, ('eval_runtime',))

        # Training-specific metrics; `or {}` also covers an explicit null.
        train_data = res.get('train') or {}
        trainable_params = _first_present(train_data, ('trainable_params_count',))
        train_runtime = _first_present(train_data, ('train_time',))

        # Average of the recorded allocated-memory samples (0.0 when none).
        memory_list = train_data.get('memory_allocated') or []
        avg_memory = np.mean(memory_list) if memory_list else 0.0

        rows.append({
            "Model Family": model_family,
            "PEFT Method": peft_method,
            "Task": task,
            "Variant": variant_name,
            "Accuracy/Metric": round(accuracy, 4),
            "Trainable Params (M)": round(trainable_params, 4),
            "Train Runtime (s)": round(train_runtime, 2),
            "Eval Runtime (s)": round(eval_runtime, 2),
            "Avg GPU Memory (MB)": round(avg_memory, 2)
        })

    return pd.DataFrame(rows)

def aggregate_experiment_results(root_dir):
    """
    Finds all .json files under a directory recursively, extracts data,
    and concatenates them into one large DataFrame.

    Parameters
    ----------
    root_dir : str or path-like
        Directory that is searched recursively for ``*.json`` files.

    Returns
    -------
    pandas.DataFrame
        All rows produced by `extract_experiment_data`, sorted by
        "Task", "Model Family" and "Variant" (whichever of these columns
        exist) and re-indexed 0..n-1. An empty DataFrame is returned when
        no JSON file is found or no file yields any row.

    Notes
    -----
    Files that cannot be opened or decoded as JSON are reported with a
    printed message and skipped, so one bad file does not abort the run.
    """
    root_path = Path(root_dir)
    # Recursively find all JSON files. Sorted so that processing order does
    # not depend on how the filesystem happens to enumerate entries.
    json_files = sorted(root_path.rglob("*.json"))

    if not json_files:
        print(f"No JSON files found in {root_dir}")
        return pd.DataFrame()

    all_dfs = []
    for f in json_files:
        try:
            df = extract_experiment_data(f)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
            print(f"Skipping unreadable JSON file {f}: {exc}")
            continue
        if not df.empty:
            all_dfs.append(df)

    if not all_dfs:
        print("No valid data extracted from found files.")
        return pd.DataFrame()

    # Concatenate all individual DataFrames by row
    final_df = pd.concat(all_dfs, ignore_index=True)

    # Sort for better readability
    sort_cols = [c for c in ("Task", "Model Family", "Variant") if c in final_df.columns]
    if sort_cols:
        final_df = final_df.sort_values(by=sort_cols)

    # sort_values keeps the pre-sort labels; renumber so the index matches
    # the displayed row order.
    return final_df.reset_index(drop=True)


In [4]:
# Collect every experiment JSON found under ./results/ into a single DataFrame.
df = aggregate_experiment_results('./results/')

In [5]:
# Show the aggregated results table as this cell's output.
df

Unnamed: 0,Model Family,PEFT Method,Task,Variant,Accuracy/Metric,Trainable Params (M),Train Runtime (s),Eval Runtime (s),Avg GPU Memory (MB)
0,bert,olora,wnli,fft,0.338,109.4838,58.16,0.47,1331.73
2,bert,olora,wnli,kd-lora,0.5634,0.7396,19.48,0.26,1608.91
1,bert,olora,wnli,lora,0.3803,0.2964,40.29,0.84,2050.46
