In [2]:
import os
import pandas as pd

# Task whose metric column names are used below ('tasd' or 'acsa').
task = "acsa"
RESULTS_PATH = 'Students/'  # or 'LLM/'
runs = []

# Every sub-directory of RESULTS_PATH is treated as one run; Jupyter's
# checkpoint folder is excluded. os.listdir returns entries in arbitrary
# order, so the names are sorted to keep the row order reproducible.
folder_names = sorted(
    folder for folder in os.listdir(RESULTS_PATH)
    if os.path.isdir(os.path.join(RESULTS_PATH, folder))
    and folder != '.ipynb_checkpoints'
)

for folder_name in folder_names:
    try:
        # Only the first '_'-separated field of the folder name is kept (as "task").
        cond_parameters = folder_name.split('_')

        # The metrics file is picked from the folder-name prefix.
        if folder_name.startswith('acd'):
            metrics_file = 'metrics_asp.tsv'
        elif folder_name.startswith('tasd'):
            metrics_file = 'metrics_phrases.tsv'
        else:  # acsa
            metrics_file = 'result_asp_pol.tsv'

        df = pd.read_csv(os.path.join(RESULTS_PATH, folder_name, metrics_file), sep='\t')
        # The first column holds the row labels ('Micro-AVG', 'Macro-AVG', ...).
        df = df.set_index(df.columns[0])

        # Column spelling depends on the selected task.
        if task == 'tasd':
            f1_col, acc_col = 'f1', 'accuracy'
        elif task == 'acsa':
            f1_col, acc_col = 'F1', 'Accuracy'
        else:
            # Without this branch the metric variables would be unbound (or,
            # worse, still hold values from an earlier run of the kernel).
            raise ValueError(f"unsupported task {task!r}; expected 'tasd' or 'acsa'")

        f1_micro = df.loc['Micro-AVG', f1_col]
        f1_macro = df.loc['Macro-AVG', f1_col]
        accuracy = df.loc['Micro-AVG', acc_col]

        runs.append([cond_parameters[0], f1_micro, f1_macro, accuracy])

    except Exception as e:
        # Best-effort: a broken or mismatching run folder is skipped, but the
        # reason is reported instead of being silently discarded.
        print(f"Skipping {folder_name}: {type(e).__name__}: {e}")

# build DataFrame with only the columns you need
results_all = pd.DataFrame(runs, columns=["task", "f1-micro", "f1-macro", "accuracy"])

print(RESULTS_PATH)
results_all


Students/


Unnamed: 0,task,f1-micro,f1-macro,accuracy
0,acsa,0.8505,0.8413,0.7398
1,acsa,0.8525,0.843,0.743
2,acsa,0.8663,0.8546,0.7641
3,acsa,0.8568,0.8441,0.7494
4,acsa,0.8595,0.8337,0.7536


In [3]:
# --- Mean of every metric over all collected runs, one row per task ---
metric_cols = ["f1-micro", "f1-macro", "accuracy"]
results_avg = results_all.groupby("task")[metric_cols].mean().reset_index()

# Both F1 scores are reported as percentages with two decimals ...
for f1_col in ("f1-micro", "f1-macro"):
    results_avg[f1_col] = (results_avg[f1_col] * 100).round(2)
# ... while accuracy stays a fraction, rounded to four decimals.
results_avg["accuracy"] = results_avg["accuracy"].round(4)

print("\nAverage results across seeds:", RESULTS_PATH, sep="\n")
results_avg



Average results across seeds:
Students/


Unnamed: 0,task,f1-micro,f1-macro,accuracy
0,acsa,85.71,84.33,0.75


In [21]:
import os, sys, json
import pandas as pd
import numpy as np

# ---- SWITCH HERE ----
task_mode = "tasd"   # change to "acsa" when needed
# ---------------------

# The first eight names are filled from the '_'-separated folder name;
# the last two ('training_time', 'used_memory') are read from config.json.
col_names = [
    'task', 'dataset', 'eval_type', 'data_setting',
    'learning-rate', 'batch_size', 'epochs', 'seed',
    'training_time', 'used_memory'
]
expected_name_fields = len(col_names) - 2
runs = []

RESULTS_PATH = 'LLM/'
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of results_all reproducible across machines.
folder_names = sorted(
    folder for folder in os.listdir(RESULTS_PATH)
    if os.path.isdir(os.path.join(RESULTS_PATH, folder))
    and folder != '.ipynb_checkpoints'
)

for folder_name in folder_names:
    try:
        cond_parameters = folder_name.split('_')

        # Only process folders matching the chosen task
        if cond_parameters[0] != task_mode:
            continue

        # A folder name with a different number of fields would make the
        # DataFrame construction below fail for the whole cell, so it is
        # skipped here with an explanation instead.
        if len(cond_parameters) != expected_name_fields:
            print(
                f"Skipping {folder_name}: expected {expected_name_fields} "
                f"'_'-separated fields, got {len(cond_parameters)}"
            )
            continue

        config_path = os.path.join(RESULTS_PATH, folder_name, 'config.json')
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        # Add runtime + GPU memory (None when the key is absent from config.json)
        cond_parameters.append(config.get("training_time", None))
        cond_parameters.append(config.get("used_memory", None))

        runs.append(cond_parameters)

    except Exception as e:
        # Best-effort: an unreadable or malformed run folder is reported and skipped.
        print(f"Skipping {folder_name}: {e}")

# Build DataFrame
results_all = pd.DataFrame(runs, columns=col_names)

print(f"=== Raw results for {task_mode.upper()} ===")
print(results_all.head())

# ---- Helper: convert "[D day(s), ]H:MM:SS[.ffffff]" -> seconds
def parse_time_to_seconds(t):
    """Convert a duration string to a number of seconds.

    Accepts "H:MM:SS" with optional fractional seconds, and additionally the
    "N day, H:MM:SS" / "N days, H:MM:SS" form that ``str(datetime.timedelta)``
    produces for durations of 24 hours or more (previously these were turned
    into NaN).

    Parameters
    ----------
    t : str or missing value
        The duration text. Missing values (as detected by ``pd.isna``) and
        anything that cannot be parsed yield NaN.

    Returns
    -------
    float
        Total seconds, or ``np.nan`` when ``t`` is missing or unparseable.
    """
    if pd.isna(t):
        return np.nan
    try:
        # Split an optional "N day(s)" prefix from the clock part; without a
        # comma, rpartition returns ('', '', t) and days stays 0.
        day_part, sep, clock = t.rpartition(",")
        days = int(day_part.split()[0]) if sep else 0
        h, m, s = clock.strip().split(":")
        return days * 86400 + int(h) * 3600 + int(m) * 60 + float(s)
    except Exception:
        # Non-string input or an unexpected layout: treat as missing.
        return np.nan

# Numeric duration column so that run times can be averaged.
results_all["training_time_sec"] = results_all["training_time"].apply(parse_time_to_seconds)

# Runs sharing these settings are averaged together.
config_cols = ["data_setting", "eval_type", "learning-rate", "epochs", "batch_size"]
averaged_cols = ["training_time_sec", "used_memory"]

df_grouped = results_all.groupby(config_cols)[averaged_cols].mean().reset_index()

# Render the mean runtime as HH:MM (seconds are truncated); NaN becomes None.
df_grouped["training_time"] = df_grouped["training_time_sec"].apply(
    lambda secs: None if np.isnan(secs)
    else "{:02d}:{:02d}".format(int(secs // 3600), int((secs % 3600) // 60))
)

print(f"=== Grouped results for {task_mode.upper()} ===")
print(df_grouped.head())


=== Raw results for TASD ===
   task   dataset eval_type data_setting learning-rate batch_size epochs seed  \
0  tasd  llm-tasd      test       orig-o        0.0002         16      6   10   
1  tasd  llm-tasd      test       orig-o        0.0002         16      6   15   
2  tasd  llm-tasd      test       orig-o        0.0002         16      6   20   
3  tasd  llm-tasd      test       orig-o        0.0002         16      6   25   
4  tasd  llm-tasd      test       orig-o        0.0002         16      6    5   

    training_time  used_memory  
0  0:38:51.495268        14.27  
1  0:38:49.898244        14.27  
2  0:38:50.326308        14.27  
3  0:38:48.677868        14.27  
4  0:38:45.024006        14.27  
=== Grouped results for TASD ===
  data_setting eval_type learning-rate epochs batch_size  training_time_sec  \
0       orig-o      test        0.0002      6         16        2329.084339   

   used_memory training_time  
0        14.27         00:38  
