In [7]:
#!/usr/bin/env python3
import os
import re

# Metric names to pull out of train.log. parse_log keeps only log entries whose
# key is listed here, and main prints them in this order.
METRIC_KEYS = [
    "FLOPs per inference",
    "Avg inference time per event (ns)",
    "Validation accuracy",
    "1/FPR@TPR=0.8 for g",
    "1/FPR@TPR=0.8 for q",
    "1/FPR@TPR=0.8 for W",
    "1/FPR@TPR=0.8 for Z",
    "1/FPR@TPR=0.8 for t",
    "Average 1/FPR",
    "Total params",
    "Trainable params",
]

# Matches one INFO-level log line of the form
#   "YYYY-MM-DD HH:MM:SS,<digits> INFO <key>: <value>"
# Named groups: ts (the timestamp), key (everything after INFO up to the first
# colon) and val (the rest of the line after the colon and at least one
# whitespace character). Lines at other log levels do not match.
LOG_LINE_RE = re.compile(
    r'^(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+)\s+INFO\s+'
    r'(?P<key>[^:]+):\s+(?P<val>.+)$'
)

def find_root_dirs(start_path):
    """
    Return every directory at or below start_path that holds both
    train.log and acc_curve.png.

    All matches are collected (not just the first one) and listed in the
    order os.walk visits them. The list is empty when nothing matches.
    """
    required = {"train.log", "acc_curve.png"}
    return [
        current
        for current, _subdirs, files in os.walk(start_path)
        if required.issubset(files)
    ]

def parse_log(log_path):
    """
    Collect the METRIC_KEYS entries logged in a train.log file.

    Each line is stripped and matched against LOG_LINE_RE; lines that do not
    match, or whose key is not in METRIC_KEYS, are ignored.

    Returns a dict mapping each metric name found to
    {"timestamp": <str>, "value": <str>}. If a metric is logged more than
    once, the last occurrence in the file wins.
    """
    found = {}
    with open(log_path, 'r') as handle:
        for raw_line in handle:
            parsed = LOG_LINE_RE.match(raw_line.strip())
            if parsed is None:
                continue
            name = parsed.group("key")
            if name not in METRIC_KEYS:
                continue
            found[name] = {
                "timestamp": parsed.group("ts"),
                "value": parsed.group("val"),
            }
    return found

def main(root_path='/j-jepa-vol/linformer4HEP/runs'):
    """
    Print the METRIC_KEYS entries found in every run directory under root_path.

    Args:
        root_path: Directory to search. Defaults to the path that used to be
            hard-coded here, so calling main() with no arguments behaves
            exactly as before.

    Every directory returned by find_root_dirs is processed in turn: its
    train.log is parsed with parse_log and the metrics are printed in
    METRIC_KEYS order, with "(not found)" for any metric missing from the log.
    Directories whose log yields no metrics get a warning and are skipped.
    Returns None.
    """
    roots = find_root_dirs(root_path)
    if not roots:
        print(f"ERROR: No directory under {root_path} contains train.log and acc_curve.png")
        return

    # Report on all matching directories, not only the first.
    for root in roots:
        print(f"\n=== Processing {root} ===")
        log_file = os.path.join(root, "train.log")
        metrics = parse_log(log_file)

        if not metrics:
            print(f"WARNING: No metrics found in {log_file}")
            continue

        print(f"Extracted metrics from {log_file}:")
        for key in METRIC_KEYS:
            if key in metrics:
                ts = metrics[key]["timestamp"]
                val = metrics[key]["value"]
                print(f"{ts} INFO {key}: {val}")
            else:
                print(f"(not found) {key}")

In [8]:
# Print the extracted metrics for every run directory found under the runs path configured in main.
main()


=== Processing /j-jepa-vol/linformer4HEP/runs/cluster_f/150/pt ===
Extracted metrics from /j-jepa-vol/linformer4HEP/runs/cluster_f/150/pt/train.log:
2025-04-19 05:38:13,703 INFO FLOPs per inference: 2045730
2025-04-19 05:38:14,183 INFO Avg inference time per event (ns): 5354.795
2025-04-19 05:38:15,709 INFO Validation accuracy: 0.8063
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for g: 12.900
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for q: 9.495
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for W: 48.781
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for Z: 74.138
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for t: 30.763
2025-04-19 05:38:16,243 INFO Average 1/FPR: 35.215
2025-04-19 02:56:47,758 INFO Total params: 16917 (66.08 KB)
2025-04-19 02:56:47,758 INFO Trainable params: 16917 (66.08 KB)

=== Processing /j-jepa-vol/linformer4HEP/runs/cluster_f/150/delta_R ===
Extracted metrics from /j-jepa-vol/linformer4HEP/runs/cluster_f/150/delta_R/train.log:
2025-04-19 08:18:48,156 INFO FLOPs per infe

In [5]:
import os
import re
import pandas as pd
import numpy as np

def parse_train_log(log_path):
    """
    Extract the summary metrics of one trial from its train.log.

    The whole file is read into memory and each metric is located with
    re.search, so the first occurrence in the file is the one reported.

    Args:
        log_path: Path to the train.log file.

    Returns:
        dict mapping metric name to a float. A metric whose pattern does not
        appear in the log is reported as np.nan.
    """
    with open(log_path, 'r') as f:
        content = f.read()

    # Unsigned decimal with optional fraction and exponent. The previous
    # capture, [0-9.]+, swallowed a trailing sentence period ("0.8131." made
    # float() raise ValueError) and stopped at the "e" of scientific notation
    # ("1.65e+04" was silently read as 1.65).
    number = r"(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    def extract(pattern, transform=float, default=np.nan):
        # First match wins; group(1) is always the numeric capture.
        match = re.search(pattern, content)
        return transform(match.group(1)) if match else default

    def extract_auc(label):
        return extract(rf"ROC AUC for {re.escape(label)}:\s*({number})")

    def extract_fpr(label):
        return extract(rf"1/FPR @0\.8 TPR for {re.escape(label)}:\s*({number})")

    return {
        'Params': extract(r"Total parameters:\s*(\d+)"),
        'FLOPs': extract(r"FLOPs per inference:\s*(\d+)"),
        'GPU_peak_MB': extract(rf"peak: ({number}) MB"),
        'Inference time (ns)': extract(rf"Avg inference time / event:\s*({number})"),
        'Test Accuracy': extract(rf"Test Accuracy:\s*({number})"),
        'ROC AUC': extract(rf"ROC AUC:\s*({number})"),
        'ROC AUC g': extract_auc("g"),
        'ROC AUC q': extract_auc("q"),
        'ROC AUC W': extract_auc("W"),
        'ROC AUC Z': extract_auc("Z"),
        'ROC AUC t': extract_auc("t"),
        '1/FPR g': extract_fpr("g"),
        '1/FPR q': extract_fpr("q"),
        '1/FPR W': extract_fpr("W"),
        '1/FPR Z': extract_fpr("Z"),
        '1/FPR t': extract_fpr("t"),
        'Avg 1/FPR': extract(rf"Avg 1/FPR @0\.8 TPR:\s*({number})")
    }

def collect_all_trials(base_dir):
    """
    Build a DataFrame with one row per trial train.log found under base_dir.

    A directory counts as a trial when it contains train.log and its path
    contains "trial-". Each row holds the metrics returned by parse_train_log
    plus two labels taken from the directory path:
      - 'Sort':  pt, delta_R or kt, read from the segment right before
                 "trial-<n>"
      - 'Model': the text between "runs/" and the first "/<digits>/" segment
    Either label is "N/A" when the path does not have the expected shape.
    """
    sort_re = re.compile(r"/(pt|delta_R|kt)/trial-\d+")
    model_re = re.compile(r"runs/(.+?)/\d+/")

    rows = []
    for trial_dir, _subdirs, filenames in os.walk(base_dir):
        if "train.log" not in filenames or "trial-" not in trial_dir:
            continue
        sort_hit = sort_re.search(trial_dir)
        model_hit = model_re.search(trial_dir)
        row = parse_train_log(os.path.join(trial_dir, "train.log"))
        row['Sort'] = "N/A" if sort_hit is None else sort_hit.group(1)
        row['Model'] = "N/A" if model_hit is None else model_hit.group(1)
        rows.append(row)
    return pd.DataFrame(rows)

def summarize_trials(df):
    """
    Collapse per-trial rows into one "mean ± std" row per (Model, Sort) pair.

    Every column other than 'Model' and 'Sort' is coerced to numeric
    (non-numeric entries become NaN) and rendered as "mean ± std" with four
    decimals. When the standard deviation is NaN, e.g. for a group with a
    single trial, only the mean is shown. An empty input yields an empty
    DataFrame.
    """
    key_cols = ['Model', 'Sort']
    if df.empty:
        return pd.DataFrame()

    def _format(series):
        numeric = pd.to_numeric(series, errors='coerce')
        avg = numeric.mean()
        spread = numeric.std()
        if np.isnan(spread):
            return f"{avg:.4f}"
        return f"{avg:.4f} ± {spread:.4f}"

    rows = []
    for (model, sort), trials in df.groupby(key_cols):
        row = {'Model': model, 'Sort': sort}
        row.update({
            name: _format(trials[name])
            for name in trials.columns
            if name not in key_cols
        })
        rows.append(row)

    return pd.DataFrame(rows)

# Directory tree that is searched for trial-*/train.log files.
base_dir = "/j-jepa-vol/linformer4HEP/runs/double_batch_size/transformer_w_test"

# df_raw holds one row per trial; df_summary holds one "mean ± std" row per
# (Model, Sort) pair.
df_raw = collect_all_trials(base_dir)
df_summary = summarize_trials(df_raw)

# Write the summary table to CSV (without the index column) and echo it below.
output_path = "/j-jepa-vol/linformer4HEP/summary_table.csv"
df_summary.to_csv(output_path, index=False)
print(df_summary)

                                  Model     Sort              Params  \
0  double_batch_size/transformer_w_test  delta_R  2009.0000 ± 0.0000   
1  double_batch_size/transformer_w_test       kt  2009.0000 ± 0.0000   
2  double_batch_size/transformer_w_test       pt  2009.0000 ± 0.0000   

                   FLOPs            GPU_peak_MB     Inference time (ns)  \
0  2479918.0000 ± 0.0000   5752.4000 ± 460.4000   16582.7433 ± 657.0758   
1  2479918.0000 ± 0.0000   6903.4500 ± 325.6227   17616.3100 ± 651.3443   
2  2479918.0000 ± 0.0000  7121.2200 ± 3554.9169  15454.6900 ± 3805.3994   

     Test Accuracy          ROC AUC        ROC AUC g        ROC AUC q  \
0  0.8131 ± 0.0014  0.9590 ± 0.0008  0.9498 ± 0.0010  0.9289 ± 0.0011   
1  0.8121 ± 0.0004  0.9588 ± 0.0001  0.9496 ± 0.0003  0.9288 ± 0.0001   
2  0.8127 ± 0.0008  0.9589 ± 0.0004  0.9497 ± 0.0005  0.9288 ± 0.0006   

         ROC AUC W        ROC AUC Z        ROC AUC t           1/FPR g  \
0  0.9767 ± 0.0003  0.9733 ± 0.0004  0.9693

In [2]:
!pip install ace_tools

