In [7]:
#!/usr/bin/env python3
import os
import re

# metric names remain the same
METRIC_KEYS = [
    "FLOPs per inference",
    "Avg inference time per event (ns)",
    "Validation accuracy",
    "1/FPR@TPR=0.8 for g",
    "1/FPR@TPR=0.8 for q",
    "1/FPR@TPR=0.8 for W",
    "1/FPR@TPR=0.8 for Z",
    "1/FPR@TPR=0.8 for t",
    "Average 1/FPR",
    "Total params",
    "Trainable params",
]

LOG_LINE_RE = re.compile(
    r'^(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d+)\s+INFO\s+'
    r'(?P<key>[^:]+):\s+(?P<val>.+)$'
)

def find_root_dirs(start_path):
    """
    Find **all** subdirectories under start_path that contain both
    train.log and acc_curve.png.
    """
    matches = []
    for dirpath, dirnames, filenames in os.walk(start_path):
        if "train.log" in filenames and "acc_curve.png" in filenames:
            matches.append(dirpath)
    return matches
    # ⇒ CHANGED: collects all matches instead of returning on the first one

def parse_log(log_path):
    """
    Read train.log and extract keys in METRIC_KEYS.
    """
    results = {}
    with open(log_path, 'r') as f:
        for line in f:
            m = LOG_LINE_RE.match(line.strip())
            if not m:
                continue
            key = m.group("key")
            if key in METRIC_KEYS:
                results[key] = {
                    "timestamp": m.group("ts"),
                    "value": m.group("val")
                }
    return results

def main():
    # You can swap this for argparse if you like
    root_path = '/j-jepa-vol/linformer4HEP/runs'

    roots = find_root_dirs(root_path)
    if not roots:
        print(f"ERROR: No directory under {root_path} contains train.log and acc_curve.png")
        return

    # ⇒ CHANGED: loop through **all** matching directories
    for root in roots:
        print(f"\n=== Processing {root} ===")
        log_file = os.path.join(root, "train.log")
        metrics = parse_log(log_file)

        if not metrics:
            print(f"WARNING: No metrics found in {log_file}")
            continue

        print(f"Extracted metrics from {log_file}:")
        for key in METRIC_KEYS:
            if key in metrics:
                ts = metrics[key]["timestamp"]
                val = metrics[key]["value"]
                print(f"{ts} INFO {key}: {val}")
            else:
                print(f"(not found) {key}")

In [8]:
main()


=== Processing /j-jepa-vol/linformer4HEP/runs/cluster_f/150/pt ===
Extracted metrics from /j-jepa-vol/linformer4HEP/runs/cluster_f/150/pt/train.log:
2025-04-19 05:38:13,703 INFO FLOPs per inference: 2045730
2025-04-19 05:38:14,183 INFO Avg inference time per event (ns): 5354.795
2025-04-19 05:38:15,709 INFO Validation accuracy: 0.8063
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for g: 12.900
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for q: 9.495
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for W: 48.781
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for Z: 74.138
2025-04-19 05:38:16,243 INFO 1/FPR@TPR=0.8 for t: 30.763
2025-04-19 05:38:16,243 INFO Average 1/FPR: 35.215
2025-04-19 02:56:47,758 INFO Total params: 16917 (66.08 KB)
2025-04-19 02:56:47,758 INFO Trainable params: 16917 (66.08 KB)

=== Processing /j-jepa-vol/linformer4HEP/runs/cluster_f/150/delta_R ===
Extracted metrics from /j-jepa-vol/linformer4HEP/runs/cluster_f/150/delta_R/train.log:
2025-04-19 08:18:48,156 INFO FLOPs per infe