In [9]:
import os
import pandas as pd
import numpy as np
from tensorboard.backend.event_processing import event_accumulator
from tabulate import tabulate

runs_dir = './runs'  # Set to '.' to scan the entire current directory
ref_model_name = "BERT4Rec"  # Still keeps track, but no % improvement calculated


def parse_run_folder(folder_name):
    """Split a run-folder name into ``(model, dataset, seq_len, seed)``.

    The name is cut on underscores: the first token is the model, the second
    the dataset. The sequence length and the seed are looked up by key, either
    as two tokens (``SL_128``, ``seed_42``) or glued together (``SL128``,
    ``seed42``). When a key occurs several times the last occurrence wins.

    Args:
        folder_name: base name of the directory holding the event files,
            e.g. ``"BERT4Rec_100K_SL_128_Emb_64_seed_123"``.

    Returns:
        A 4-tuple of strings; every field that cannot be found is ``'Unknown'``.
    """
    parts = folder_name.split('_')

    model_name = parts[0]  # str.split always yields at least one token
    dataset = parts[1] if len(parts) > 1 else 'Unknown'

    seq_len = 'Unknown'
    seed = 'Unknown'

    for idx, part in enumerate(parts):
        has_next = idx + 1 < len(parts)

        if part == 'SL' and has_next:
            seq_len = parts[idx + 1]
        elif part.startswith('SL') and part[2:].isdecimal():
            seq_len = part[2:]

        if part == 'seed' and has_next:
            seed = parts[idx + 1]
        elif part.startswith('seed') and part[4:].isdecimal():
            seed = part[4:]

    return model_name, dataset, seq_len, seed


all_scalars = []

for root, dirs, files in os.walk(runs_dir):
    event_files = [name for name in files if name.startswith('events.out.tfevents')]
    if not event_files:
        continue

    # The metadata depends only on the folder, so parse it once per folder
    # rather than once per event file.
    model_name, dataset, seq_len, seed = parse_run_folder(os.path.basename(root))

    for file in event_files:
        file_path = os.path.join(root, file)
        print(f"Processing: {file_path}")

        # size_guidance 0 keeps every scalar event instead of a down-sampled subset.
        ea = event_accumulator.EventAccumulator(file_path, size_guidance={'scalars': 0})
        try:
            ea.Reload()
        except Exception as e:
            # One unreadable event file must not abort the whole scan.
            print(f"Skipping {file_path} due to error: {e}")
            continue

        # One record per logged scalar event (every step of every tag).
        for tag in ea.Tags().get('scalars', []):
            all_scalars.extend(
                {
                    'Model': model_name,
                    'Dataset': dataset,
                    'Seq Len': seq_len,
                    'Seed': seed,
                    'Metric': tag,
                    'Value': event.value,
                }
                for event in ea.Scalars(tag)
            )

def format_mean_std(row, metric):
    """Format one metric of a pivoted row as ``"mean ± std"``.

    Args:
        row: one row of ``pivot_df`` (a pandas Series) carrying ``Mean_<metric>``
            and, when it exists, ``Std_<metric>``.
        metric: scalar tag to format, e.g. ``"Recall@10/test"``.

    Returns:
        ``"N/A"`` when the mean is absent or NaN, ``"<mean> ± N/A"`` when only
        the standard deviation is absent or NaN, otherwise ``"<mean> ± <std>"``
        with four decimals each.
    """
    mean = row.get(f"Mean_{metric}")
    if mean is None or pd.isna(mean):
        return "N/A"

    # The Std column can be missing even though the Mean column exists:
    # the std of a single sample is NaN and pivot_table drops all-NaN columns.
    std = row.get(f"Std_{metric}")
    if std is None or pd.isna(std):
        return f"{mean:.4f} ± N/A"

    return f"{mean:.4f} ± {std:.4f}"


# Convert to DataFrame
df = pd.DataFrame(all_scalars)

if df.empty:
    print("No valid TensorBoard run files found.")
else:
    # Mean / std of every metric over all logged values (all seeds, all steps)
    # of one (model, dataset, sequence length) configuration.
    agg_df = (
        df.groupby(["Model", "Dataset", "Seq Len", "Metric"])["Value"]
        .agg(Mean="mean", Std="std")
        .reset_index()
    )

    # One row per configuration, one "Mean_<tag>" / "Std_<tag>" column per metric.
    pivot_df = agg_df.pivot_table(index=["Model", "Dataset", "Seq Len"], columns="Metric", values=["Mean", "Std"])
    pivot_df.columns = ['_'.join(col).strip() for col in pivot_df.columns.values]
    pivot_df = pivot_df.reset_index()

    # Convert Seq Len to numeric for sorting ('Unknown' becomes NaN)
    pivot_df["Seq Len"] = pd.to_numeric(pivot_df["Seq Len"], errors='coerce')

    # Human-readable "mean ± std" columns for the headline test metrics
    for metric in ("Recall@10", "NDCG@10", "MRR@10"):
        pivot_df[metric] = pivot_df.apply(format_mean_std, axis=1, metric=f"{metric}/test")

    # Include TotalParams if available
    if "Mean_Model/TotalParams" in pivot_df.columns:
        pivot_df["TotalParams"] = pivot_df["Mean_Model/TotalParams"].apply(
            lambda x: f"{x:,.0f}" if pd.notna(x) else "N/A"
        )
    else:
        pivot_df["TotalParams"] = "N/A"

    # Final columns
    final_cols = ["Model", "Dataset", "Seq Len", "TotalParams", "Recall@10", "NDCG@10", "MRR@10"]

    # Optional extras, appended only when the corresponding tag was logged
    for extra in ("Mean_HitRate@10/test", "Mean_Loss/train", "Mean_EpochTime"):
        if extra in pivot_df.columns:
            final_cols.append(extra)

    # Sort by Model, Dataset, Seq Len (ascending)
    final_df = (
        pivot_df[final_cols]
        .sort_values(by=["Model", "Dataset", "Seq Len"])
        .reset_index(drop=True)
    )

    print("\nConsolidated Results (All Scalars Aggregated):\n")
    print(tabulate(final_df, headers="keys", tablefmt="github", showindex=False))

    final_df.to_excel("tensorboard_summary_aggregated.xlsx", index=False)
    print("Saved final summary table to 'tensorboard_summary_aggregated.xlsx'")

Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_123\events.out.tfevents.1750637781.f46cf7507a6d.6381.31
Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_123\events.out.tfevents.1750637841.f46cf7507a6d.6381.34
Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_2023\events.out.tfevents.1750637833.f46cf7507a6d.6381.33
Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_2023\events.out.tfevents.1750637894.f46cf7507a6d.6381.36
Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_42\events.out.tfevents.1750637721.f46cf7507a6d.6381.28
Processing: ./runs\BERT4Rec_100K_SL_128_Emb_64_seed_42\events.out.tfevents.1750637781.f46cf7507a6d.6381.32
Processing: ./runs\BERT4Rec_100K_SL_32_Emb_64_seed_123\events.out.tfevents.1750637448.f46cf7507a6d.6381.19
Processing: ./runs\BERT4Rec_100K_SL_32_Emb_64_seed_123\events.out.tfevents.1750637509.f46cf7507a6d.6381.22
Processing: ./runs\BERT4Rec_100K_SL_32_Emb_64_seed_2023\events.out.tfevents.1750637500.f46cf7507a6d.6381.20
Processing: ./runs\BERT4Rec_10

In [5]:
%load_ext tensorboard

import os
import tensorboard
from glob import glob

logdirs = [d for d in os.listdir('.') if os.path.isdir(d) and glob(f"{d}/events.out.tfevents.*")]
logdir_spec = ",".join([f"{d}:{d}" for d in logdirs])
print("Launching TensorBoard with logdir_spec:", logdir_spec)
%tensorboard --logdir_spec $logdir_spec

Launching TensorBoard with logdir_spec: BERT4Rec_100K_SL100_Emb64:BERT4Rec_100K_SL100_Emb64,BERT4Rec_100K_SL50_Emb64:BERT4Rec_100K_SL50_Emb64,BERT4Rec_100K_SL75_Emb64:BERT4Rec_100K_SL75_Emb64,BERT4Rec_1M_SL100_Emb128:BERT4Rec_1M_SL100_Emb128,BERT4Rec_1M_SL50_Emb128:BERT4Rec_1M_SL50_Emb128,BERT4Rec_1M_SL75_Emb128:BERT4Rec_1M_SL75_Emb128,SAS4Rec_100K_SL100_Emb64:SAS4Rec_100K_SL100_Emb64,SAS4Rec_100K_SL50_Emb64:SAS4Rec_100K_SL50_Emb64,SAS4Rec_100K_SL75_Emb64:SAS4Rec_100K_SL75_Emb64,SAS4Rec_1M_SL100_Emb128:SAS4Rec_1M_SL100_Emb128,SAS4Rec_1M_SL50_Emb128:SAS4Rec_1M_SL50_Emb128,SAS4Rec_1M_SL75_Emb128:SAS4Rec_1M_SL75_Emb128,xLSTM_100K_SL100_Emb64:xLSTM_100K_SL100_Emb64,xLSTM_100K_SL50_Emb64:xLSTM_100K_SL50_Emb64,xLSTM_100K_SL75_Emb64:xLSTM_100K_SL75_Emb64,xLSTM_1M_SL100_Emb128:xLSTM_1M_SL100_Emb128,xLSTM_1M_SL50_Emb128:xLSTM_1M_SL50_Emb128,xLSTM_1M_SL75_Emb128:xLSTM_1M_SL75_Emb128


In [None]:
# Save the TensorBoard scalars to an Excel file

In [7]:
import os
import pandas as pd
from glob import glob
from tensorboard.backend.event_processing import event_accumulator

# An Excel worksheet holds at most 1,048,576 rows, one of which is the header.
EXCEL_MAX_ROWS = 1_048_576


def find_logdirs(base_dir='.'):
    """List the sub-folders of ``base_dir`` that contain TensorBoard event files.

    Args:
        base_dir: directory whose immediate children are inspected.

    Returns:
        Folder names (relative to ``base_dir``) in sorted order, so the result
        does not depend on the platform's directory ordering.
    """
    return sorted(
        name
        for name in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, name))
        and glob(os.path.join(base_dir, name, "events.out.tfevents.*"))
    )


# Initialize list to store scalar data
all_scalars = []

# Find valid TensorBoard log directories
logdirs = find_logdirs('.')
print(f"Found {len(logdirs)} log directories.")

for logdir in logdirs:
    # size_guidance 0 keeps every scalar event instead of a down-sampled subset.
    ea = event_accumulator.EventAccumulator(logdir, size_guidance={'scalars': 0})
    try:
        ea.Reload()
    except Exception as e:
        print(f"Skipping {logdir} due to error: {e}")
        continue

    # Iterate through all scalar tags; one record per logged event
    for tag in ea.Tags().get("scalars", []):
        all_scalars.extend(
            {
                "run": logdir,
                "tag": tag,
                "step": event.step,
                "wall_time": event.wall_time,
                "value": event.value,
            }
            for event in ea.Scalars(tag)
        )

# Create DataFrame
df = pd.DataFrame(all_scalars)

# Save to Excel
output_path = "tensorboard_scalars.xlsx"
if df.empty:
    # Nothing was read: do not leave an empty workbook behind.
    print("No scalar data found; nothing was saved.")
elif len(df) + 1 > EXCEL_MAX_ROWS:
    # to_excel raises when the sheet limit is exceeded; fall back to CSV.
    output_path = "tensorboard_scalars.csv"
    df.to_csv(output_path, index=False)
    print(f"\n{len(df)} rows exceed the Excel sheet limit; CSV file saved to: {output_path}")
else:
    df.to_excel(output_path, index=False)
    print(f"\nExcel file saved to: {output_path} ({len(df)} rows). Ready for Excel.")


Found 18 log directories.

✅ Excel file saved to: tensorboard_scalars.xlsx (763598 rows). Ready for Tableau/Excel.


In [1]:
# Load the TensorBoard notebook extension; the reload that follows refreshes it
# in case it had already been loaded in this kernel session.
%load_ext tensorboard
%reload_ext tensorboard
# Open the TensorBoard UI inline on the event files stored under runs/.
%tensorboard --logdir runs/