# V5 Training Monitor

This notebook monitors the training metrics of the currently running IndraV5 experiment.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import time
from IPython.display import clear_output

# Apply seaborn's "darkgrid" theme globally so every figure drawn below uses it.
sns.set_theme(style="darkgrid")

In [None]:
def _run_last_activity(run_path):
    """Return the newest mtime among a run directory and its metrics.csv.

    On typical filesystems a directory's own mtime only moves when entries
    are created, removed, or renamed inside it; appending rows to an existing
    metrics.csv leaves it untouched. Taking the file's mtime into account
    keeps the run that is actively logging ranked first.

    Returns ``-inf`` when neither timestamp can be read, so such a run sorts
    last instead of raising.
    """
    newest = float("-inf")
    for path in (run_path, os.path.join(run_path, "metrics.csv")):
        try:
            newest = max(newest, os.path.getmtime(path))
        except OSError:
            # The path is missing (no metrics written yet) or vanished while
            # scanning; rank the run by whatever timestamp is still readable.
            continue
    return newest


def get_latest_run_metrics():
    """Load metrics.csv from the most recently active run under ``../runs``.

    ``../runs`` is resolved relative to the current working directory. Each
    immediate subdirectory is treated as one run; the run with the newest
    activity (directory mtime or metrics.csv mtime, whichever is later) is
    selected.

    Returns:
        The DataFrame parsed from the selected run's ``metrics.csv``, or
        ``None`` when the runs directory is missing, contains no run
        subdirectories, the selected run has no ``metrics.csv``, or the file
        cannot be parsed. A short message explaining the reason is printed in
        every ``None`` case.
    """
    runs_dir = "../runs"
    # isdir (not exists) so that a stray file named "runs" is reported rather
    # than crashing in os.listdir.
    if not os.path.isdir(runs_dir):
        print("No runs directory found.")
        return None

    candidates = (os.path.join(runs_dir, name) for name in os.listdir(runs_dir))
    run_paths = [path for path in candidates if os.path.isdir(path)]
    if not run_paths:
        print("No runs found.")
        return None

    # Only the newest run is needed, so pick it directly instead of sorting.
    latest_run = max(run_paths, key=_run_last_activity)

    metrics_file = os.path.join(latest_run, "metrics.csv")
    if not os.path.exists(metrics_file):
        print(f"No metrics.csv found in {latest_run}")
        return None

    print(f"Monitoring: {latest_run}")
    try:
        return pd.read_csv(metrics_file)
    except Exception as e:
        # Deliberately broad: the file may be empty or half-written while
        # training is appending to it, and the monitor should report the
        # problem and retry on the next refresh rather than stop.
        print(f"Error reading CSV: {e}")
        return None

In [None]:
def plot_metrics(df):
    """Render a 2x2 dashboard of training curves from a metrics DataFrame.

    Reads the columns ``step``, ``total_loss``, ``ce_loss``, ``phase_loss``,
    ``mag_loss`` and ``lr`` from ``df`` and plots each against ``step``.

    Args:
        df: Metrics table, or ``None``. Nothing is drawn when it is ``None``
            or has zero rows.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    if df is None or len(df) == 0:
        return

    _, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Top-left: total and cross-entropy loss share one axis for comparison.
    loss_ax = axes[0, 0]
    sns.lineplot(data=df, x="step", y="total_loss", ax=loss_ax, label="Total Loss")
    sns.lineplot(
        data=df, x="step", y="ce_loss", ax=loss_ax, label="CE Loss", linestyle="--"
    )
    loss_ax.set_title("Training Loss")
    loss_ax.set_xlabel("Step")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend()

    # Remaining panels each show a single series:
    # (axis, column, line color, title, y-axis label).
    single_series_panels = (
        (axes[0, 1], "phase_loss", "orange", "Phase Clustering (R)", "R (Circular Variance)"),
        (axes[1, 0], "mag_loss", "green", "Magnitude Regulation", "Mag Reg Loss"),
        (axes[1, 1], "lr", "purple", "Learning Rate", "LR"),
    )
    for panel_ax, column, line_color, title, y_label in single_series_panels:
        sns.lineplot(data=df, x="step", y=column, ax=panel_ax, color=line_color)
        panel_ax.set_title(title)
        panel_ax.set_xlabel("Step")
        panel_ax.set_ylabel(y_label)

    plt.tight_layout()
    plt.show()

In [None]:
# Real-time Loop
# Repeatedly reload the latest run's metrics and redraw the dashboard until
# the cell is interrupted. The whole loop sits inside the try so that an
# interrupt raised at any point in an iteration ends monitoring cleanly.
try:
    while True:
        # Drop the previous refresh's output so the new plot replaces it.
        clear_output(wait=True)
        # May be None (a reason is printed); plot_metrics then draws nothing.
        df = get_latest_run_metrics()
        plot_metrics(df)
        time.sleep(30)  # Refresh every 30s
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop monitoring.
    print("Stopped Monitoring.")