# Experiment Analysis Framework

This notebook aggregates prior training artifacts from **neural-network-lab-python**, surfaces diagnostic visualizations, and recommends data-driven hyperparameter refinements for future experiments. It is designed to be reusable across training runs with minimal manual setup.

## Workflow Overview

1. Validate the presence of required configs, logs, scalers, and weight checkpoints.
2. Load active and historical configuration payloads and align them with training outcomes.
3. Ingest `loss_history.csv`, `training_results.csv`, and particle simulation data for analytics.
4. Reconstruct the latest model checkpoint, generate predictions, and evaluate residuals.
5. Render visual diagnostics (loss curves, learning-rate sweeps, residual histograms, correlation heatmap).
6. Summarize run health, recommend hyperparameter sweeps, and capture actionable next steps.

In [None]:
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from IPython.display import Markdown, display

from advanced_neural_network import AdvancedNeuralNetwork
from data_processing import complete_data_pipeline, load_and_validate_data
from ml_utils import compute_loss_weights
from weight_constraints import BinaryWeightConstraintChanges, BinaryWeightConstraintMax, OscillationDampener

# Cap DataFrame previews at 60 rows so long tables stay readable in cell output.
pd.options.display.max_rows = 60
# Render floats with thousands separators and four decimal places.
pd.options.display.float_format = '{:,.4f}'.format

# Use seaborn's "whitegrid" theme for plots drawn after this point.
sns.set_theme(style="whitegrid")

In [None]:
# Directory name that resolve_project_paths() looks for when locating the
# project root from the current working directory.
PROJECT_NAME = "neural-network-lab-python"

# Columns selected from the particle dataset as model inputs; the list length
# also sets the input shape used when the model is rebuilt.
INPUT_FEATURES = [
    "mass",
    "initial_velocity_x",
    "initial_velocity_y",
    "initial_position_x",
    "initial_position_y",
    "charge",
    "magnetic_field_strength",
    "simulation_time"
]

# Columns compared against model predictions; the list length also sets the
# output width used when the model is rebuilt.
OUTPUT_TARGETS = [
    "final_velocity_x",
    "final_velocity_y",
    "final_position_x",
    "final_position_y",
    "kinetic_energy",
    "trajectory_length"
]

# Single seed shared by NumPy, TensorFlow, and the row sampling in
# compute_predictions() so repeated runs of the notebook are comparable.
ANALYSIS_SEED = 42

np.random.seed(ANALYSIS_SEED)
tf.random.set_seed(ANALYSIS_SEED)


def format_bytes(size: Optional[int]) -> Optional[str]:
    """Render a byte count as text with one decimal place and a unit suffix.

    Returns ``None`` when ``size`` is ``None``. Values are divided by 1024
    until they drop below 1024 or the largest unit (TB) is reached.
    """
    if size is None:
        return None

    suffixes = ("B", "KB", "MB", "GB", "TB")
    last_position = len(suffixes) - 1

    scaled = float(size)
    position = 0

    # "not <" rather than ">=" so NaN keeps walking up to the last unit.
    while not scaled < 1024.0 and position < last_position:
        scaled /= 1024.0
        position += 1

    return f"{scaled:.1f} {suffixes[position]}"


def resolve_project_paths() -> Dict[str, Path]:
    """Resolve key project directories relative to this notebook.

    Starts from the current working directory. If that directory is not named
    ``PROJECT_NAME``, the nearest ancestor with that name becomes the project
    root; when no ancestor matches, the working directory itself is used.

    Side effects:
        Creates ``training_output/analysis`` and
        ``training_output/analysis/figures`` under the root if they are missing.

    Returns:
        Mapping with the keys ``project_root``, ``config_dir``, ``output_dir``,
        ``analysis_dir``, ``figures_dir``, ``data_path``, ``scaler_X`` and
        ``scaler_y``.
    """
    root = Path.cwd()

    if root.name != PROJECT_NAME:
        # Path.parents is ordered nearest-first. Taking the first match stops
        # the search at the closest project directory, so a checkout nested
        # inside a same-named folder resolves to the inner one, not the
        # outermost ancestor.
        root = next((parent for parent in root.parents if parent.name == PROJECT_NAME), root)

    config_dir = root / "ml_config"
    output_dir = root / "training_output"
    analysis_dir = output_dir / "analysis"
    figures_dir = analysis_dir / "figures"

    analysis_dir.mkdir(parents=True, exist_ok=True)
    figures_dir.mkdir(parents=True, exist_ok=True)

    return {
        "project_root": root,
        "config_dir": config_dir,
        "output_dir": output_dir,
        "analysis_dir": analysis_dir,
        "figures_dir": figures_dir,
        "data_path": root / "particle_data.csv",
        "scaler_X": root / "scaler_X.pkl",
        "scaler_y": root / "scaler_y.pkl"
    }


def validate_required_artifacts(paths: Dict[str, Path]) -> pd.DataFrame:
    """Build an inventory table of the artifacts the notebook relies on.

    Each row reports whether the artifact exists, its path (relative to the
    project root when it exists), its size for regular files, its modification
    time, and an optional remediation note. Critical artifacts are listed
    first, each group ordered alphabetically by artifact label.
    """
    remediation = {
        "particle_data": "Regenerate via data pipeline if missing.",
        "scaler_X": "Rebuilt automatically through complete_data_pipeline.",
        "scaler_y": "Rebuilt automatically through complete_data_pipeline."
    }

    # (label, location, critical) — critical entries first, then optional ones.
    inventory: List[Tuple[str, Path, bool]] = [
        ("model_config", paths["config_dir"] / "model_config.json", True),
        ("training_config", paths["config_dir"] / "training_config.json", True),
        ("loss_history", paths["output_dir"] / "loss_history.csv", True),
        ("training_results", paths["output_dir"] / "training_results.csv", True),
        ("configuration_log", paths["output_dir"] / "configuration_log.csv", True),
        ("particle_data", paths["data_path"], True),
        ("scaler_X", paths["scaler_X"], True),
        ("scaler_y", paths["scaler_y"], True),
        ("analysis_dir", paths["analysis_dir"], False),
        ("figures_dir", paths["figures_dir"], False),
    ]

    def describe(label: str, location: Path, critical: bool) -> Dict[str, Any]:
        """Collect the metadata row for a single artifact."""
        present = location.exists()

        if present:
            stats = location.stat()
            # Directories get no size; only regular files are measured.
            byte_count = stats.st_size if location.is_file() else None
            touched = pd.Timestamp(stats.st_mtime, unit="s")
            shown_path = str(location.relative_to(paths["project_root"]))
        else:
            byte_count = None
            touched = None
            shown_path = str(location)

        return {
            "artifact": label,
            "critical": critical,
            "exists": present,
            "path": shown_path,
            "size_bytes": byte_count,
            "size_readable": format_bytes(byte_count),
            "modified": touched,
            "note": remediation.get(label)
        }

    status_df = pd.DataFrame([describe(*entry) for entry in inventory])

    if not status_df.empty:
        status_df = status_df.sort_values(["critical", "artifact"], ascending=[False, True]).reset_index(drop=True)

    return status_df


def list_checkpoint_weights(paths: Dict[str, Path]) -> pd.DataFrame:
    """List available weight checkpoints with epoch metadata.

    Scans the project root for ``model_weights_epoch_<N>.weights.h5`` files.
    Files whose epoch token is not a plain decimal number (for example
    ``model_weights_epoch_best.weights.h5``) are skipped, so one stray file
    cannot abort the whole listing with a ``ValueError``.

    Returns:
        DataFrame sorted by ``epoch`` with the columns ``epoch``, ``name``,
        ``path`` (relative to the project root), ``modified``, ``size_bytes``,
        ``size_readable`` and ``is_latest``. An empty, column-less DataFrame is
        returned when no usable checkpoint exists.
    """
    prefix = "model_weights_epoch_"
    suffix = ".weights.h5"

    project_root = paths["project_root"]

    rows: List[Dict[str, Any]] = []

    for file_path in sorted(project_root.glob(f"{prefix}*{suffix}")):
        name = file_path.name

        # Everything between the fixed prefix and suffix is the epoch token.
        epoch_token = name[len(prefix):-len(suffix)]

        # isascii() guards against Unicode digits that isdigit() accepts but int() rejects.
        if not (epoch_token.isascii() and epoch_token.isdigit()):
            continue

        # One stat() call serves both the timestamp and the size.
        stats = file_path.stat()

        rows.append({
            "epoch": int(epoch_token),
            "name": name,
            "path": str(file_path.relative_to(project_root)),
            "modified": pd.Timestamp(stats.st_mtime, unit="s"),
            "size_bytes": stats.st_size
        })

    checkpoint_df = pd.DataFrame(rows)

    if checkpoint_df.empty:
        return checkpoint_df

    checkpoint_df = checkpoint_df.sort_values("epoch").reset_index(drop=True)

    latest_epoch = checkpoint_df["epoch"].max()

    checkpoint_df["size_readable"] = checkpoint_df["size_bytes"].apply(format_bytes)

    checkpoint_df["is_latest"] = checkpoint_df["epoch"] == latest_epoch

    return checkpoint_df

In [None]:
def load_configs(paths: Dict[str, Path]) -> Tuple[Dict[str, Any], Dict[str, Any], pd.DataFrame]:
    """Load active configs and historical configuration snapshots with derived metrics.

    Reads ``model_config.json`` and ``training_config.json`` from
    ``paths["config_dir"]`` and every ``training_config_*.json`` snapshot from
    ``paths["output_dir"]``.

    Each snapshot is flattened into one row:
    model settings keep their own keys, training settings get a ``train_``
    prefix, and selected ``performance_summary`` fields are copied under
    stable column names. Snapshot sections stored as JSON ``null`` are treated
    as empty.

    Returns:
        ``(model_config, training_config, snapshots_df)``. ``snapshots_df`` is
        sorted by ``timestamp`` and carries the derived ``r2_delta`` column
        (plus ``avg_epoch_time_calc`` when ``train_epochs`` is available). It
        is an empty DataFrame when no snapshot files exist.

    Raises:
        FileNotFoundError: if either active config file is missing.
        json.JSONDecodeError: if any file is not valid JSON.
    """
    def read_json(path: Path) -> Any:
        # JSON is UTF-8 by specification; the platform default (e.g. cp1252
        # on Windows) would mis-decode non-ASCII content.
        with path.open(encoding="utf-8") as handle:
            return json.load(handle)

    model_config = read_json(paths["config_dir"] / "model_config.json")

    training_config = read_json(paths["config_dir"] / "training_config.json")

    # (output column, key inside performance_summary)
    summary_fields = (
        ("best_r2", "best_r2"),
        ("final_r2", "current_r2"),
        ("best_epoch", "best_r2_epoch"),
        ("avg_epoch_time_logged", "avg_epoch_time"),
        ("total_training_time", "total_training_time"),
        ("weight_modifications_used", "weight_modifications_used"),
        ("peak_memory_mb", "peak_memory_mb"),
    )

    snapshots: List[Dict[str, Any]] = []

    for config_path in sorted(paths["output_dir"].glob("training_config_*.json")):
        payload = read_json(config_path)

        combined: Dict[str, Any] = {
            "config_id": payload.get("config_id"),
            "timestamp": payload.get("timestamp")
        }

        # "or {}" also covers sections explicitly written as null.
        combined.update(payload.get("model_config") or {})

        for key, value in (payload.get("training_config") or {}).items():
            combined[f"train_{key}"] = value

        summary_payload = payload.get("performance_summary") or {}

        for column, source_key in summary_fields:
            combined[column] = summary_payload.get(source_key)

        snapshots.append(combined)

    snapshots_df = pd.DataFrame(snapshots)

    if snapshots_df.empty:
        return model_config, training_config, snapshots_df

    snapshots_df["timestamp"] = pd.to_datetime(snapshots_df["timestamp"])

    # Coerce before arithmetic: a column made up solely of missing values is
    # object-typed and would raise on subtraction/division.
    if {"total_training_time", "train_epochs"}.issubset(snapshots_df.columns):
        total_time = pd.to_numeric(snapshots_df["total_training_time"], errors="coerce")
        epoch_count = pd.to_numeric(snapshots_df["train_epochs"], errors="coerce")

        snapshots_df["avg_epoch_time_calc"] = total_time / epoch_count

    best_r2 = pd.to_numeric(snapshots_df["best_r2"], errors="coerce")
    final_r2 = pd.to_numeric(snapshots_df["final_r2"], errors="coerce")

    snapshots_df["r2_delta"] = best_r2 - final_r2

    snapshots_df = snapshots_df.sort_values("timestamp").reset_index(drop=True)

    return model_config, training_config, snapshots_df


def load_training_logs(paths: Dict[str, Path]) -> Dict[str, pd.DataFrame]:
    """Load loss history and training results with derived analytics.

    Reads ``loss_history.csv`` and ``training_results.csv`` from
    ``paths["output_dir"]``.

    Returns:
        Dict with four DataFrames:

        * ``loss_records`` – raw loss rows ordered by epoch (original row order
          kept within an epoch) plus ``loss_ewm``, an exponentially weighted
          mean of ``combined_loss`` (alpha 0.15).
        * ``epoch_summary`` – per-epoch mean/std of ``combined_loss`` and mean
          of ``mae`` / ``mse``.
        * ``results`` – per-epoch results ordered by epoch with cumulative
          time, loss deltas, train/validation gap, rolling epoch time and
          memory headroom relative to the observed peak.
        * ``merged_metrics`` – ``results`` left-joined with ``epoch_summary``
          plus 5-epoch rolling means of validation and training loss.
    """
    loss_path = paths["output_dir"] / "loss_history.csv"

    results_path = paths["output_dir"] / "training_results.csv"

    loss_records = pd.read_csv(loss_path)

    # A stable sort is required here. The default quicksort may shuffle rows
    # that share an epoch, and the EWM below depends on row order.
    loss_records = loss_records.sort_values(["epoch"], kind="mergesort").reset_index(drop=True)

    loss_records["loss_ewm"] = loss_records["combined_loss"].ewm(alpha=0.15).mean()

    epoch_summary = (
        loss_records.groupby("epoch").agg(
            combined_loss_mean=("combined_loss", "mean"),
            combined_loss_std=("combined_loss", "std"),
            mae_mean=("mae", "mean"),
            mse_mean=("mse", "mean")
        ).reset_index()
    )

    results_df = pd.read_csv(results_path)

    results_df["timestamp"] = pd.to_datetime(results_df["timestamp"])

    # Stable for the same reason: cumsum/diff/rolling below are order-sensitive.
    results_df = results_df.sort_values("epoch", kind="mergesort").reset_index(drop=True)

    results_df["epoch"] = results_df["epoch"].astype(int)

    results_df["cumulative_time"] = results_df["epoch_time"].cumsum()

    results_df["val_loss_delta"] = results_df["val_loss"].diff()

    results_df["train_val_gap"] = results_df["val_loss"] - results_df["train_loss"]

    results_df["val_mae_delta"] = results_df["val_mae"].diff()

    results_df["epoch_time_rolling"] = results_df["epoch_time"].rolling(5, min_periods=1).mean()

    # Headroom is measured against the highest memory reading in this log.
    results_df["memory_headroom_mb"] = results_df["memory_mb"].max() - results_df["memory_mb"]

    merged_metrics = results_df.merge(epoch_summary, on="epoch", how="left")

    merged_metrics["val_loss_rolling"] = merged_metrics["val_loss"].rolling(5, min_periods=1).mean()

    merged_metrics["train_loss_rolling"] = merged_metrics["train_loss"].rolling(5, min_periods=1).mean()

    return {
        "loss_records": loss_records,
        "epoch_summary": epoch_summary,
        "results": results_df,
        "merged_metrics": merged_metrics
    }


def load_scalers(paths: Dict[str, Path]) -> Tuple[Any, Any]:
    """Return the cached input and output scalers as ``(scaler_X, scaler_y)``.

    When a scaler file is missing, ``complete_data_pipeline`` is run on the
    particle dataset (at most once per call) and the load is retried.
    """
    x_location = paths["scaler_X"]
    y_location = paths["scaler_y"]

    already_rebuilt = False

    def rebuild_once() -> None:
        """Run the data pipeline unless this call has already done so."""
        nonlocal already_rebuilt

        if not already_rebuilt:
            complete_data_pipeline(csv_path=str(paths["data_path"]))

            already_rebuilt = True

    def load_or_rebuild(location: Path) -> Any:
        """Load one scaler, rebuilding the artifacts first if the file is absent."""
        try:
            return joblib.load(location)
        except FileNotFoundError:
            rebuild_once()

            return joblib.load(location)

    scaler_X = load_or_rebuild(x_location)
    scaler_y = load_or_rebuild(y_location)

    return scaler_X, scaler_y


def load_particle_data(paths: Dict[str, Path]) -> pd.DataFrame:
    """Load the particle dataset through ``load_and_validate_data``.

    Rows are ordered by ``particle_id`` when that column is present; in either
    case the result carries a fresh 0..n-1 index.
    """
    frame = load_and_validate_data(csv_path=str(paths["data_path"]))

    ordered = frame.sort_values("particle_id") if "particle_id" in frame.columns else frame

    return ordered.reset_index(drop=True)

In [None]:
def build_model_from_config(model_config: Dict[str, Any], training_config: Dict[str, Any]) -> tf.keras.Model:
    """Build and compile a network from the merged model and training settings.

    Training settings override model settings on key collisions. The
    oscillation dampener is switched on unless the configs say otherwise.
    """
    merged_settings = {**model_config, **training_config}

    if "enable_weight_oscillation_dampener" not in merged_settings:
        merged_settings["enable_weight_oscillation_dampener"] = True

    network = AdvancedNeuralNetwork(
        input_shape=(len(INPUT_FEATURES),),
        output_shape=len(OUTPUT_TARGETS),
        config=merged_settings
    )

    network.compile_model()

    return network.model


def load_model_checkpoint(paths: Dict[str, Path], model_config: Dict[str, Any], training_config: Dict[str, Any], checkpoint_index: pd.DataFrame, checkpoint_name: Optional[str] = None) -> Tuple[Optional[tf.keras.Model], Optional[Dict[str, Any]]]:
    """Rebuild the model and load weights from one checkpoint.

    With no ``checkpoint_name`` the last row of ``checkpoint_index`` is used;
    otherwise the first row whose ``name`` matches. Returns ``(None, None)``
    when the index is empty or the requested name is not listed. On success
    returns the model and a metadata dict describing the checkpoint.
    """
    if checkpoint_index.empty:
        return None, None

    if checkpoint_name is None:
        chosen = checkpoint_index.iloc[-1]
    else:
        candidates = checkpoint_index.loc[checkpoint_index["name"] == checkpoint_name]

        if candidates.empty:
            return None, None

        chosen = candidates.iloc[0]

    project_root = paths["project_root"]

    weights_path = project_root / chosen["path"]

    # Drop any previous Keras graph state before rebuilding the model.
    tf.keras.backend.clear_session()

    model = build_model_from_config(model_config=model_config, training_config=training_config)

    model.load_weights(weights_path)

    metadata: Dict[str, Any] = {}
    metadata["epoch"] = int(chosen["epoch"])
    metadata["weights_path"] = str(weights_path.relative_to(project_root))
    metadata["size_bytes"] = int(chosen["size_bytes"])
    metadata["size_readable"] = chosen.get("size_readable")
    metadata["modified"] = chosen["modified"]
    metadata["parameter_count"] = int(model.count_params())

    return model, metadata


def compute_predictions(model: Optional[tf.keras.Model], scaler_X: Any, scaler_y: Any, particle_df: pd.DataFrame, sample_size: int = 256) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Predict on (a sample of) the particle data and summarise the residuals.

    Returns ``(residual_df, metrics)``. ``residual_df`` holds actual,
    predicted and residual values per target plus the per-row residual norm
    (and its z-score when the norms are not all equal). ``metrics`` holds
    overall and per-target MAE / RMSE figures. With no model, an empty
    DataFrame and an empty dict are returned.
    """
    if model is None:
        return pd.DataFrame(), {}

    inputs_frame = particle_df[INPUT_FEATURES].copy()

    # Down-sample reproducibly, then restore the original row order.
    if sample_size and len(inputs_frame) > sample_size:
        inputs_frame = inputs_frame.sample(sample_size, random_state=ANALYSIS_SEED).sort_index()

    row_index = inputs_frame.index

    if scaler_X is not None:
        model_inputs = scaler_X.transform(inputs_frame.values)
    else:
        model_inputs = inputs_frame.values

    raw_predictions = model.predict(model_inputs, verbose=0)

    # Predictions are mapped back to original units when a target scaler exists.
    if scaler_y is not None:
        predictions = scaler_y.inverse_transform(raw_predictions)
    else:
        predictions = raw_predictions

    actual_outputs = particle_df.loc[row_index, OUTPUT_TARGETS].values

    residuals = predictions - actual_outputs

    residual_df = pd.DataFrame(index=row_index)

    if "particle_id" in particle_df.columns:
        residual_df["particle_id"] = particle_df.loc[row_index, "particle_id"]

    for column, target in enumerate(OUTPUT_TARGETS):
        residual_df[f"actual_{target}"] = actual_outputs[:, column]
        residual_df[f"pred_{target}"] = predictions[:, column]
        residual_df[f"residual_{target}"] = residuals[:, column]

    residual_df["residual_norm"] = np.linalg.norm(residuals, axis=1)

    norm_series = residual_df["residual_norm"]
    norm_mean = norm_series.mean()
    norm_std = norm_series.std(ddof=0)

    # The z-score column is only added when the spread is strictly positive.
    if norm_std > 0:
        residual_df["residual_norm_z"] = (residual_df["residual_norm"] - norm_mean) / norm_std

    def error_stats(values: np.ndarray) -> Dict[str, float]:
        """MAE, RMSE and mean bias for one target's residuals."""
        return {
            "mae": float(np.mean(np.abs(values))),
            "rmse": float(np.sqrt(np.mean(np.square(values)))),
            "bias": float(np.mean(values))
        }

    per_target: Dict[str, Dict[str, float]] = {
        target: error_stats(residuals[:, column])
        for column, target in enumerate(OUTPUT_TARGETS)
    }

    metrics: Dict[str, Any] = {
        "samples": int(len(residual_df)),
        "mae": float(np.mean(np.abs(residuals))),
        "rmse": float(np.sqrt(np.mean(np.square(residuals)))),
        "residual_norm_median": float(residual_df["residual_norm"].median()),
        "residual_norm_p95": float(residual_df["residual_norm"].quantile(0.95)),
        "targets": per_target
    }

    return residual_df, metrics


def summarize_run_performance(results_df: pd.DataFrame, epoch_summary: pd.DataFrame) -> pd.DataFrame:
    """Condense a training run into a small table of headline indicators.

    Returns a DataFrame with ``metric``, ``value`` and ``notes`` columns, or
    an empty DataFrame when ``results_df`` has no rows.
    """
    if results_df.empty:
        return pd.DataFrame()

    def kpi(metric: str, value: Any, notes: str) -> Dict[str, Any]:
        """Assemble one row of the summary table."""
        return {"metric": metric, "value": value, "notes": notes}

    lowest_val = results_df.loc[int(results_df["val_loss"].idxmin())]
    last_epoch = results_df.iloc[-1]
    first_epoch = results_df.iloc[0]
    highest_r2 = results_df.loc[results_df["r2_score"].idxmax()]

    val_drop = float(first_epoch["val_loss"] - lowest_val["val_loss"])

    # Mean of the per-epoch loss spread over the five most recent epochs.
    if epoch_summary.empty:
        recent_spread = float("nan")
    else:
        recent_spread = float(epoch_summary["combined_loss_std"].tail(5).mean())

    epoch_times = results_df["epoch_time"]

    return pd.DataFrame([
        kpi("Best validation loss", lowest_val["val_loss"], f"Epoch {int(lowest_val['epoch'])}"),
        kpi("Final validation loss", last_epoch["val_loss"], f"Train gap {last_epoch['train_val_gap']:.4f}"),
        kpi("Validation improvement", val_drop, "Drop from first to best epoch"),
        kpi("Validation stability (std last 5 epochs)", recent_spread, "Lower is more stable"),
        kpi("Average epoch time (last 10 epochs)", epoch_times.tail(10).mean(), "Supports batch-size experiments"),
        kpi("Peak R²", highest_r2["r2_score"], f"Epoch {int(highest_r2['epoch'])}"),
        kpi("Total recorded training time", epoch_times.sum(), "seconds")
    ])


def suggest_hyperparameters(model_config: Dict[str, Any], training_config: Dict[str, Any], config_history: pd.DataFrame, results_df: pd.DataFrame) -> pd.DataFrame:
    """Derive hyperparameter sweep recommendations from observed metrics.

    Heuristics applied, in order:

    * ``learning_rate`` – validation loss moved by less than 0.01 over the
      last five epochs and the best epoch is more than five epochs old.
    * ``dropout_rate`` – mean validation-minus-training loss over the last
      five epochs exceeds 0.05.
    * ``batch_size`` – mean epoch time over the last ten epochs is below 1.5
      and the memory log shows headroom; candidates are the current batch
      size, 1.5x and 2x (16 -> 16, 24, 32).
    * ``epochs`` – the best validation epoch falls within the last two
      configured epochs, i.e. the run was still improving when it stopped.
    * ``learning_rate`` (historical) – past runs with a learning rate more
      than 20 % away from the current one reached a higher mean final R².

    Args:
        model_config: Active model settings; ``learning_rate`` defaults to 0.001.
        training_config: Active training settings (``epochs``, ``batch_size``).
        config_history: Snapshot table; only ``learning_rate`` / ``final_r2``
            are read, and only when the table is non-empty.
        results_df: Per-epoch results with ``epoch``, ``val_loss``,
            ``train_val_gap``, ``epoch_time`` and ``memory_headroom_mb``.

    Returns:
        DataFrame with ``parameter``, ``proposed_values``, ``rationale`` and
        ``constraints`` columns, or an empty DataFrame when there is nothing
        to recommend or ``results_df`` is empty.
    """
    if results_df.empty:
        return pd.DataFrame()

    suggestions: List[Dict[str, Any]] = []

    base_lr = float(model_config.get("learning_rate", 0.001))

    final_window = results_df.tail(5)

    val_loss_range = float(final_window["val_loss"].max() - final_window["val_loss"].min())

    best_epoch = int(results_df.loc[results_df["val_loss"].idxmin(), "epoch"])

    final_epoch = int(results_df.iloc[-1]["epoch"])

    total_epochs = int(training_config.get("epochs", final_epoch + 1))

    if val_loss_range < 0.01 and final_epoch - best_epoch > 5:
        proposals = sorted({round(base_lr * factor, 6) for factor in (0.5, 0.8, 1.2)})

        suggestions.append({
            "parameter": "learning_rate",
            "proposed_values": proposals,
            "rationale": "Validation loss plateaued across the last epochs; nudge the optimizer step to reintroduce progress.",
            "constraints": "Keep BinaryWeightConstraintMax(max_binary_digits=5) engaged for stability."
        })

    train_val_gap = float(final_window["train_val_gap"].mean())

    if train_val_gap > 0.05:
        suggestions.append({
            "parameter": "dropout_rate",
            "proposed_values": [0.05, 0.1, 0.15],
            "rationale": "Consistent validation > training loss points to mild overfitting; mild dropout can regularize activations.",
            "constraints": "Retain enable_weight_oscillation_dampener=True to temper weight swings."
        })

    avg_epoch_time = float(results_df["epoch_time"].tail(10).mean())

    memory_headroom = float(results_df["memory_headroom_mb"].tail(10).mean())

    if avg_epoch_time < 1.5 and memory_headroom > 0:
        baseline_batch = int(training_config.get("batch_size", 16))

        # Scale candidates from the baseline so they are never smaller than
        # the current batch size (the rationale is about *larger* batches).
        candidate_batches = sorted({baseline_batch, baseline_batch * 3 // 2, baseline_batch * 2})

        suggestions.append({
            "parameter": "batch_size",
            "proposed_values": candidate_batches,
            "rationale": "Epoch time and memory logs show headroom; larger batches could reduce gradient variance.",
            "constraints": "Validate GPU memory against peak usage before committing."
        })

    # Compare the *best* epoch with the ceiling. The final epoch is always at
    # the ceiling for a completed run, so testing it would fire every time.
    if best_epoch >= total_epochs - 2:
        extension_epochs = sorted({total_epochs + 10, total_epochs + 20})

        suggestions.append({
            "parameter": "epochs",
            "proposed_values": extension_epochs,
            "rationale": "Best epoch occurs near training ceiling; extending training may unlock additional gains.",
            "constraints": "Monitor for overfitting; stop early if val loss degrades."
        })

    if not config_history.empty and "learning_rate" in config_history.columns:
        grouped = config_history.groupby("learning_rate")["final_r2"].mean().sort_values()

        if len(grouped) > 1:
            top_lr = grouped.idxmax()

            # base_lr > 0 guards the relative-difference division.
            if base_lr > 0 and abs(top_lr - base_lr) / base_lr > 0.2:
                suggestions.append({
                    "parameter": "learning_rate",
                    "proposed_values": [round(float(top_lr), 6)],
                    "rationale": "Historical sweep points to a different learning rate yielding higher final R².",
                    "constraints": "Pair with BinaryWeightConstraintChanges() to keep update granularity consistent."
                })

    if not suggestions:
        return pd.DataFrame()

    recommendations = pd.DataFrame(suggestions)

    return recommendations.drop_duplicates(subset=["parameter", "rationale"])

In [None]:
# Locate the project and report where the notebook is reading from.
paths = resolve_project_paths()
project_root_banner = f"**Project root:** `{paths['project_root']}`"
display(Markdown(project_root_banner))

# Inventory every artifact the later cells depend on.
artifact_status = validate_required_artifacts(paths)
display(Markdown("### Artifact Inventory"))
display(artifact_status)

# Rows whose "exists" flag is False need attention before continuing.
is_missing = ~artifact_status["exists"]
missing_artifacts = artifact_status.loc[is_missing]

if missing_artifacts.empty:
    display(Markdown("✅ All critical artifacts are present."))
else:
    display(Markdown("⚠️ **Missing artifacts detected. Review notes before continuing.**"))
    display(missing_artifacts)

**Project root:** `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python`

### Artifact Inventory

Unnamed: 0,artifact,critical,exists,path,size_bytes,size_readable,modified,note
0,configuration_log,True,True,training_output\configuration_log.csv,3666.0,3.6 KB,2025-09-30 18:49:41.074594975,
1,loss_history,True,True,training_output\loss_history.csv,1434016.0,1.4 MB,2025-09-30 18:49:41.202371597,
2,model_config,True,True,ml_config\model_config.json,263.0,263.0 B,2025-09-30 07:43:10.036819696,
3,particle_data,True,True,particle_data.csv,251578.0,245.7 KB,2025-09-30 06:52:34.713520765,Regenerate via data pipeline if missing.
4,scaler_X,True,True,scaler_X.pkl,807.0,807.0 B,2025-09-30 18:38:17.606033087,Rebuilt automatically through complete_data_pi...
5,scaler_y,True,True,scaler_y.pkl,759.0,759.0 B,2025-09-30 18:38:17.607032776,Rebuilt automatically through complete_data_pi...
6,training_config,True,True,ml_config\training_config.json,37.0,37.0 B,2025-09-30 18:38:03.289540291,
7,training_results,True,True,training_output\training_results.csv,99525.0,97.2 KB,2025-09-30 18:49:41.072592497,
8,analysis_dir,False,True,training_output\analysis,,,2025-09-30 07:11:31.113477707,
9,figures_dir,False,True,training_output\analysis\figures,,,2025-09-30 07:12:11.038027287,


✅ All critical artifacts are present.

In [None]:
model_config, training_config, config_history = load_configs(paths)

# Show the two active configuration payloads as labelled Series.
display(Markdown("### Active Model Configuration"))
display(pd.Series(model_config, name="model_config"))

display(Markdown("### Active Training Configuration"))
display(pd.Series(training_config, name="training_config"))

if not config_history.empty:
    display(Markdown("### Historical Configuration Snapshots"))

    # Preferred column order; columns absent from the history are dropped.
    preferred_columns = (
        "timestamp", "config_id", "learning_rate", "dropout_rate", "train_batch_size", "train_epochs", "best_r2", "final_r2", "r2_delta", "avg_epoch_time_logged", "avg_epoch_time_calc", "total_training_time"
    )
    history_columns = [name for name in preferred_columns if name in config_history.columns]

    display(config_history[history_columns])

    # Summary statistics only make sense for integer/float columns.
    numeric_cols = []
    for name in history_columns:
        if config_history[name].dtype.kind in "if":
            numeric_cols.append(name)

    if numeric_cols:
        history_stats = config_history[numeric_cols].describe().transpose()

        display(Markdown("#### Configuration Summary Statistics"))
        display(history_stats)

### Active Model Configuration

hidden_layers                         [64, 32, 16]
activation                                    relu
optimizer                                     adam
learning_rate                               0.0050
dropout_rate                                0.0500
enable_weight_oscillation_dampener            True
enable_binary_change_max                      True
max_additional_binary_digits                    16
Name: model_config, dtype: object

### Active Training Configuration

epochs        500
batch_size     16
Name: training_config, dtype: int64

### Historical Configuration Snapshots

Unnamed: 0,timestamp,config_id,learning_rate,dropout_rate,train_batch_size,train_epochs,best_r2,final_r2,r2_delta,avg_epoch_time_logged,avg_epoch_time_calc,total_training_time
0,2025-09-30 01:38:08.986529,training_config_20250930_013808,0.005,0.0,16,60,0.8562,0.8506,0.0056,1.0383,1.0403,62.4201
1,2025-09-30 02:18:40.642721,training_config_20250930_021840,0.0025,0.0,16,80,0.8469,0.8446,0.0023,1.14,1.1439,91.511
2,2025-09-30 02:20:25.365475,training_config_20250930_022025,0.0025,0.0,16,80,0.846,0.8323,0.0138,1.1985,1.2028,96.2257
3,2025-09-30 02:23:38.130602,training_config_20250930_022338,0.005,0.05,16,90,0.8743,0.8608,0.0134,1.3518,1.3554,121.9817
4,2025-09-30 02:27:07.080559,training_config_20250930_022707,0.005,0.05,16,110,0.8642,0.8512,0.013,1.3635,1.367,150.3698
5,2025-09-30 02:30:30.378506,training_config_20250930_023030,0.005,0.05,16,130,0.8666,0.838,0.0286,1.2568,1.2605,163.8691
6,2025-09-30 02:34:24.444506,training_config_20250930_023424,0.005,0.05,16,150,0.8689,0.8559,0.013,1.3037,1.3066,195.9893
7,2025-09-30 02:39:11.458678,training_config_20250930_023911,0.0025,0.05,16,160,0.8555,0.8454,0.0101,1.2773,1.2803,204.8531
8,2025-09-30 02:50:12.309857,training_config_20250930_025012,0.005,0.05,16,170,0.8695,0.8538,0.0156,1.2712,1.275,216.7573
9,2025-09-30 02:54:36.727654,training_config_20250930_025436,0.005,0.05,16,190,0.8773,0.8596,0.0177,1.2478,1.2512,237.7279


#### Configuration Summary Statistics

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
learning_rate,21.0,0.0046,0.0009,0.0025,0.005,0.005,0.005,0.005
dropout_rate,21.0,0.0429,0.0179,0.0,0.05,0.05,0.05,0.05
train_batch_size,21.0,16.0,0.0,16.0,16.0,16.0,16.0,16.0
train_epochs,21.0,230.4762,128.8983,60.0,130.0,210.0,320.0,500.0
best_r2,21.0,0.8655,0.0091,0.846,0.8614,0.8689,0.8718,0.8783
final_r2,21.0,0.8464,0.0104,0.8288,0.838,0.8484,0.8545,0.8608
r2_delta,21.0,0.0192,0.0104,0.0023,0.0134,0.0162,0.0264,0.0495
avg_epoch_time_logged,21.0,1.3407,0.1317,1.0383,1.2592,1.3542,1.4134,1.5628
avg_epoch_time_calc,21.0,1.3444,0.1322,1.0403,1.2627,1.3578,1.4169,1.5668
total_training_time,21.0,321.0268,194.7059,62.4201,163.8691,265.1614,454.8581,682.6939


In [None]:
analytics = load_training_logs(paths)

# Unpack the four analytics tables under the names later cells use.
loss_records, epoch_summary, results_df, merged_metrics = (
    analytics[key] for key in ("loss_records", "epoch_summary", "results", "merged_metrics")
)

# Last ten epochs of the per-epoch results.
epoch_columns = ["epoch", "train_loss", "val_loss", "train_val_gap", "val_loss_delta", "epoch_time"]
display(Markdown("### Epoch-Level Performance Summary"))
display(results_df.tail(10)[epoch_columns])

# Headline indicators for the whole run.
performance_snapshot = summarize_run_performance(results_df, epoch_summary)
display(Markdown("### Key Performance Indicators"))
display(performance_snapshot)

# Per-epoch loss aggregates and the smoothed raw loss trace.
display(Markdown("#### Loss Distribution by Epoch"))
display(epoch_summary.tail(10))

ewm_columns = ["epoch", "combined_loss", "loss_ewm"]
display(Markdown("#### Exponential Moving Average of Combined Loss"))
display(loss_records.tail(10)[ewm_columns])

### Epoch-Level Performance Summary

Unnamed: 0,epoch,train_loss,val_loss,train_val_gap,val_loss_delta,epoch_time
490,490,0.1665,0.1778,0.0113,0.0146,1.3124
491,491,0.1771,0.1673,-0.0098,-0.0106,1.3061
492,492,0.1718,0.1647,-0.0071,-0.0025,1.4694
493,493,0.1725,0.1778,0.0053,0.0131,1.4071
494,494,0.1859,0.1715,-0.0144,-0.0063,1.4745
495,495,0.1809,0.1589,-0.0221,-0.0126,1.7862
496,496,0.1779,0.1641,-0.0138,0.0052,1.477
497,497,0.1766,0.1606,-0.016,-0.0035,1.4746
498,498,0.184,0.1691,-0.0149,0.0085,1.732
499,499,0.1851,0.1711,-0.014,0.002,1.3508


### Key Performance Indicators

Unnamed: 0,metric,value,notes
0,Best validation loss,0.1409,Epoch 129
1,Final validation loss,0.1711,Train gap -0.0140
2,Validation improvement,0.5788,Drop from first to best epoch
3,Validation stability (std last 5 epochs),0.0439,Lower is more stable
4,Average epoch time (last 10 epochs),1.479,Supports batch-size experiments
5,Peak R²,0.8735,Epoch 129
6,Total recorded training time,680.9495,seconds


#### Loss Distribution by Epoch

Unnamed: 0,epoch,combined_loss_mean,combined_loss_std,mae_mean,mse_mean
489,489,0.1665,0.0366,0.2235,0.1096
490,490,0.1771,0.0334,0.2353,0.1189
491,491,0.1718,0.0422,0.2282,0.1154
492,492,0.1725,0.0366,0.2293,0.1156
493,493,0.1859,0.038,0.2414,0.1304
494,494,0.1809,0.0459,0.2374,0.1245
495,495,0.1779,0.0367,0.2331,0.1227
496,496,0.1766,0.0401,0.232,0.1211
497,497,0.184,0.0464,0.2375,0.1305
498,498,0.1851,0.0505,0.2393,0.131


#### Exponential Moving Average of Combined Loss

Unnamed: 0,epoch,combined_loss,loss_ewm
19990,498,0.1459,0.194
19991,498,0.1912,0.1936
19992,498,0.2639,0.2042
19993,498,0.1173,0.1911
19994,498,0.2059,0.1933
19995,498,0.1809,0.1915
19996,498,0.1701,0.1883
19997,498,0.1513,0.1827
19998,498,0.1542,0.1784
19999,498,0.1763,0.1781


In [None]:
particle_df = load_particle_data(paths)
scaler_X, scaler_y = load_scalers(paths)

# Shape banner followed by the first rows of the dataset.
row_count, column_count = particle_df.shape[0], particle_df.shape[1]
display(Markdown("### Particle Data Snapshot"))
display(Markdown(f"Dataset shape: **{row_count}** rows × **{column_count}** columns"))
display(particle_df.head())

# One row of summary statistics per column.
display(Markdown("#### Descriptive Statistics"))
display(particle_df.describe(include="all").transpose())

# Only report the missing-value audit when something is actually missing.
missing_counts = particle_df.isna().sum()
has_missing = missing_counts.any()

if has_missing:
    display(Markdown("#### Missing Value Audit"))
    display(missing_counts[missing_counts > 0])

Loaded particle data from c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\particle_data.csv (1000 particles)


### Particle Data Snapshot

Dataset shape: **1000** rows × **15** columns

Unnamed: 0,particle_id,mass,initial_velocity_x,initial_velocity_y,initial_position_x,initial_position_y,charge,magnetic_field_strength,simulation_time,final_velocity_x,final_velocity_y,final_position_x,final_position_y,kinetic_energy,trajectory_length
0,1,3.8079,-3.1487,-2.3829,3.4541,1.4399,1,1.2939,3.6656,1.2294,-3.7246,-9.4442,1.1308,29.2917,12.902
1,2,9.5121,0.419,-2.5302,5.9336,6.1086,1,0.4498,6.1506,1.0007,-2.3032,6.4958,-10.2526,29.9911,16.3709
2,3,7.3467,3.7295,4.0625,-4.9906,5.2032,1,0.3007,4.9767,2.9367,4.8254,16.411,22.5882,117.2113,27.573
3,4,6.0267,2.3222,-2.5045,2.4975,-6.922,0,1.6444,4.1205,2.4379,-2.4846,13.1317,-14.84,36.5107,13.2583
4,5,1.6446,3.0656,-2.2805,1.4349,-7.015,1,1.1996,7.7181,1.129,-3.5859,-1.5585,-5.7997,11.6218,3.2307


#### Descriptive Statistics

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
particle_id,1000.0,500.5,288.8194,1.0,250.75,500.5,750.25,1000.0
mass,1000.0,4.9535,2.8922,0.1459,2.4361,5.0184,7.4688,9.9972
initial_velocity_x,1000.0,0.0702,2.9219,-4.9678,-2.5893,0.1873,2.6047,4.9941
initial_velocity_y,1000.0,0.0241,2.9067,-4.9999,-2.3865,0.0061,2.591,4.9782
initial_position_x,1000.0,-0.1925,5.7298,-9.9869,-5.162,-0.3144,4.7508,9.9912
initial_position_y,1000.0,-0.1179,5.7362,-9.9994,-5.1005,-0.108,4.7999,9.955
charge,1000.0,-0.008,0.8091,-1.0,-1.0,0.0,1.0,1.0
magnetic_field_strength,1000.0,1.0462,0.5518,0.1074,0.5624,1.0473,1.537,1.999
simulation_time,1000.0,5.3997,2.5677,1.0022,3.1719,5.3375,7.5665,9.9929
final_velocity_x,1000.0,0.1078,2.9209,-7.0916,-2.2262,0.0838,2.4793,6.9006


In [None]:
# Index the weight checkpoints reachable through `paths`.
checkpoint_index = list_checkpoint_weights(paths)

# Show either the checkpoint table or a hint on how to produce checkpoints.
display(Markdown("### Available Weight Checkpoints"))
checkpoint_listing = (
    Markdown("No checkpoints found. Run training to generate weight artifacts.")
    if checkpoint_index.empty
    else checkpoint_index
)
display(checkpoint_listing)

# Load the model together with the metadata describing the checkpoint that was used.
model, checkpoint_meta = load_model_checkpoint(paths, model_config, training_config, checkpoint_index)

# The metadata is only reported when the loader actually returned some.
if checkpoint_meta is not None:
    loaded_epoch = checkpoint_meta["epoch"]
    loaded_weights = checkpoint_meta["weights_path"]
    display(Markdown(f"Loaded checkpoint: **epoch {loaded_epoch}** from `{loaded_weights}`"))
    display(pd.Series(checkpoint_meta))

### Available Weight Checkpoints

Unnamed: 0,epoch,name,path,modified,size_bytes,size_readable,is_latest
0,0,model_weights_epoch_0.weights.h5,model_weights_epoch_0.weights.h5,2025-09-30 18:38:19.229242325,72640,70.9 KB,False
1,10,model_weights_epoch_10.weights.h5,model_weights_epoch_10.weights.h5,2025-09-30 18:38:36.287689686,72640,70.9 KB,False
2,20,model_weights_epoch_20.weights.h5,model_weights_epoch_20.weights.h5,2025-09-30 18:38:50.502147197,72640,70.9 KB,False
3,30,model_weights_epoch_30.weights.h5,model_weights_epoch_30.weights.h5,2025-09-30 18:39:05.347838879,72640,70.9 KB,False
4,40,model_weights_epoch_40.weights.h5,model_weights_epoch_40.weights.h5,2025-09-30 18:39:19.369039297,72640,70.9 KB,False
...,...,...,...,...,...,...,...
64,460,model_weights_epoch_460.weights.h5,model_weights_epoch_460.weights.h5,2025-09-30 18:48:46.599674463,72640,70.9 KB,False
65,470,model_weights_epoch_470.weights.h5,model_weights_epoch_470.weights.h5,2025-09-30 18:48:59.908305883,72640,70.9 KB,False
66,480,model_weights_epoch_480.weights.h5,model_weights_epoch_480.weights.h5,2025-09-30 18:49:13.712352037,72640,70.9 KB,False
67,490,model_weights_epoch_490.weights.h5,model_weights_epoch_490.weights.h5,2025-09-30 18:49:26.919478178,72640,70.9 KB,False


  saveable.load_own_variables(weights_store.get(inner_path))


Loaded checkpoint: **epoch 499** from `model_weights_epoch_499.weights.h5`

epoch                                             499
weights_path       model_weights_epoch_499.weights.h5
size_bytes                                      72640
size_readable                                 70.9 KB
modified                2025-09-30 18:49:40.416439056
parameter_count                                  3286
dtype: object

In [None]:
# Score the particle data with the loaded model: per-row residuals plus summary metrics.
residuals_df, residual_metrics = compute_predictions(model, scaler_X, scaler_y, particle_df)

if residual_metrics:
    display(Markdown("### Residual Metrics"))

    # Everything except the nested "targets" entry fits into one flat Series.
    overall_metrics = dict(residual_metrics)
    overall_metrics.pop("targets", None)
    display(pd.Series(overall_metrics, name="residual_metrics"))

    # The "targets" entry is shown as a table with one row per output target.
    target_metrics = pd.DataFrame(residual_metrics["targets"]).T
    display(Markdown("#### Per-Target Residual Summary"))
    display(target_metrics)

if not residuals_df.empty:
    display(Markdown("### Residual Sample"))
    display(residuals_df.head())

### Residual Metrics

samples                256.0000
mae                      3.3028
rmse                     5.6024
residual_norm_median     9.5438
residual_norm_p95       25.9245
Name: residual_metrics, dtype: float64

#### Per-Target Residual Summary

Unnamed: 0,mae,rmse,bias
final_velocity_x,1.0376,1.6686,0.0187
final_velocity_y,0.9816,1.521,0.045
final_position_x,3.4223,4.7229,0.6597
final_position_y,3.5767,4.712,-0.5081
kinetic_energy,8.0683,11.0739,-0.6615
trajectory_length,2.7304,4.0102,-0.8742


### Residual Sample

Unnamed: 0,particle_id,actual_final_velocity_x,pred_final_velocity_x,residual_final_velocity_x,actual_final_velocity_y,pred_final_velocity_y,residual_final_velocity_y,actual_final_position_x,pred_final_position_x,residual_final_position_x,...,pred_final_position_y,residual_final_position_y,actual_kinetic_energy,pred_kinetic_energy,residual_kinetic_energy,actual_trajectory_length,pred_trajectory_length,residual_trajectory_length,residual_norm,residual_norm_z
2,3,2.9367,2.3548,-0.5818,4.8254,4.9024,0.0771,16.411,16.3485,-0.0626,...,19.1873,-3.4009,117.2113,107.9501,-9.2612,27.573,25.7147,-1.8583,10.0567,-0.1737
3,4,2.4379,2.215,-0.2228,-2.4846,-2.2138,0.2708,13.1317,12.5062,-0.6255,...,-15.225,-0.385,36.5107,35.4289,-1.0818,13.2583,14.9364,1.6781,2.1561,-1.2052
10,11,5.0235,-3.5097,-8.5332,-2.0279,2.3111,4.339,2.4081,5.6124,3.2042,...,-6.6328,2.6693,4.4577,13.5258,9.068,1.431,4.9853,3.5543,14.2793,0.3777
23,24,4.4347,-2.8499,-7.2846,-4.7552,0.7821,5.5373,38.0604,8.0618,-29.9985,...,-20.1097,12.6435,78.7841,54.1766,-24.6075,57.0734,33.2629,-23.8106,48.1245,4.7966
25,26,-0.2407,-0.6813,-0.4405,-2.1335,-2.7567,-0.6232,-4.47,-1.76,2.71,...,-11.7032,-0.0278,18.1469,24.9567,6.8098,18.2666,16.5501,-1.7165,7.5662,-0.4988


In [None]:
figures_dir = paths["figures_dir"]
# Create the export directory up front so the first savefig cannot fail on a fresh run.
figures_dir.mkdir(parents=True, exist_ok=True)


def _export_figure(fig, filename, description):
    """Lay out *fig*, save it as ``figures_dir / filename`` and report the path.

    Args:
        fig: Matplotlib figure to finalize and write to disk.
        filename: File name (including extension) created inside ``figures_dir``.
        description: Label inserted into the "Saved ... to" confirmation message.

    Returns:
        The path the figure was written to.
    """
    target_path = figures_dir / filename
    try:
        fig.tight_layout()
        fig.savefig(target_path, dpi=200)
    finally:
        # Close the figure even when saving fails so open figures do not accumulate.
        plt.close(fig)
    display(Markdown(f"Saved {description} to `{target_path}`"))
    return target_path


# The two loss charts need per-epoch rows; skip them when no results were loaded,
# using the same emptiness guard applied to the other optional charts below.
if not results_df.empty:
    # Loss trend
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.lineplot(data=results_df, x="epoch", y="train_loss", ax=ax, label="Train Loss")
    sns.lineplot(data=results_df, x="epoch", y="val_loss", ax=ax, label="Validation Loss")
    val_std = results_df["val_loss"].rolling(5, min_periods=1).std()
    # The band describes validation loss, so shade it in the validation line's own
    # colour (the line drawn last) instead of a fixed colour that matches the train line.
    val_color = ax.lines[-1].get_color() if ax.lines else "tab:orange"
    ax.fill_between(
        results_df["epoch"],
        results_df["val_loss"] - val_std,
        results_df["val_loss"] + val_std,
        color=val_color,
        alpha=0.1,
    )
    ax.set_title("Training vs Validation Loss")
    ax.set_ylabel("Loss")
    loss_curve_path = _export_figure(fig, "loss_curves.png", "loss curves")

    # Train vs validation gap
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.lineplot(data=results_df, x="epoch", y="train_val_gap", ax=ax, color="tab:red")
    # Zero reference line: points above it have validation loss larger than train loss.
    ax.axhline(0, linestyle="--", color="grey", linewidth=1)
    ax.set_title("Train vs Validation Gap")
    ax.set_ylabel("Val - Train Loss")
    gap_plot_path = _export_figure(fig, "train_val_gap.png", "train/val gap chart")

# Learning rate vs final loss metrics
if not config_history.empty:
    lr_df = config_history.copy()

    fig, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(data=lr_df, x="learning_rate", y="final_r2", size="total_training_time", hue="final_r2", palette="viridis", ax=ax)
    ax.set_title("Learning Rate vs Final R²")
    ax.set_xlabel("Learning Rate")
    ax.set_ylabel("Final R²")
    lr_plot_path = _export_figure(fig, "learning_rate_vs_r2.png", "learning-rate diagnostics")

# Residual histogram
if not residuals_df.empty:
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(residuals_df["residual_norm"], bins=30, ax=ax, kde=True, color="tab:orange")
    ax.set_title("Residual Norm Distribution")
    ax.set_xlabel("Residual Norm")
    residual_hist_path = _export_figure(fig, "residual_norm_hist.png", "residual histogram")

    # Keep only the per-target residual columns; aggregate columns such as
    # "residual_norm" do not end with a target name and are filtered out here.
    target_suffixes = tuple(OUTPUT_TARGETS)
    residual_columns = [
        col
        for col in residuals_df.columns
        if col.startswith("residual_") and col.endswith(target_suffixes)
    ]

    if residual_columns:
        fig, ax = plt.subplots(figsize=(10, 5))
        # Long format (one row per target/residual pair) is what the boxplot call consumes.
        melted = residuals_df[residual_columns].melt(var_name="target", value_name="residual")
        sns.boxplot(data=melted, x="target", y="residual", ax=ax)
        ax.tick_params(axis="x", rotation=45)
        ax.set_title("Residual Distribution by Target")
        residual_box_path = _export_figure(fig, "residual_distribution_by_target.png", "residual distribution boxplot")

# Correlation heatmap
heatmap_features = ["train_loss", "val_loss", "train_mae", "val_mae", "r2_score", "epoch_time", "train_val_gap", "memory_headroom_mb"]
# Restrict to the metrics that are actually present in the merged table.
usable_cols = [col for col in heatmap_features if col in merged_metrics.columns]

if usable_cols:
    corr_matrix = merged_metrics[usable_cols].corr()

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
    ax.set_title("Metric Correlation Heatmap")
    heatmap_path = _export_figure(fig, "metric_correlation_heatmap.png", "correlation heatmap")

Saved loss curves to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\loss_curves.png`

Saved train/val gap chart to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\train_val_gap.png`

Saved learning-rate diagnostics to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\learning_rate_vs_r2.png`

Saved residual histogram to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\residual_norm_hist.png`

Saved residual distribution boxplot to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\residual_distribution_by_target.png`

Saved correlation heatmap to `c:\Users\jesse\OneDrive\Documents\Programming Projects\Neural Network Lab - Python\neural-network-lab-python\training_output\analysis\figures\metric_correlation_heatmap.png`

In [None]:
# Ask the helper for sweep suggestions based on the active configs, the config
# history and the per-epoch results.
recommendations_df = suggest_hyperparameters(model_config, training_config, config_history, results_df)

# An empty table means there is nothing to propose; otherwise show the suggestions.
if recommendations_df.empty:
    display(Markdown("No immediate hyperparameter adjustments detected beyond current configuration."))
else:
    display(Markdown("### Recommended Hyperparameter Sweeps"))
    display(recommendations_df)

### Recommended Hyperparameter Sweeps

Unnamed: 0,parameter,proposed_values,rationale,constraints
0,batch_size,"[16, 24, 32]",Epoch time and memory logs show headroom; larg...,Validate GPU memory against peak usage before ...
1,epochs,"[510, 520]",Best epoch occurs near training ceiling; exten...,Monitor for overfitting; stop early if val los...


In [None]:
# Bullet points gathered from the results, residuals, recommendations and artifact audit.
insight_items: List[str] = []

# Training-curve highlights; skipped when no per-epoch results are available.
if not results_df.empty:
    final_row = results_df.iloc[-1]
    best_row = results_df.loc[results_df["val_loss"].idxmin()]

    # Spread of validation loss across the last five logged rows.
    recent_val_loss = results_df.tail(5)["val_loss"]
    plateau_range = recent_val_loss.max() - recent_val_loss.min()
    total_epoch_time = results_df["epoch_time"].sum()

    insight_items.extend(
        [
            f"Best validation loss {best_row['val_loss']:.4f} at epoch {int(best_row['epoch'])}.",
            f"Validation plateau range over last window: {plateau_range:.4f}.",
            f"Train/val gap at final epoch: {final_row['train_val_gap']:.4f}.",
            f"Cumulative training time logged: {total_epoch_time:.1f} seconds.",
        ]
    )

# Residual highlights from the prediction step.
if residual_metrics:
    insight_items.extend(
        [
            f"Mean absolute residual across sampled predictions: {residual_metrics['mae']:.4f}.",
            f"95th percentile residual norm: {residual_metrics['residual_norm_p95']:.4f}.",
        ]
    )

# Names of the parameters that have a proposed sweep.
if not recommendations_df.empty:
    suggested = ", ".join(recommendations_df["parameter"].unique())
    insight_items.append(f"Hyperparameter sweep targets: {suggested}.")

# Artifacts flagged as critical that were not found.
is_absent = ~artifact_status["exists"]
missing_artifacts = artifact_status.loc[is_absent & artifact_status["critical"]]
if not missing_artifacts.empty:
    missing_list = ", ".join(missing_artifacts["artifact"].tolist())
    insight_items.append(f"Critical artifacts missing: {missing_list}.")

# Fall back to a placeholder so the summary section is never blank.
if not insight_items:
    insight_items.append("Insufficient data to derive insights.")

display(Markdown("### Insight Summary"))
for insight_text in insight_items:
    display(Markdown(f"- {insight_text}"))

### Insight Summary

- Best validation loss 0.1409 at epoch 129.

- Validation plateau range over last window: 0.0122.

- Train/val gap at final epoch: -0.0140.

- Cumulative training time logged: 680.9 seconds.

- Mean absolute residual across sampled predictions: 3.3028.

- 95th percentile residual norm: 25.9245.

- Hyperparameter sweep targets: batch_size, epochs.

In [None]:
def run_notebook_smoke_test() -> Dict[str, Any]:
    """Summarize the notebook's module-level state as a validation checklist.

    Reads the globals produced by earlier cells (``artifact_status``,
    ``config_history``, ``loss_records``, ``results_df``, ``residuals_df``,
    ``recommendations_df``, ``paths`` and ``checkpoint_meta``) and condenses
    them into counts and flags. Nothing is re-run or modified here.

    Returns:
        A dict with these keys: ``critical_artifacts_present`` (bool), the
        record counts ``config_history_entries``, ``loss_records``,
        ``results_records``, ``residual_samples`` and ``recommendations``,
        ``figures_exported`` (number of ``*.png`` files in
        ``paths["figures_dir"]``) and ``latest_checkpoint_epoch`` (``None``
        when ``checkpoint_meta`` is falsy).
    """
    status: Dict[str, Any] = {}

    # Critical rows whose "exists" flag is False; an empty selection means all are present.
    critical_missing = artifact_status.loc[artifact_status["critical"] & ~artifact_status["exists"]]
    status["critical_artifacts_present"] = bool(critical_missing.empty)

    status["config_history_entries"] = int(len(config_history))
    status["loss_records"] = int(len(loss_records))
    status["results_records"] = int(len(results_df))
    status["residual_samples"] = int(len(residuals_df))
    status["recommendations"] = int(len(recommendations_df))

    exported_pngs = list(paths["figures_dir"].glob("*.png"))
    status["figures_exported"] = len(exported_pngs)

    if checkpoint_meta:
        status["latest_checkpoint_epoch"] = int(checkpoint_meta["epoch"])
    else:
        status["latest_checkpoint_epoch"] = None

    return status


# Gather the checklist values once so they can be inspected after the cell has run.
smoke_test_status = run_notebook_smoke_test()

# Render the checklist as a labelled Series under its own heading.
display(Markdown("### Validation Checklist"))
validation_checklist = pd.Series(smoke_test_status, name="notebook_validation")
display(validation_checklist)

### Validation Checklist

critical_artifacts_present     True
config_history_entries           21
loss_records                  20000
results_records                 500
residual_samples                256
recommendations                   2
figures_exported                  6
latest_checkpoint_epoch         499
Name: notebook_validation, dtype: object

## Actionable Next Steps

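- Re-run the training pipeline after trialing the hyperparameter combinations proposed in the "Recommended Hyperparameter Sweeps" table (for example batch size and epoch count, or learning rate and dropout when they are flagged); capture new config snapshots for comparison.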
- Promote saved figures under `training_output/analysis/figures/` into experiment reports or dashboards.
- Extend this notebook with automated sweeps (GridSearch or Bayesian optimization) once additional configuration diversity is available.

### Reuse Tips

- Parameterize `sample_size` within `compute_predictions` to scale residual analysis for larger datasets.
- Import this notebook’s helper functions via `%run experiment_analysis_framework.ipynb` inside future analysis notebooks for rapid setup.
- Store additional diagnostics (e.g., feature importance, SHAP values) within the `analysis` directory for cross-experiment benchmarking.