In [None]:
# 01_model_validation.ipynb

# Make src/ importable when the notebook is run from <repo root>/notebooks
import sys
from pathlib import Path

# Locate the repository root by walking up from the working directory until a
# directory containing src/triangle_time is found, so the notebook also works
# when the kernel is started from the repo root or from a nested folder.
# If nothing matches, fall back to the parent of the working directory (the
# original assumption that the kernel runs in <repo root>/notebooks).
_cwd = Path.cwd().resolve()
REPO_ROOT = next(
    (d for d in (_cwd, *_cwd.parents) if (d / "src" / "triangle_time").is_dir()),
    Path("..").resolve(),
)
SRC_PATH = REPO_ROOT / "src"
# Only insert once so re-running the cell does not pile up duplicate entries.
if str(SRC_PATH) not in sys.path:
    sys.path.insert(0, str(SRC_PATH))

# Core imports
import numpy as np
import matplotlib.pyplot as plt

from triangle_time.data_io import load_tasks_from_csv
from triangle_time.training import fit_model, evaluate_model
from triangle_time.triangle_model import (
    update_task_proportions,
    predict_time_for_task,
    predict_time_from_proportions,
)
from triangle_time.schema import ModelParams


In [None]:
# Sample task CSV located under <repo root>/data/samples
csv_path = REPO_ROOT.joinpath("data", "samples", "example_tasks.csv")

# The loader is handed a plain string rather than a Path object.
tasks = load_tasks_from_csv(str(csv_path))

# Sanity check: number of loaded tasks and the first record.
(len(tasks), tasks[0])


In [None]:
# Fit the full model with the entropy/mixing term enabled (use_entropy=True)
# on the tasks loaded from the sample CSV.
params_entropy = fit_model(tasks, use_entropy=True)
# Bare last expression: the notebook displays the value returned by fit_model.
# NOTE(review): presumably a ModelParams instance — confirm against fit_model.
params_entropy


In [None]:
# Fit a simpler baseline without the entropy term (use_entropy=False) on the
# same tasks, so the two fits can be compared.
params_base = fit_model(tasks, use_entropy=False)
# Bare last expression: the notebook displays the value returned by fit_model.
# NOTE(review): presumably a ModelParams instance — confirm against fit_model.
params_base


In [None]:
# Score both fitted parameter sets on the same task list.
metrics_entropy = evaluate_model(tasks, params_entropy)
metrics_base = evaluate_model(tasks, params_base)

# One (heading, metrics) pair per model; the second heading carries a leading
# newline so the two reports are separated by a blank line.
reports = (
    ("=== Entropy model ===", metrics_entropy),
    ("\n=== Base model (no entropy) ===", metrics_base),
)
for heading, metrics in reports:
    print(heading)
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name}: {metric_value}")


In [None]:
# Parameter set whose predictions are compared against the actual times
params = params_entropy  # or params_base

actual_times = []
predicted_times = []

for task in tasks:
    # Use the recorded total when it is present and not <= 0; otherwise
    # rebuild it from the three component times.
    needs_rebuild = task.T_total is None or task.T_total <= 0
    actual = (task.T_gov + task.T_azure + task.T_ds) if needs_rebuild else task.T_total

    # Tasks whose total is still <= 0 contribute no (actual, predicted) pair.
    if not (actual <= 0):
        actual_times.append(actual)
        predicted_times.append(predict_time_for_task(task, params))

# Convert to float arrays for the plotting cell.
y_true = np.asarray(actual_times, dtype=float)
y_pred = np.asarray(predicted_times, dtype=float)

# Peek at the first five actual / predicted values.
(y_true[:5], y_pred[:5])


In [None]:
# Scatter of predicted vs. actual total time with a y = x reference line.
# Fail early with a readable message instead of numpy's "zero-size array"
# ValueError when no task produced a usable (actual, predicted) pair.
if y_true.size == 0 or y_pred.size == 0:
    raise ValueError("No tasks with a positive total time; nothing to plot.")

fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(y_true, y_pred, alpha=0.7)

# Span the diagonal over the combined range of both arrays so that perfect
# predictions would fall exactly on the dashed line.
min_val = min(y_true.min(), y_pred.min())
max_val = max(y_true.max(), y_pred.max())
ax.plot([min_val, max_val], [min_val, max_val], linestyle="--")

ax.set_xlabel("Actual time")
ax.set_ylabel("Predicted time")
# The title previously contained mojibake (an en dash whose UTF-8 bytes were
# decoded as cp1252). The escape keeps the en dash safe from re-encoding.
ax.set_title("Triangle time model \u2013 actual vs predicted")
ax.grid(True)
plt.show()


In [None]:
# Run the first task through update_task_proportions and inspect the result.
sample_task = update_task_proportions(tasks[0])

print("Task ID:", sample_task.task_id)
# Component times and total, printed as "<field>: <value>".
for field in ("T_gov", "T_azure", "T_ds", "T_total"):
    print(f"{field}:", getattr(sample_task, field))

print("\nProportions (p_gov, p_azure, p_ds):")
print(*(getattr(sample_task, field) for field in ("p_gov", "p_azure", "p_ds")))

# Prediction for this task under the entropy-model parameters.
pred_time = predict_time_for_task(sample_task, params_entropy)
print("\nPredicted time (entropy model):", pred_time)


In [None]:
# Take the same task and experiment with different mixes
p_gov_orig = sample_task.p_gov
p_azure_orig = sample_task.p_azure
p_ds_orig = sample_task.p_ds

print("Original proportions:")
print("  p_gov:  ", p_gov_orig)
print("  p_azure:", p_azure_orig)
print("  p_ds:   ", p_ds_orig)

# Share moved away from p_gov in the "less gov" scenarios.
MIX_SHIFT = 0.2
# Never move more than p_gov actually holds. Previously the gov share was
# clamped at 0 while the full 0.2 was still added to the receiving share, so
# the later renormalisation silently rescaled the component that was meant to
# stay unchanged.
gov_shift = min(MIX_SHIFT, max(p_gov_orig, 0.0))
p_gov_reduced = max(p_gov_orig - gov_shift, 0.0)

# Construct a few hypothetical mixes:
scenarios = [
    ("Original", p_gov_orig, p_azure_orig, p_ds_orig),
    ("Less gov, more Azure", p_gov_reduced, p_azure_orig + gov_shift, p_ds_orig),
    ("Less gov, more DS", p_gov_reduced, p_azure_orig, p_ds_orig + gov_shift),
    ("Balanced (1/3 each)", 1/3, 1/3, 1/3),
]

print("\nScenario predictions (entropy model):")
for name, p_gov, p_azure, p_ds in scenarios:
    # Normalize as a safety net in case the mix does not sum to exactly 1
    # (float drift, or original proportions that were not normalized).
    total_p = p_gov + p_azure + p_ds
    if total_p <= 0:
        # Report the skip instead of dropping the scenario silently.
        print(f"{name:22s} | skipped (proportions sum to {total_p})")
        continue
    p_gov_n = p_gov / total_p
    p_azure_n = p_azure / total_p
    p_ds_n = p_ds / total_p

    t_pred = predict_time_from_proportions(
        p_gov_n, p_azure_n, p_ds_n, params_entropy
    )

    print(
        f"{name:22s} | "
        f"p_gov={p_gov_n:.2f}, p_azure={p_azure_n:.2f}, p_ds={p_ds_n:.2f} "
        f"=> T_pred={t_pred:.2f}"
    )
