# CoachOS (ICHack) – training notebook

Built by Issa + Tay.

**What this notebook does (run top to bottom):**
- Pulls StatsBomb open event data from GitHub
- Trains 3 small supervised models: pass completion, shot goal (xG proxy), win probability
- Computes xT (expected threat) for passes + carries
- Exports demo CSVs + trained `joblib` models into a single zip you can drop into our FastAPI/Nuxt app

**Quick run tips**
- In a rush: set `N_MATCHES = 30`
- Better models: set `N_MATCHES = 100` (slower)

Colab note: your working folder is `/content` (not `/mnt/data`).


In [None]:
# Colab setup
!pip -q install pandas numpy scikit-learn tqdm joblib

import os, json, math, random
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import GroupShuffleSplit
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, brier_score_loss
import joblib

# Clone StatsBomb Open Data repo (GitHub)
# Shallow clone (--depth 1); skipped when the checkout folder already exists, so re-running the cell is cheap.
if not Path("/content/open-data").exists():
    !git clone --depth 1 https://github.com/statsbomb/open-data.git /content/open-data

# Every data file read later (competitions, matches, events) is resolved relative to this folder.
BASE = Path("/content/open-data/data")
# Fail fast with a readable message if the clone did not produce the expected layout.
assert (BASE / "competitions.json").exists(), "Open data repo not found. Check the git clone step."
print("Open Data folder OK:", BASE)

## Step 1 – Choose what to train on

Pick a competition/season (optional) and how many matches to train on.

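- If `COMPETITION_NAME` and/or `SEASON_NAME` is `None`, that filter is skipped and one competition/season is picked at random from the catalogue entries that remain; matches are then sampled from that single competition season.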
- `N_MATCHES` controls runtime vs quality.

After the next cell you should see which competition/season got picked and how many matches we are pulling.


In [None]:
# User configurable
# Optional filters for the competition/season lookup; None means "do not filter on this field".
COMPETITION_NAME = None   # e.g. "FIFA World Cup"
SEASON_NAME = None        # e.g. "2018"

# Upper bound on how many matches are sampled for training (fewer if the season has fewer).
N_MATCHES = 60            # good hackathon default: 40 to 120
# Used to seed the global RNGs here and as random_state for the train/test splits.
RANDOM_SEED = 7
DEMO_MATCH_ID = None      # set to a specific match_id if you want, else it will pick one from the loaded set

# Seed Python's and NumPy's global RNGs; `random` drives the competition pick and the match shuffle.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

## Step 2 – Load match list + events JSON

We load the competitions file, pick matches, then pull each match’s events JSON into memory for feature extraction.


In [None]:
def load_json(path: Path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed object."""
    with open(path, mode="r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed

# Parsed competitions.json: the catalogue that pick_competition_and_season filters and samples from.
competitions = load_json(BASE / "competitions.json")

def pick_competition_and_season(competition_name=None, season_name=None):
    """Return one randomly chosen catalogue row that satisfies the optional filters.

    A filter left as ``None`` is not applied. The choice uses the global
    ``random`` module, so it is repeatable once that RNG has been seeded.

    Raises:
        ValueError: when no catalogue row passes the filters.
    """
    candidates = [
        row
        for row in competitions
        if (competition_name is None or row.get("competition_name") == competition_name)
        and (season_name is None or row.get("season_name") == season_name)
    ]
    if len(candidates) == 0:
        raise ValueError("No competitions matched your filters. Try leaving them as None.")
    return random.choice(candidates)

def load_match_ids(competition_id: int, season_id: int):
    """Load the match list of one competition season.

    Returns a tuple ``(match_ids, matches)``: the ``match_id`` of every entry,
    in file order, plus the parsed match metadata itself.
    """
    season_file = BASE / "matches" / str(competition_id) / f"{season_id}.json"
    season_matches = load_json(season_file)
    ids = []
    for entry in season_matches:
        ids.append(entry["match_id"])
    return ids, season_matches

def load_events(match_id: int):
    """Return the parsed events JSON stored under ``events/<match_id>.json``."""
    events_file = (BASE / "events").joinpath(f"{match_id}.json")
    return load_json(events_file)

# Resolve which competition/season to train on (a random pick among rows that pass the filters).
picked = pick_competition_and_season(COMPETITION_NAME, SEASON_NAME)
competition_id = picked["competition_id"]
season_id = picked["season_id"]
print("Picked:", picked["competition_name"], "| season:", picked["season_name"], "| ids:", competition_id, season_id)

# match_meta keeps the full metadata rows; later cells read team names and final scores from it.
match_ids, match_meta = load_match_ids(competition_id, season_id)
print("Matches in this competition season:", len(match_ids))

# Sample matches
# Shuffle a copy (match_ids itself keeps file order) and keep at most N_MATCHES ids.
sample_ids = match_ids[:]
random.shuffle(sample_ids)
sample_ids = sample_ids[:min(N_MATCHES, len(sample_ids))]
print("Training matches sampled:", len(sample_ids))

## Step 3 – xT helpers (grid lookup + delta)

We use a standard 12×8 xT grid.

These helpers:
- map StatsBomb (x, y) coordinates to a grid cell
- look up `xT(start)` and `xT(end)`
- compute `Delta xT = xT(end) - xT(start)` for passes and carries

We also try to be possession aware using the next event’s `possession_team` (so we do not over-credit moves that instantly lose the ball).


In [None]:
# Karun Singh open xT grid, 8 (y) x 12 (x)
# Indexed as XT_GRID[y_bin, x_bin] by xt_value: rows are y bins, columns are x bins.
# The largest values sit in the last columns of the two central rows.
XT_GRID = np.array([
    [0.00638303, 0.00779616, 0.00844854, 0.00977659, 0.01126267, 0.01248344, 0.01473596, 0.01745060, 0.02122129, 0.02756312, 0.03485072, 0.03792590],
    [0.00750072, 0.00878589, 0.00942382, 0.01059490, 0.01214719, 0.01384540, 0.01611813, 0.01870347, 0.02401521, 0.02953272, 0.04066992, 0.04647721],
    [0.00887990, 0.00977745, 0.01001304, 0.01110462, 0.01269174, 0.01429128, 0.01685596, 0.01935132, 0.02412240, 0.02855202, 0.05491138, 0.06442595],
    [0.00941056, 0.01082722, 0.01016549, 0.01132376, 0.01262646, 0.01484598, 0.01689528, 0.01997070, 0.02385149, 0.03511326, 0.10805102, 0.25745362],
    [0.00941056, 0.01082722, 0.01016549, 0.01132376, 0.01262646, 0.01484598, 0.01689528, 0.01997070, 0.02385149, 0.03511326, 0.10805102, 0.25745362],
    [0.00887990, 0.00977745, 0.01001304, 0.01110462, 0.01269174, 0.01429128, 0.01685596, 0.01935132, 0.02412240, 0.02855202, 0.05491138, 0.06442595],
    [0.00750072, 0.00878589, 0.00942382, 0.01059490, 0.01214719, 0.01384540, 0.01611813, 0.01870347, 0.02401521, 0.02953272, 0.04066992, 0.04647721],
    [0.00638303, 0.00779616, 0.00844854, 0.00977659, 0.01126267, 0.01248344, 0.01473596, 0.01745060, 0.02122129, 0.02756312, 0.03485072, 0.03792590],
], dtype=float)

def clip_xy(x, y):
    """Coerce ``x``/``y`` to float and clamp them to [0, 119.999] and [0, 79.999].

    The upper bounds sit just below 120 and 80 so a point on the far edge
    still falls inside the last grid cell when divided into bins.
    """
    x_max, y_max = 119.999, 79.999
    clipped_x = max(float(x), 0.0)
    clipped_x = min(clipped_x, x_max)
    clipped_y = max(float(y), 0.0)
    clipped_y = min(clipped_y, y_max)
    return clipped_x, clipped_y

def xt_value(x, y):
    """Return the xT grid value of the cell containing pitch point ``(x, y)``.

    Coordinates are clamped first, then split into 10-unit bins: 12 columns
    along x and 8 rows along y.
    """
    cx, cy = clip_xy(x, y)
    col = int(cx / 10.0)
    row = int(cy / 10.0)
    # Defensive caps so the lookup can never index past the grid.
    if col > 11:
        col = 11
    if row > 7:
        row = 7
    return float(XT_GRID[row, col])

def get_end_location(ev):
    """Return ``(x, y)`` where a Pass or Carry event ended, or ``None``.

    ``None`` is returned for every other event type and when the event
    carries no (or an empty) ``end_location``.
    """
    kind = ev.get("type", {}).get("name")
    if kind not in ("Pass", "Carry"):
        return None
    # The detail object lives under the lower-cased type name ("pass" / "carry").
    target = ev.get(kind.lower(), {}).get("end_location")
    if target:
        return target[0], target[1]
    return None

def compute_xt_actions(events):
    """Build one row per located Pass/Carry with start/end xT and possession-aware deltas.

    Args:
        events: iterable of event dicts for a single match. Events are ordered
            by their ``index`` field before processing.

    Returns:
        A DataFrame with a fixed column set (see ``columns`` below). The columns
        are present even when no Pass/Carry qualifies, so callers can filter and
        group an empty result without a KeyError.

    An action counts as "kept" when the event that follows it has a
    ``possession_team`` equal to the acting team. When possession is not kept the
    end value is treated as 0.0, so ``xt_delta_kept`` becomes ``-xt_start``.
    NOTE(review): possession is judged only from the next event; confirm this is
    strict enough for incomplete passes.
    """
    columns = [
        "match_id", "minute", "second", "team", "player", "type",
        "start_x", "start_y", "end_x", "end_y",
        "kept_possession", "xt_start", "xt_end_raw",
        "xt_delta_raw", "xt_delta_kept", "xt_created",
    ]

    # events should be chronological by index
    events_sorted = sorted(events, key=lambda e: e.get("index", 0))

    # Event id -> possession team recorded on the event that immediately follows it.
    # The last event has no successor and is therefore never "kept".
    next_pos_team = {
        cur.get("id"): nxt.get("possession_team", {}).get("name")
        for cur, nxt in zip(events_sorted, events_sorted[1:])
    }

    rows = []
    for ev in events_sorted:
        t = ev.get("type", {}).get("name")
        if t not in ("Pass", "Carry"):
            continue
        loc = ev.get("location")
        end = get_end_location(ev)
        if not loc or not end:
            continue

        team = ev.get("team", {}).get("name")
        sx, sy = loc[0], loc[1]
        ex, ey = end[0], end[1]

        xt_s = xt_value(sx, sy)
        xt_e_raw = xt_value(ex, ey)  # looked up once, reused for raw and kept variants
        kept = next_pos_team.get(ev.get("id")) == team
        xt_e = xt_e_raw if kept else 0.0
        dxt_kept = xt_e - xt_s

        rows.append({
            "match_id": ev.get("match_id"),
            "minute": ev.get("minute", 0),
            "second": ev.get("second", 0),
            "team": team,
            "player": ev.get("player", {}).get("name"),
            "type": t,
            "start_x": sx, "start_y": sy,
            "end_x": ex, "end_y": ey,
            "kept_possession": int(kept),
            "xt_start": xt_s,
            "xt_end_raw": xt_e_raw,
            "xt_delta_raw": xt_e_raw - xt_s,
            "xt_delta_kept": dxt_kept,
            # Only positive, possession-retaining progress is credited.
            "xt_created": max(dxt_kept, 0.0),
        })
    return pd.DataFrame(rows, columns=columns)

## Step 4 – Turn raw events into ML rows

We convert StatsBomb events into clean tables we can train on:

- **Passes**: features (distance, angle, pressure, start/end zones, etc.) -> label = completed or not
- **Shots**: features (location, angle, pressure, etc.) -> label = goal or not  
  (we use StatsBomb xG when it exists, otherwise our shot model acts as an xG proxy)

Keeping it lightweight is intentional so it trains fast in Colab.


In [None]:
def safe_name(d, *keys):
    """Follow ``keys`` through nested dicts and return the value found there.

    If the value reached is itself a dict, its ``"name"`` entry is returned
    instead (the common ``{"id": ..., "name": ...}`` wrapper). Returns ``None``
    when any step is missing, or when an intermediate value is a scalar that
    cannot be descended into (previously this raised ``AttributeError``).
    """
    cur = d
    for k in keys:
        # None means a missing branch; anything without .get() is a scalar leaf
        # sitting where a nested object was expected.
        if cur is None or not hasattr(cur, "get"):
            return None
        cur = cur.get(k)
    if isinstance(cur, dict):
        return cur.get("name")
    return cur

def is_completed_pass(ev):
    """Return True when the pass event carries no ``pass.outcome`` value."""
    # In StatsBomb JSON, incomplete passes usually have pass.outcome present
    pass_detail = ev.get("pass", {})
    outcome = pass_detail.get("outcome")
    return outcome is None

def pass_rows_from_events(events, match_id):
    """Turn the Pass events of one match into flat feature dicts.

    Events that are not passes, or that lack a start or end location, are
    skipped. Each returned dict holds identifiers, geometry, context flags,
    xT lookups and the ``y_complete`` label.
    """
    def flag(container, key):
        # Optional booleans are stored as 0/1.
        return int(bool(container.get(key, False)))

    out = []
    for ev in events:
        if ev.get("type", {}).get("name") != "Pass":
            continue
        origin = ev.get("location")
        pass_info = ev.get("pass", {})
        target = pass_info.get("end_location")
        if not (origin and target):
            continue

        sx, sy = origin[0], origin[1]
        ex, ey = target[0], target[1]

        # Prefer the recorded length/angle; fall back to geometry when absent.
        raw_length = pass_info.get("length")
        if raw_length is None:
            length_val = math.hypot(ex - sx, ey - sy)
        else:
            length_val = float(raw_length)

        raw_angle = pass_info.get("angle")
        if raw_angle is None:
            angle_val = math.atan2(ey - sy, ex - sx)
        else:
            angle_val = float(raw_angle)

        xt_from = xt_value(sx, sy)
        xt_to = xt_value(ex, ey)

        record = {
            "match_id": match_id,
            "team": safe_name(ev, "team"),
            "player": safe_name(ev, "player"),
            "minute": ev.get("minute", 0),
            "second": ev.get("second", 0),
            "start_x": sx, "start_y": sy,
            "end_x": ex, "end_y": ey,
            "length": length_val,
            "angle": angle_val,
            "under_pressure": flag(ev, "under_pressure"),
            "height": safe_name(pass_info, "height"),
            "body_part": safe_name(pass_info, "body_part"),
            "pass_type": safe_name(pass_info, "type"),
            "play_pattern": safe_name(ev, "play_pattern"),
            "cross": flag(pass_info, "cross"),
            "switch": flag(pass_info, "switch"),
            "through_ball": flag(pass_info, "through_ball"),
            "cut_back": flag(pass_info, "cut_back"),
            "xt_start": xt_from,
            "xt_end": xt_to,
            "xt_delta_raw": xt_to - xt_from,
            "y_complete": int(is_completed_pass(ev)),
        }
        out.append(record)
    return out

def shot_rows_from_events(events, match_id):
    """Turn the Shot events of one match into flat feature dicts.

    Shots without a location are skipped. Distance and angle are measured
    from the shot location to the point (120, 40). ``sb_xg`` is NaN when the
    event has no ``statsbomb_xg`` value; ``y_goal`` is 1 only for outcome "Goal".
    """
    out = []
    for ev in events:
        if ev.get("type", {}).get("name") != "Shot":
            continue
        origin = ev.get("location")
        if not origin:
            continue

        shot_x, shot_y = origin[0], origin[1]
        to_goal_x = 120.0 - float(shot_x)
        to_goal_y = float(shot_y) - 40.0
        distance = math.hypot(to_goal_x, to_goal_y)
        # Floor the x offset so a shot taken on the goal line cannot yield atan2(…, 0).
        off_centre = math.atan2(abs(to_goal_y), max(to_goal_x, 1e-6))

        shot_info = ev.get("shot", {})
        result = safe_name(shot_info, "outcome")
        scored = int(result == "Goal")

        provider_xg = ev.get("shot", {}).get("statsbomb_xg")
        if provider_xg is None:
            xg_val = np.nan
        else:
            xg_val = float(provider_xg)

        out.append({
            "match_id": match_id,
            "team": safe_name(ev, "team"),
            "player": safe_name(ev, "player"),
            "minute": ev.get("minute", 0),
            "second": ev.get("second", 0),
            "x": float(shot_x),
            "y": float(shot_y),
            "dist": float(distance),
            "angle": float(off_centre),
            "under_pressure": int(bool(ev.get("under_pressure", False))),
            "body_part": safe_name(ev.get("shot", {}), "body_part"),
            "shot_type": safe_name(ev.get("shot", {}), "type"),
            "play_pattern": safe_name(ev, "play_pattern"),
            "sb_xg": xg_val,
            "y_goal": scored,
        })
    return out

## Step 5 – Build the training datasets

We build three datasets:
- `passes_df` (one row per pass)
- `shots_df` (one row per shot)
- `win_df` (a few time slices per match for win probability training; built in Step 8 from the per-match caches filled here)

You should see row counts and a quick preview table after running this section.


In [None]:
# Load events and build datasets
# Row accumulators across all sampled matches; turned into passes_df / shots_df after the loading loop.
pass_rows = []
shot_rows = []
# Per-match caches keyed by match_id; the win-probability and export cells read from them.
xt_by_match = {}    # cache xT actions per match
goals_by_match = {} # cache goal events per match
shots_by_match = {} # cache shots per match

def team_names_from_match_meta(match_meta_rows, match_id):
    """Return ``(home_team_name, away_team_name)`` for ``match_id``.

    The first metadata row with a matching ``match_id`` wins; ``(None, None)``
    is returned when the match is not in ``match_meta_rows``.
    """
    for row in match_meta_rows:
        if row["match_id"] == match_id:
            home_name = row["home_team"]["home_team_name"]
            away_name = row["away_team"]["away_team_name"]
            return home_name, away_name
    return None, None

def final_scores_from_match_meta(match_meta_rows, match_id):
    """Return ``(home_score, away_score)`` for ``match_id``, or ``None`` if unknown.

    Either score may itself be ``None`` when the metadata row lacks that field.
    """
    for row in match_meta_rows:
        if row["match_id"] == match_id:
            return row.get("home_score"), row.get("away_score")
    return None

# Load every sampled match once and fill the row accumulators plus the per-match caches.
for mid in tqdm(sample_ids, desc="Loading matches"):
    events = load_events(mid)
    # attach match_id for convenience
    for ev in events:
        ev["match_id"] = mid

    pass_rows.extend(pass_rows_from_events(events, mid))

    # Extract the shot rows once and reuse them for both the global training
    # table and the per-match cache (previously the events were scanned twice).
    match_shot_rows = shot_rows_from_events(events, mid)
    shot_rows.extend(match_shot_rows)

    xt_actions = compute_xt_actions(events)
    xt_by_match[mid] = xt_actions

    # Goal events
    goals = []
    for ev in events:
        ev_type = ev.get("type", {}).get("name")
        is_shot_goal = (
            ev_type == "Shot"
            and safe_name(ev.get("shot", {}), "outcome") == "Goal"
        )
        # Own goals are not shots: StatsBomb logs them as "Own Goal For" events
        # attributed to the team that benefits. Counting them keeps the running
        # score consistent with the final score used as the win label.
        if is_shot_goal or ev_type == "Own Goal For":
            goals.append({
                "minute": ev.get("minute", 0),
                "second": ev.get("second", 0),
                "team": safe_name(ev, "team"),
            })
    # Fixed columns so a goalless match still yields a well-formed (empty) frame.
    goals_by_match[mid] = pd.DataFrame(goals, columns=["minute", "second", "team"])

    # Shots table for xG rollups
    shots = pd.DataFrame(match_shot_rows)
    shots_by_match[mid] = shots

passes_df = pd.DataFrame(pass_rows)
shots_df = pd.DataFrame(shot_rows)

print("Passes:", len(passes_df), "Shots:", len(shots_df))
passes_df.head()

## Step 6 – Train pass completion model

Goal: predict `P(pass completes)` from event features. We print AUC + a quick calibration metric.


In [None]:
# Features
# Numeric columns are standardised and categorical columns one-hot encoded by `pre` below.
PASS_NUM = ["start_x","start_y","end_x","end_y","length","angle","under_pressure","cross","switch","through_ball","cut_back","xt_start","xt_end","xt_delta_raw"]
PASS_CAT = ["height","body_part","pass_type","play_pattern"]
target = "y_complete"

X = passes_df[PASS_NUM + PASS_CAT].copy()
y = passes_df[target].astype(int).values
groups = passes_df["match_id"].values

# Split by match_id to avoid leakage
# (all passes of a match end up on the same side of the split; test_size is the held-out share of matches)
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
train_idx, test_idx = next(gss.split(X, y, groups=groups))

# handle_unknown="ignore": a category unseen during fit does not raise at predict time.
pre = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), PASS_NUM),
        ("cat", OneHotEncoder(handle_unknown="ignore"), PASS_CAT),
    ]
)

# Preprocessing and classifier live in one Pipeline so the saved model accepts raw feature columns.
pass_model = Pipeline(steps=[
    ("pre", pre),
    ("clf", LogisticRegression(max_iter=200, n_jobs=None))
])

pass_model.fit(X.iloc[train_idx], y[train_idx])
# Column 1 of predict_proba is the probability of the positive class (pass completed).
p_test = pass_model.predict_proba(X.iloc[test_idx])[:, 1]
auc = roc_auc_score(y[test_idx], p_test)
brier = brier_score_loss(y[test_idx], p_test)

print("Pass model AUC:", round(auc, 4), "| Brier:", round(brier, 4))

## Step 7 – Train shot goal model (xG proxy)

Goal: predict `P(shot becomes a goal)` when StatsBomb xG is missing. This is mainly for the live demo pipeline.


In [None]:
# Feature columns for the shot model; they must match the keys produced by shot_rows_from_events.
SHOT_NUM = ["x","y","dist","angle","under_pressure"]
SHOT_CAT = ["body_part","shot_type","play_pattern"]
target = "y_goal"

# NOTE: X, y, groups, gss, pre, train_idx and test_idx are rebound here and replace the pass-model values.
X = shots_df[SHOT_NUM + SHOT_CAT].copy()
y = shots_df[target].astype(int).values
groups = shots_df["match_id"].values

# Group split by match_id, same scheme as the pass model.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
train_idx, test_idx = next(gss.split(X, y, groups=groups))

pre = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), SHOT_NUM),
        ("cat", OneHotEncoder(handle_unknown="ignore"), SHOT_CAT),
    ]
)

shot_model = Pipeline(steps=[
    ("pre", pre),
    ("clf", LogisticRegression(max_iter=400))
])

shot_model.fit(X.iloc[train_idx], y[train_idx])
# Column 1 of predict_proba is the probability that the shot is a goal.
p_test = shot_model.predict_proba(X.iloc[test_idx])[:, 1]
auc = roc_auc_score(y[test_idx], p_test)
brier = brier_score_loss(y[test_idx], p_test)

print("Shot model AUC:", round(auc, 4), "| Brier:", round(brier, 4))

# Use StatsBomb xG if present, otherwise use p_goal

## Step 8 – Train win probability model

We treat win probability as a supervised learning problem:

Given a compact match state:
- time remaining
- score difference (home – away)
- xG difference (home – away)
- xT difference (home – away)

we learn `P(home win | state)` from historical matches.

Label: 1 if home team wins, 0 otherwise (draws included as 0).


In [None]:
# Build time-slice dataset per match (for win probability training)
# Idea: at a few minutes in the match, build a compact "state" and label whether the home team eventually wins.

# Match minutes at which a state snapshot is taken.
SLICES = [0, 15, 30, 45, 60, 75, 85]

win_rows = []
n_draw_matches = 0

for mid in sample_ids:
    home, away = team_names_from_match_meta(match_meta, mid)
    fs = final_scores_from_match_meta(match_meta, mid)
    # Skip matches without metadata or without a recorded final score: the label
    # cannot be built, and comparing None scores would raise a TypeError.
    if not fs or home is None or fs[0] is None or fs[1] is None:
        continue

    home_final, away_final = fs
    if home_final == away_final:
        n_draw_matches += 1

    # Home win vs not home win (draws + losses are 0)
    y_home_win = int(home_final > away_final)

    xt = xt_by_match[mid]
    shots = shots_by_match[mid]
    goals = goals_by_match[mid]

    # Per-shot xG does not depend on the slice minute, so resolve it once per
    # match instead of once per slice (this also avoids repeated model inference).
    if len(shots) > 0:
        # If sb_xg missing, estimate using shot_model
        # NOTE(review): when only some shots have sb_xg, the missing ones count
        # as 0.0 rather than being model-estimated; confirm that is intended.
        if shots["sb_xg"].notna().any():
            shots["_xg"] = shots["sb_xg"].fillna(0.0)
        else:
            Xs = shots[SHOT_NUM + SHOT_CAT].copy()
            shots["_xg"] = shot_model.predict_proba(Xs)[:, 1]

    for m in SLICES:
        # score up to minute m (inclusive)
        hg = int(((goals["team"] == home) & (goals["minute"] <= m)).sum()) if not goals.empty else 0
        ag = int(((goals["team"] == away) & (goals["minute"] <= m)).sum()) if not goals.empty else 0
        score_diff = hg - ag

        # xG diff up to minute m
        if len(shots) > 0:
            hxg = float(shots.loc[(shots["team"] == home) & (shots["minute"] <= m), "_xg"].sum())
            axg = float(shots.loc[(shots["team"] == away) & (shots["minute"] <= m), "_xg"].sum())
        else:
            hxg = axg = 0.0
        xg_diff = hxg - axg

        # xT created diff up to minute m (possession-aware + positive-only)
        if len(xt) > 0:
            hxt = float(xt.loc[(xt["team"] == home) & (xt["minute"] <= m), "xt_created"].sum())
            axt = float(xt.loc[(xt["team"] == away) & (xt["minute"] <= m), "xt_created"].sum())
        else:
            hxt = axt = 0.0
        xt_diff = hxt - axt

        time_remaining = max(0, 90 - m)

        win_rows.append({
            "match_id": mid,
            "minute": m,
            "time_remaining": time_remaining,
            "score_diff_home": score_diff,
            "xg_diff_home": xg_diff,
            "xt_diff_home": xt_diff,
            "y_home_win": y_home_win
        })

win_df = pd.DataFrame(win_rows)
print("Win slices:", len(win_df), "| draw matches in sample:", n_draw_matches)
win_df.head()


In [None]:
# Feature columns of the match-state model; the export cell builds the same four columns per minute.
WIN_NUM = ["time_remaining", "score_diff_home", "xg_diff_home", "xt_diff_home"]
target = "y_home_win"

X = win_df[WIN_NUM].copy()
y = win_df[target].astype(int).values
groups = win_df["match_id"].values

# Group split by match_id: all time slices of a match stay on the same side of the split.
gss = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=RANDOM_SEED)
train_idx, test_idx = next(gss.split(X, y, groups=groups))

win_model = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=400))
])

win_model.fit(X.iloc[train_idx], y[train_idx])
# Column 1 of predict_proba is the probability of a home win.
p_test = win_model.predict_proba(X.iloc[test_idx])[:, 1]
auc = roc_auc_score(y[test_idx], p_test)
brier = brier_score_loss(y[test_idx], p_test)
print("Win model AUC:", round(auc, 4), "| Brier:", round(brier, 4))

# Quick sanity checks: probability should increase with score_diff_home
# Three synthetic states with 30 minutes left that differ only in the score difference.
tmp = pd.DataFrame({
    "time_remaining":[30,30,30],
    "score_diff_home":[-1,0,1],
    "xg_diff_home":[0,0,0],
    "xt_diff_home":[0,0,0],
})
print("Sanity win probs (down,draw,up):", win_model.predict_proba(tmp)[:,1].round(3).tolist())

## Step 9 – Export demo artefacts for one match

This section saves the three trained models as `joblib` files and outputs CSVs that are directly useful for the frontend and for debugging:

- `player_xt_summary.csv`
- `top_xt_actions.csv`
- `winprob_timeline.csv`


In [None]:
# Output folders for the demo CSVs and the trained models.
ART = Path("/content/artifacts")
ART.mkdir(parents=True, exist_ok=True)

MODELS = Path("/content/models")
MODELS.mkdir(parents=True, exist_ok=True)

joblib.dump(pass_model, MODELS / "pass_model.joblib")
joblib.dump(shot_model, MODELS / "shot_model.joblib")
joblib.dump(win_model, MODELS / "win_model.joblib")
print("Saved models to:", MODELS)

# Fall back to the first sampled match when no demo match was configured.
demo_match_id = DEMO_MATCH_ID if DEMO_MATCH_ID is not None else sample_ids[0]
print("Demo match:", demo_match_id)
if demo_match_id not in xt_by_match:
    # Only matches loaded earlier have cached xT/shots/goals tables.
    raise KeyError(
        f"Demo match {demo_match_id} was not loaded; choose a match_id from sample_ids."
    )

home, away = team_names_from_match_meta(match_meta, demo_match_id)

# Work on copies so the cached per-match tables are left untouched.
xt = xt_by_match[demo_match_id].copy()
shots = shots_by_match[demo_match_id].copy()
goals = goals_by_match[demo_match_id].copy()

# Player xT summary
player_xt = (xt.groupby(["team","player"], dropna=False)["xt_created"]
               .sum()
               .reset_index()
               .rename(columns={"xt_created":"xt_pos"})
               .sort_values("xt_pos", ascending=False))
player_xt.to_csv(ART / "player_xt_summary.csv", index=False)

# Top xT actions
top_actions = (xt[(xt["kept_possession"]==1) & (xt["xt_delta_kept"]>0)]
               .sort_values("xt_delta_kept", ascending=False)
               .head(50))
top_actions.to_csv(ART / "top_xt_actions.csv", index=False)

# Per-shot xG is independent of the timeline minute, so resolve it once here
# instead of on each of the 91 loop iterations below.
if len(shots) > 0:
    if shots["sb_xg"].notna().any():
        shots["_xg"] = shots["sb_xg"].fillna(0.0)
    else:
        Xs = shots[SHOT_NUM + SHOT_CAT].copy()
        shots["_xg"] = shot_model.predict_proba(Xs)[:, 1]

# Win probability timeline by minute (home perspective)
rows = []
for m in range(0, 91):
    # score
    hg = int(((goals["team"] == home) & (goals["minute"] <= m)).sum()) if not goals.empty else 0
    ag = int(((goals["team"] == away) & (goals["minute"] <= m)).sum()) if not goals.empty else 0
    score_diff = hg - ag

    # xG
    if len(shots) > 0:
        hxg = float(shots.loc[(shots["team"] == home) & (shots["minute"] <= m), "_xg"].sum())
        axg = float(shots.loc[(shots["team"] == away) & (shots["minute"] <= m), "_xg"].sum())
    else:
        hxg = axg = 0.0
    xg_diff = hxg - axg

    # xT
    hxt = float(xt.loc[(xt["team"] == home) & (xt["minute"] <= m), "xt_created"].sum()) if len(xt) else 0.0
    axt = float(xt.loc[(xt["team"] == away) & (xt["minute"] <= m), "xt_created"].sum()) if len(xt) else 0.0
    xt_diff = hxt - axt

    time_remaining = max(0, 90 - m)

    # Same four columns, in the same order, that win_model was trained on.
    feat = pd.DataFrame([{
        "time_remaining": time_remaining,
        "score_diff_home": score_diff,
        "xg_diff_home": xg_diff,
        "xt_diff_home": xt_diff,
    }])
    wp = float(win_model.predict_proba(feat)[0, 1])

    rows.append({
        "minute": m,
        "home_team": home,
        "away_team": away,
        "home_goals": hg,
        "away_goals": ag,
        "xg_diff_home": xg_diff,
        "xt_diff_home": xt_diff,
        "home_win_prob": wp,
    })

timeline = pd.DataFrame(rows)
timeline.to_csv(ART / "winprob_timeline.csv", index=False)

print("Wrote artefacts to:", ART)
player_xt.head(10)

## Step 10 – Zip everything for download

At the end you can download `coachos_artifacts.zip` from the Colab file browser.


In [None]:
# Zip for convenient download in Colab
# The archive is written to the current working directory.
# NOTE(review): the inputs are absolute paths, so entries are presumably stored under
# content/artifacts and content/models inside the zip — confirm this matches what the app unpacks.
!zip -qr coachos_artifacts.zip /content/artifacts /content/models
print("Created coachos_artifacts.zip")

## Plugging this into our hackathon app

- Backend loads `models/*.joblib` once on startup.
- At runtime we compute the same feature columns and call `predict_proba(...)`.
- The exported CSVs are handy for quick UI demos and sanity checks while we build Coach mode.
