# NBA MVP Export + Cleaning Pipeline\n\nThis notebook exports full-game (period=0) player stats through the Django ORM, then cleans the export and saves the MVP training CSV.

In [None]:
# Environment bootstrap: load third-party libraries, make the Django project
# importable, initialise Django, and define where the CSV exports are written.
from pathlib import Path
import os
import sys

import numpy as np
import pandas as pd

# Everything below is resolved against the current working directory, so this
# assumes the notebook is launched from the directory that contains "backend".
ROOT = Path.cwd()

# Notebook cells get re-run; guard the append so repeated runs do not keep
# stacking duplicate entries onto sys.path.
_backend_dir = str(ROOT / "backend")
if _backend_dir not in sys.path:
    sys.path.append(_backend_dir)

# setdefault leaves an externally provided DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings")

# NOTE(review): Jupyter kernels run an event loop, and recent Django versions
# may raise SynchronousOnlyOperation on ORM calls made from such a context.
# If that happens here, DJANGO_ALLOW_ASYNC_UNSAFE=true has to be set before
# django.setup() — confirm against the Django version in use.
import django
django.setup()

# The ORM helpers and the project model are imported only after
# django.setup(), once the app registry is ready.
from django.db.models import Case, ExpressionWrapper, F, FloatField, Value, When
from nba_betting.models import PlayerStats

# Raw export and cleaned training set both live under <ROOT>/exports.
EXPORTS_DIR = ROOT / "exports"
EXPORTS_DIR.mkdir(exist_ok=True)
MVP_PATH = EXPORTS_DIR / "nba_mvp_data.csv"
OUT_MVP_PATH = EXPORTS_DIR / "nba_training_mvp_v1.csv"

In [None]:
# Export one row per (game, player) of full-game stats to MVP_PATH.

# Fields pulled from the ORM. The same list is handed to pandas so that the
# frame has every expected column even when the query returns no rows
# (otherwise the column lookups further down raise KeyError on an empty DB).
_EXPORT_FIELDS = [
    "game__date",
    "game__game_id",
    "player__first_name",
    "player__last_name",
    "team__abbreviation",
    "game__home_team__abbreviation",
    "game__away_team__abbreviation",
    "pts",
    "reb",
    "ast",
    "min",
    "fg_pct",
    "fg3_pct",
]

queryset = (
    # period=0 selects the full-game stat lines.
    PlayerStats.objects.filter(period=0)
    .annotate(
        # Field-goal percentage computed in the database; rows with no
        # attempts get 0.0 instead of a division by zero. The "* 1.0" forces
        # floating-point division.
        fg_pct=Case(
            When(
                fga__gt=0,
                then=ExpressionWrapper(
                    F("fgm") * 1.0 / F("fga"), output_field=FloatField()
                ),
            ),
            default=Value(0.0),
            output_field=FloatField(),
        ),
        # NOTE(review): fg3_pct is exported as a constant 0.0 for every row —
        # presumably a placeholder until three-point data is available;
        # confirm this is intentional.
        fg3_pct=Value(0.0, output_field=FloatField()),
    )
    .values(*_EXPORT_FIELDS)
)

# list() evaluates the query once; columns= pins the schema (see above).
df = pd.DataFrame.from_records(list(queryset), columns=_EXPORT_FIELDS)
df = df.rename(
    columns={
        "game__date": "date",
        "game__game_id": "game_id",
        "team__abbreviation": "player_team",
        "game__home_team__abbreviation": "home_team",
        "game__away_team__abbreviation": "away_team",
    }
)

# "First Last", tolerating a missing first or last name: the outer strip
# removes the separator space left over when either half is empty.
df["player_name"] = (
    df["player__first_name"].fillna("").str.strip()
    + " "
    + df["player__last_name"].fillna("").str.strip()
).str.strip()
df = df.drop(columns=["player__first_name", "player__last_name"])

# NOTE(review): de-duplication keys on the display name, so two different
# players sharing a name in the same game would collapse into one row —
# confirm that is acceptable.
df = df.drop_duplicates(subset=["game_id", "player_name"])
df.to_csv(MVP_PATH, index=False)

print(f"Exported {len(df)} unique player-game rows to {MVP_PATH}")

In [None]:
# Clean the raw export at MVP_PATH and write the training CSV to OUT_MVP_PATH.

# Read game_id as text. Left to dtype inference, an all-digit column is parsed
# as an integer, which strips leading zeros from the ids, makes the "999"
# prefix test below match ids that merely had zeros in front, and makes the
# output depend on whether any non-numeric (e.g. "TEST...") ids are present.
df = pd.read_csv(MVP_PATH, dtype={"game_id": str})

# Purge test rows: any game whose id starts with "TEST" or "999".
# astype(str) turns a missing id into "nan", which matches neither prefix, so
# such rows are kept.
is_test_row = df["game_id"].astype(str).str.startswith(("TEST", "999"))
df = df[~is_test_row].copy()

# Quality checks: no missing numeric values, and a stable dtype per column —
# float for minutes and percentages, int for every other numeric column.
numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].fillna(0)
df = df.astype(
    {col: float if col.endswith(("min", "pct")) else int for col in numeric_cols}
)

df.to_csv(OUT_MVP_PATH, index=False)
print(f"Saved: {OUT_MVP_PATH} (rows={len(df)})")