In [1]:

# import_data.ipynb  ▸  Cell 1
# -------------------------------------------------------------
# Downloads:
#   • data/nbaallelo.csv               (FiveThirtyEight)
#   • data/wyattowalsh-basketball/*    (Kaggle, zipped → unzipped)
# -------------------------------------------------------------
import os
import sys
import subprocess
from shutil import which
from pathlib import Path

# ------------------------------------------------------------------ #
# 1. Make ./data directory
# ------------------------------------------------------------------ #
# All downloads in this cell are written beneath ./data, which is resolved
# against the kernel's current working directory.  exist_ok=True makes
# re-running the cell harmless when the directory is already there.
DATA_DIR = Path("data")
os.makedirs(DATA_DIR, exist_ok=True)
print(f"✓ Directory ready → {DATA_DIR.resolve()}")

# ------------------------------------------------------------------ #
# 2. Download FiveThirtyEight Elo CSV
# ------------------------------------------------------------------ #
import requests

# Raw CSV served from FiveThirtyEight's public GitHub data repository.
elo_url  = "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv"
elo_path = DATA_DIR / "nbaallelo.csv"

# The presence of elo_path is the only cache marker: when it exists the
# download is skipped entirely.
if not elo_path.exists():
    print("• Downloading FiveThirtyEight Elo data …")
    resp = requests.get(elo_url, timeout=30)
    # Fail loudly on an HTTP error status rather than saving an error page
    # under the CSV's name.
    resp.raise_for_status()
    # Write to a sibling ".part" file and rename it into place afterwards.
    # If the write is interrupted, elo_path is never created, so the
    # exists() check above cannot mistake a truncated file for a finished
    # download on the next run.
    tmp_path = elo_path.with_name(elo_path.name + ".part")
    tmp_path.write_bytes(resp.content)
    tmp_path.replace(elo_path)
    print(f"✓ Saved → {elo_path}")
else:
    print(f"• Elo file already present → {elo_path}")

# ------------------------------------------------------------------ #
# 3. Download Kaggle dataset using the CLI script
# ------------------------------------------------------------------ #
dataset_id = "wyattowalsh/basketball"
print(f"• Downloading Kaggle dataset '{dataset_id}' …")

# Look the CLI up on PATH (shutil.which also resolves kaggle.exe on Windows);
# None means no such executable was found.
kaggle_exe = which("kaggle")

if kaggle_exe is not None:
    # Argument list (no shell involved): download into DATA_DIR, unzip the
    # archive, and pass --force so the download is requested on every run.
    cmd = [kaggle_exe, "datasets", "download", dataset_id]
    cmd += ["-p", str(DATA_DIR), "--unzip", "--force"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        # Best effort: report the usual causes and let the cell finish
        # instead of propagating the failure.
        print("\n⚠️  Kaggle CLI failed.")
        print("    • Ensure ~/.kaggle/kaggle.json exists and is valid")
        print("    • Accept the dataset’s rules on kaggle.com")
        print(f"    Kaggle error: {e}")
    else:
        print(f"✓ Kaggle dataset extracted → {DATA_DIR}")
else:
    # Without the CLI nothing can be downloaded; print setup instructions
    # rather than raising so the rest of the cell output stays readable.
    print("\n🚨 Kaggle CLI not found on PATH.\n"
          "    • Activate the Conda env built from environment.yml, OR\n"
          "    • Install kaggle in the current environment:\n"
          "        pip install kaggle==1.7.4.5\n")


✓ Directory ready → C:\Users\kwame\Downloads\Erdos\data-science-summer-2025-project\data
• Elo file already present → data\nbaallelo.csv
• Downloading Kaggle dataset 'wyattowalsh/basketball' …
✓ Kaggle dataset extracted → data
