In [1]:

# import_data.ipynb  ▸  Cell 1
# -------------------------------------------------------------
# Downloads:
#   • data/nbaallelo.csv               (FiveThirtyEight)
#   • data/wyattowalsh-basketball/*    (Kaggle, zipped → unzipped)
# -------------------------------------------------------------
import os
import sys
import subprocess
from shutil import which
from pathlib import Path

# ------------------------------------------------------------------ #
# 1. Make ./data directory
# ------------------------------------------------------------------ #
# Every download in this cell is written under DATA_DIR. The path is
# relative, so it resolves against the kernel's current working directory
# (the resolved absolute path is echoed below for confirmation).
DATA_DIR = Path("data")
# parents=True keeps this working if DATA_DIR is later pointed at a nested
# path; exist_ok=True makes the cell safe to re-run.
DATA_DIR.mkdir(parents=True, exist_ok=True)
print(f"✓ Directory ready → {DATA_DIR.resolve()}")

# ------------------------------------------------------------------ #
# 2. Download FiveThirtyEight Elo CSV
# ------------------------------------------------------------------ #
# `requests` is third-party and only used by this section.
import requests

elo_url  = "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv"
elo_path = DATA_DIR / "nbaallelo.csv"

# A zero-byte file is what an interrupted earlier write leaves behind, so it
# is treated as missing rather than as a finished download.
if not elo_path.exists() or elo_path.stat().st_size == 0:
    print("• Downloading FiveThirtyEight Elo data …")
    resp = requests.get(elo_url, timeout=30)
    resp.raise_for_status()  # HTTP errors propagate; nothing is written

    # Write to a sibling temp file and rename only once the write succeeded,
    # so a crash mid-write can never leave a truncated CSV at elo_path that
    # the existence check above would accept on the next run.
    elo_tmp_path = elo_path.with_name(elo_path.name + ".part")
    try:
        elo_tmp_path.write_bytes(resp.content)
        elo_tmp_path.replace(elo_path)
    finally:
        # After a successful replace() the temp file is gone; this only
        # cleans up after a failed write.
        if elo_tmp_path.exists():
            elo_tmp_path.unlink()
    print(f"✓ Saved → {elo_path}")
else:
    print(f"• Elo file already present → {elo_path}")

# ------------------------------------------------------------------ #
# 3. Download Kaggle dataset using the CLI script
# ------------------------------------------------------------------ #
dataset_id = "wyattowalsh/basketball"

# shutil.which returns the full path of the `kaggle` executable on PATH,
# or None when it is not installed in the active environment.
kaggle_exe = which("kaggle")  # finds kaggle(.exe) on PATH

if kaggle_exe is None:
    # Best-effort: report how to fix the environment instead of raising,
    # so the rest of the notebook can still run.
    print("\n🚨 Kaggle CLI not found on PATH.\n"
          "    • Activate the Conda env built from environment.yml, OR\n"
          "    • Install kaggle in the current environment:\n"
          "        pip install kaggle==1.7.4.5\n")
else:
    # Announce the download only once we know the CLI is available, so the
    # progress message is never printed for a download that does not start.
    print(f"• Downloading Kaggle dataset '{dataset_id}' …")

    # Argument list (no shell) so paths with spaces need no quoting.
    # NOTE(review): --force presumably re-downloads on every run even when
    # the files are already present — confirm against the Kaggle CLI docs.
    cmd = [
        kaggle_exe, "datasets", "download",
        dataset_id, "-p", str(DATA_DIR), "--unzip", "--force"
    ]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(cmd, check=True)
        print(f"✓ Kaggle dataset extracted → {DATA_DIR}")
    except subprocess.CalledProcessError as e:
        # Deliberately non-fatal: print troubleshooting hints and continue.
        print("\n⚠️  Kaggle CLI failed.")
        print("    • Ensure ~/.kaggle/kaggle.json exists and is valid")
        print("    • Accept the dataset’s rules on kaggle.com")
        print(f"    Kaggle error: {e}")


✓ Directory ready → C:\Users\kwame\Downloads\Erdos\data-science-summer-2025-project\data
• Elo file already present → data\nbaallelo.csv
• Downloading Kaggle dataset 'wyattowalsh/basketball' …
✓ Kaggle dataset extracted → data
