In [1]:
import pandas as pd
from pathlib import Path
import io, requests

In [2]:
# ---------------------------------------------------------------------
# 1.  Load Kaggle game metadata (date, season, id)
# ---------------------------------------------------------------------
CSV_DIR   = Path("data/csv")
DATA_DIR  = Path("data")

game_df = pd.read_csv(CSV_DIR / "game.csv",
                      usecols=["game_id", "game_date", "season_id"])
game_df["date_game"] = pd.to_datetime(game_df["game_date"]).dt.date

In [3]:
# ---------------------------------------------------------------------
# 2.  Load Kaggle line scores (already 1 row per TEAM per GAME)
# ---------------------------------------------------------------------
line_df = pd.read_csv(CSV_DIR / "line_score.csv",
                      usecols=["game_id", "team_abbreviation_home", "pts_home"])

# bring in the date + season
kaggle_long = line_df.merge(game_df[["game_id", "date_game", "season_id"]],
                            on="game_id", how="left")

In [4]:
# ---------------------------------------------------------------------
# 3.  Load & reshape FiveThirtyEight Elo → long format
# ---------------------------------------------------------------------
elo_raw = pd.read_csv(DATA_DIR / "nbaallelo.csv")
elo_raw["date_game"] = pd.to_datetime(elo_raw["date_game"]).dt.date

elo_team1 = (
    elo_raw[["date_game", "team_id", "elo_i", "elo_n"]]
      .rename(columns={"team_id": "TEAM_ABBREVIATION",
                       "elo_i": "elo_pre_538",
                       "elo_n": "elo_post_538"})
)
elo_team2 = (
    elo_raw[["date_game", "opp_id", "opp_elo_i", "opp_elo_n"]]
      .rename(columns={"opp_id": "TEAM_ABBREVIATION",
                       "opp_elo_i": "elo_pre_538",
                       "opp_elo_n": "elo_post_538"})
)

elo_long = pd.concat([elo_team1, elo_team2], ignore_index=True)

In [5]:
# ---------------------------------------------------------------------
# 4.  Reconcile historical team codes (minimal starter dict)
# ---------------------------------------------------------------------
alias = {
    "NJN": "BKN", "BRK": "BKN",   # Nets
    "NOH": "NOP", "NOK": "NOP",   # Pelicans
    "CHH": "CHA",                 # Old Hornets
    "SEA": "OKC",                 # Sonics → Thunder
}
kaggle_long["team_abbreviation_home"] = kaggle_long["team_abbreviation_home"].replace(alias)
kaggle_long.rename(columns={"team_abbreviation_home": "TEAM_ABBREVIATION"}, inplace=True)
elo_long["TEAM_ABBREVIATION"] = elo_long["TEAM_ABBREVIATION"].replace(alias)

In [6]:
# ---------------------------------------------------------------------
# 5.  Merge on date + franchise code
# ---------------------------------------------------------------------
merged = kaggle_long.merge(
    elo_long,
    on=["date_game", "TEAM_ABBREVIATION"],
    how="inner"
)

In [7]:
merged.head()

Unnamed: 0,game_id,TEAM_ABBREVIATION,pts_home,date_game,season_id,elo_pre_538,elo_post_538
0,24600002,BOS,53.0,1946-11-02,21946,1300.0,1294.8458
1,24600002,BOS,53.0,1946-11-02,21946,1300.0,1294.8458
2,24600004,CHS,63.0,1946-11-02,21946,1300.0,1309.6521
3,24600004,CHS,63.0,1946-11-02,21946,1300.0,1309.6521
4,24600008,BOS,55.0,1946-11-05,21946,1294.8458,1288.4139
