### DAILY GAME IDS

- Selects the games scheduled for the configured `DATE`
- Exported columns: game id, date, time, home, away, neutral-site flag, and each team's conference

In [2]:
import glob
import pandas as pd
import numpy as np
import re

# Slate date (YYYYMMDD); used further down to locate daily-box-score-ids/<DATE>/.
DATE = "20251104"

# --- season-long game info: one CSV per scrape, stacked into a single frame ---
csv_files = glob.glob("data/boxscores/game-info-2026/*.csv")
combined_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# ESPN display name -> internal team name
team_map = pd.read_csv("data/teams/map.csv")[["team", "espn"]]

# Two left merges: the first contributes team_x/espn_x (home side),
# the second team_y/espn_y (away side).
combined_df = (
    combined_df
    .merge(team_map, left_on="home_team", right_on="espn", how="left")
    .merge(team_map, left_on="away_team", right_on="espn", how="left")
)

combined_df['home'] = combined_df['team_x']
combined_df['away'] = combined_df['team_y']

# 'date' is the YYYYMMDD string form of date_utc; 'date_key' is the same value
# as a nullable integer. (The original cell computed both twice.)
combined_df['date'] = pd.to_datetime(combined_df['date_utc']).dt.strftime('%Y%m%d')
combined_df['date_key'] = pd.to_numeric(combined_df['date'], errors='coerce').astype('Int64')

# --- team -> conference lookup built from three season files ---
conferences = pd.concat(
    [pd.read_csv(f"barttorvik_{year}_all.csv")[["Team", "Conf"]] for year in (2024, 2025, 2026)],
    axis=0,
)
# Drop repeated header rows embedded in the data.
conferences = conferences[conferences['Team'] != "Team"].copy()
# Keep only the leading run of letters / spaces / '.' / '&' from the team label.
# NOTE(review): this pattern excludes apostrophes and hyphens while clean_team()
# allows them -- confirm names such as "St. John's" still match team_map.
conferences['Team'] = conferences['Team'].str.extract(r'^([A-Za-z\s.&]+)')[0].str.strip()
# The files are concatenated oldest-first, so keep the LAST row per team: the
# most recent season's conference wins. Keeping the first row (the previous
# behaviour) labelled teams that changed conference with their 2024 value.
conferences = conferences.drop_duplicates(subset="Team", keep="last")

# --- normalize team names (strip seeds/suffixes) ---
# Leading run of letters, whitespace, '.', '&', apostrophes and hyphens.
name_pat = r'^([A-Za-z\s.&\'-]+)'
def clean_team(s):
    """Return the leading name portion of a team label with whitespace collapsed.

    Missing values are returned unchanged. If the label does not start with
    a character allowed by ``name_pat``, the whole label is kept.
    """
    if pd.isna(s):
        return s
    label = str(s)
    hit = re.match(name_pat, label)
    leading = label if hit is None else hit.group(1)
    collapsed = re.sub(r'\s+', ' ', leading)
    return collapsed.strip()

# Join keys: normalized team names on both sides of the conference lookup.
combined_df['home_key'] = combined_df['home'].map(clean_team)
combined_df['away_key'] = combined_df['away'].map(clean_team)
conferences['team_key'] = conferences['Team'].map(clean_team)

# One row per team_key so the many_to_one validation below can hold.
right = conferences.drop_duplicates(['team_key']).copy()

# --- Build HOME version of the right table ---
home_cols = [c for c in right.columns if c not in ['date_key', 'team_key']]
torvik_home = right.rename(columns={'team_key': 'home_key', **{c: f'{c}_home' for c in home_cols}})

# --- Merge HOME ---
combined_df = combined_df.merge(
    torvik_home,
    on='home_key',
    how='left',
    validate='many_to_one'
)

# --- Build AWAY version of the right table ---
away_cols = [c for c in right.columns if c not in ['date_key', 'team_key']]
torvik_away = right.rename(columns={'team_key': 'away_key', **{c: f'{c}_away' for c in away_cols}})

# --- Merge AWAY ---
combined_df = combined_df.merge(
    torvik_away,
    on='away_key',
    how='left',
    validate='many_to_one'
)

combined_df['season'] = 2026
# 1 when neutral_site is True, 0 for anything else (including missing).
combined_df['neutral_site'] = np.where(combined_df['neutral_site'] == True, 1, 0)

# Select the export columns and lower-case the conference names by explicit
# mapping rather than by positional reassignment of .columns.
combined_df = combined_df[
    ["game_id", "date", "date_key", "date_utc", "time_utc", "neutral_site",
     "home", "away", "Conf_home", "Conf_away"]
].rename(columns={"Conf_home": "conf_home", "Conf_away": "conf_away"})

# --- restrict to the game ids listed for DATE ---
csv_files = glob.glob(f"daily-box-score-ids/{DATE}/*.csv")
if not csv_files:
    # pd.concat on an empty sequence fails with an unhelpful message; say what is missing.
    raise FileNotFoundError(f"No game-id CSV files found in daily-box-score-ids/{DATE}/")
game_id_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

game_ids = list(game_id_df['game_id'])
# index=False: the row index carries no information and would otherwise come
# back as an 'Unnamed: 0' column when daily.csv is read again.
combined_df[combined_df['game_id'].isin(game_ids)].to_csv("daily-games/daily.csv", index=False)

### SEASON GAME INFORMATION AND TEAM STATS

In [3]:
import glob
import pandas as pd

# --- game info for the season ---
csv_files = glob.glob("data/boxscores/game-info-2026/*.csv")
combined_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

team_map = pd.read_csv("data/teams/map.csv")[["team", "espn"]]

# First merge adds team_x (home), second adds team_y (away).
combined_df = (
    combined_df
    .merge(team_map, left_on="home_team", right_on="espn", how="left")
    .merge(team_map, left_on="away_team", right_on="espn", how="left")
)

combined_df['home'] = combined_df['team_x']
combined_df['away'] = combined_df['team_y']

# Keep only games where both teams mapped and a first-half score exists.
combined_df = combined_df.dropna(subset=["home", "away", "home_1h"])
combined_df = combined_df[['game_id', 'date_utc', 'time_utc', 'neutral_site', 'home',
       'away', 'home_1h', 'away_1h', 'home_2h', 'away_2h', 'home_score',
       'away_score']]

# The original statement discarded the sorted frame; assign it so the
# chronological ordering actually takes effect.
combined_df = combined_df.sort_values("date_utc")

# --- per-team box-score stats ---
csv_files = glob.glob("data/boxscores/team-stats-2026/*.csv")
team_combined_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

df = team_combined_df.sort_values(['displayOrder'])

# Split into home and away rows
home_df = df[df['homeAway'] == 'home'].copy()
away_df = df[df['homeAway'] == 'away'].copy()

# Columns dropped before suffixing
cols_to_remove = ['homeAway', 'displayOrder', 'abbreviation', 'team_id']

# Suffix every remaining column (including game_id) with its side
home_df = home_df.drop(columns=cols_to_remove).add_suffix('_home')
away_df = away_df.drop(columns=cols_to_remove).add_suffix('_away')

# Pair the two rows of each game; no column names overlap after suffixing,
# so empty merge suffixes are safe.
final_df = pd.merge(
    home_df,
    away_df,
    left_on='game_id_home',
    right_on='game_id_away',
    suffixes=('', ''),
    how='inner'
)

# Keep just one copy of game_id
final_df['game_id'] = final_df['game_id_home']
final_df = final_df.drop(columns=['game_id_home', 'game_id_away'])

# Put game_id first
cols = ['game_id'] + [c for c in final_df.columns if c != 'game_id']
final_df = final_df[cols]

# Stats kept per side, in output order (home block first, then away block).
team_stat_names = [
    'assists', 'defensiveRebounds', 'freeThrowPct',
    'threePointFieldGoalsMade-threePointFieldGoalsAttempted',
    'fouls', 'totalRebounds', 'threePointFieldGoalPct',
    'teamTurnovers', 'pointsInPaint', 'technicalFouls',
    'totalTechnicalFouls', 'largestLead',
    'offensiveRebounds', 'fieldGoalPct',
    'totalTurnovers', 'turnoverPoints', 'flagrantFouls',
    'freeThrowsMade-freeThrowsAttempted', 'steals',
    'fieldGoalsMade-fieldGoalsAttempted', 'blocks',
    'fastBreakPoints', 'turnovers',
]
final_df = final_df[
    ['game_id']
    + [f'{stat}_home' for stat in team_stat_names]
    + [f'{stat}_away' for stat in team_stat_names]
]

# Inner merge: games without team stats are dropped.
combined_df = combined_df.merge(final_df, on="game_id")

# --- officials ---
csv_files = glob.glob("data/boxscores/officials-2026/*.csv")
officials_df = pd.concat((pd.read_csv(f) for f in csv_files), ignore_index=True)

# 1-based position of each official within its game
officials_df['official_number'] = officials_df.groupby('game_id').cumcount() + 1

# pivot to wide format: one official_<n> column per position
flat_officials = (
    officials_df.pivot(index='game_id', columns='official_number', values='official_name')
    .rename(columns=lambda x: f'official_{x}')
    .reset_index()
)

# reindex instead of plain selection: if no game in the data has a second or
# third official the column is created as all-NaN rather than raising KeyError.
flat_officials = flat_officials.reindex(columns=["game_id", "official_1", "official_2", "official_3"])

# Inner merge: games without any listed official are dropped.
combined_df = combined_df.merge(flat_officials, on="game_id")

  uniques = Index(uniques)


In [4]:
import pandas as pd
import numpy as np
import glob
import warnings

# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings("ignore")

team_map = pd.read_csv("data/teams/map.csv")[["team", "espn"]]

# game_df is the same object as combined_df, so the columns added here
# also appear on combined_df.
game_df = combined_df
game_df['date'] = pd.to_datetime(game_df['date_utc']).dt.strftime('%Y%m%d')
game_df['home_margin'] = game_df['home_score'] - game_df['away_score']
game_df['away_margin'] = game_df['away_score'] - game_df['home_score']

# Source column -> output column, in output order.
torvik_column_names = {
    'season': 'season', 'Date': 'date', 'Team': 'team', 'Rk': 'rank', 'Conf': 'conf',
    'G': 'games', 'AdjOE': 'adj_off_eff', 'AdjDE': 'adj_def_eff', 'Barthag': 'barthag',
    'EFG%': 'efg_pct', 'EFGD%': 'efgd_pct', 'TOR': 'tor', 'TORD': 'tord',
    'ORB': 'orb', 'DRB': 'drb', 'FTR': 'ftr', 'FTRD': 'ftrd',
    '2P%': 'two_pt_pct', '2P%D': 'two_pt_def_pct',
    '3P%': 'three_pt_pct', '3P%D': 'three_pt_def_pct',
    '3PR': 'three_pt_rt', '3PRD': 'three_pt_def_rt',
    'Adj T.': 'adj_tempo', 'WAB': 'wab',
}

csv_files_2026 = glob.glob("daily_csvs_2026/*.csv")
torvik_stacked = pd.concat((pd.read_csv(f) for f in csv_files_2026), ignore_index=True)
# Remove header rows repeated inside the data.
torvik_rows = torvik_stacked[torvik_stacked['Team'] != "Team"].copy()
# Leading letters / whitespace / '.' / '&' of the team label only.
torvik_rows['Team'] = torvik_rows['Team'].str.extract(r'^([A-Za-z\s.&]+)')[0].str.strip()
# WAB carries an explicit '+' sign on some values; strip it before casting.
torvik_rows['WAB'] = torvik_rows['WAB'].str.replace("+","", regex=False).astype("float")
torvik_rows['season'] = 2026
daily_torvik_2026_df = (
    torvik_rows[list(torvik_column_names)]
    .sort_values(["Date","Team"], ascending=True)
    .rename(columns=torvik_column_names)
)

# assert len(set(daily_torvik_2026_df['team']) - set(team_map['team'])) == 0

daily_torvik_df = daily_torvik_2026_df


import re
# Nullable-integer date keys on both sides of the upcoming merge.
for keyed_frame in (game_df, daily_torvik_df):
    keyed_frame['date_key'] = pd.to_numeric(keyed_frame['date'], errors='coerce').astype('Int64')

# --- normalize team names (strip seeds/suffixes) ---
# NOTE(review): identical to the clean_team defined in the daily-game-ids cell;
# this later definition shadows that one.
name_pat = r'^([A-Za-z\s.&\'-]+)'
def clean_team(s):
    """Keep the leading name part of a team label and collapse its whitespace.

    NaN/None inputs pass through untouched; labels that do not begin with a
    character permitted by ``name_pat`` are kept whole.
    """
    if pd.isna(s):
        return s
    raw = str(s)
    found = re.match(name_pat, raw)
    if found:
        raw = found.group(1)
    return re.sub(r'\s+', ' ', raw).strip()

# Normalized name keys used to join games to the daily ratings.
game_df['home_key'] = game_df['home'].map(clean_team)
game_df['away_key'] = game_df['away'].map(clean_team)
daily_torvik_df['team_key'] = daily_torvik_df['team'].map(clean_team)

# One ratings row per (date, team) so the many_to_one validation can hold.
right = daily_torvik_df.drop_duplicates(['date_key', 'team_key']).copy()

# Every non-key ratings column gets a side suffix.
rating_value_cols = [c for c in right.columns if c not in ['date_key', 'team_key']]

# --- HOME version of the ratings table ---
home_cols = list(rating_value_cols)
home_renames = {c: f'{c}_home' for c in home_cols}
home_renames['team_key'] = 'home_key'
torvik_home = right.rename(columns=home_renames)

# --- AWAY version of the ratings table ---
away_cols = list(rating_value_cols)
away_renames = {c: f'{c}_away' for c in away_cols}
away_renames['team_key'] = 'away_key'
torvik_away = right.rename(columns=away_renames)

# --- Merge HOME, then AWAY, on (date, team) ---
merged_df = game_df.merge(
    torvik_home, on=['date_key', 'home_key'], how='left', validate='many_to_one'
).merge(
    torvik_away, on=['date_key', 'away_key'], how='left', validate='many_to_one'
)

merged_df['season'] = 2026
merged_df['neutral_site'] = np.where(merged_df['neutral_site'] == True, 1, 0)

# --- final column order: game info, box score (home, away), officials, ratings (home, away) ---
game_info_cols = [
    'game_id', 'season', 'date', 'date_utc', 'time_utc', 'neutral_site',
    'home', 'away', 'home_1h', 'away_1h', 'home_2h', 'away_2h',
    'home_score', 'away_score', 'home_margin', 'away_margin',
]
box_stat_names = [
    'assists', 'fouls', 'technicalFouls', 'flagrantFouls', 'totalRebounds',
    'offensiveRebounds', 'defensiveRebounds', 'pointsInPaint', 'turnovers',
    'turnoverPoints', 'steals', 'blocks', 'fastBreakPoints',
]
official_cols = ['official_1', 'official_2', 'official_3']
rating_names = [
    'rank', 'conf', 'games', 'adj_off_eff', 'adj_def_eff', 'barthag',
    'efg_pct', 'efgd_pct', 'tor', 'tord', 'orb', 'drb', 'ftr', 'ftrd',
    'two_pt_pct', 'two_pt_def_pct', 'three_pt_pct', 'three_pt_def_pct',
    'three_pt_rt', 'three_pt_def_rt', 'adj_tempo', 'wab',
]

merged_col_order = (
    game_info_cols
    + [f'{name}_home' for name in box_stat_names]
    + [f'{name}_away' for name in box_stat_names]
    + official_cols
    + [f'{name}_home' for name in rating_names]
    + [f'{name}_away' for name in rating_names]
)
merged_df = merged_df[merged_col_order]

### FEATURE SET 1 USING DAILY.CSV

In [7]:
# inference_time_officials_conference.py
from __future__ import annotations
import json
from typing import Dict, Optional, Iterable, Tuple
import numpy as np
import pandas as pd

# ---- Configuration ----
# Columns holding official names; each one gets a matching '<col>_code' column.
OFFICIAL_COLS = ['official_1', 'official_2', 'official_3']
# Timezone the UTC tip-off is converted to for the 'et_'-prefixed features.
LOCAL_TZ = 'America/New_York'  # Eastern time

# ---------- Helpers ----------
def _ensure_date_key_str(s: pd.Series) -> pd.Series:
    """Normalize YYYYMMDD to 8-char string from any input series."""
    return s.astype(str).str.extract(r'(\d{8})')[0]

def _build_tipoff_utc(df: pd.DataFrame) -> pd.Series:
    """
    Build timezone-aware UTC datetime from (date_key + time_utc like '21:00Z').
    Requires: 'date_key' (YYYYMMDD) and 'time_utc' ('HH:MMZ' or 'HH:MM').
    """
    if 'date_utc' not in df.columns:
        raise KeyError("Expected 'date_key' (YYYYMMDD).")
    if 'time_utc' not in df.columns:
        raise KeyError("Expected 'time_utc' like '21:00Z' or '21:00'.")

    # use date_key (not 'date'); coerce invalids to NaT
    date_key = _ensure_date_key_str(df['date'])
    t = df['time_utc'].astype(str).str.strip()
    t = np.where(t.str.endswith('Z'), t, t + 'Z')  # ensure trailing Z
    iso_date = pd.to_datetime(date_key, format='%Y%m%d', errors='coerce').dt.strftime('%Y-%m-%d')
    iso = iso_date + ' ' + t
    tipoff_utc = pd.to_datetime(iso, utc=True, errors='coerce', infer_datetime_format=True)
    return tipoff_utc

def _time_features_from_dt(dt: pd.Series, prefix: str) -> pd.DataFrame:
    """
    From a timezone-aware datetime series, produce:
      - {prefix}_hour, {prefix}_minute, {prefix}_second
      - {prefix}_seconds_since_midnight
      - {prefix}_hour_sin, {prefix}_hour_cos (cyclical)
    """
    out = pd.DataFrame(index=dt.index)
    out[f'{prefix}_hour'] = dt.dt.hour.fillna(0).astype('int16')
    out[f'{prefix}_minute'] = dt.dt.minute.fillna(0).astype('int16')
    out[f'{prefix}_second'] = dt.dt.second.fillna(0).astype('int16')
    out[f'{prefix}_seconds_since_midnight'] = (
        out[f'{prefix}_hour'] * 3600 + out[f'{prefix}_minute'] * 60 + out[f'{prefix}_second']
    ).astype('int32')

    two_pi = 2 * np.pi
    out[f'{prefix}_hour_sin'] = np.sin(two_pi * out[f'{prefix}_hour'] / 24.0)
    out[f'{prefix}_hour_cos'] = np.cos(two_pi * out[f'{prefix}_hour'] / 24.0)
    return out

def _add_day_flags(local_dt: pd.Series, base_df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """
    Day-of-week flags on LOCAL time:
      - {prefix}_is_weekend (Sat/Sun)
      - {prefix}_is_primetime (18:00–22:59)
      - {prefix}_daypart_* one-hots: morning(5–11), afternoon(12–16), evening(17–21), late(other)
    """
    out = base_df.copy()
    dow = local_dt.dt.dayofweek  # Mon=0..Sun=6
    out[f'{prefix}_is_weekend'] = dow.isin([5, 6]).fillna(False).astype('int8')

    hour = local_dt.dt.hour.fillna(0).astype(int)
    out[f'{prefix}_is_primetime'] = ((hour >= 18) & (hour <= 22)).astype('int8')

    def _daypart(h):
        if 5 <= h <= 11:  return 'morning'
        if 12 <= h <= 16: return 'afternoon'
        if 17 <= h <= 21: return 'evening'
        return 'late'

    dp = hour.map(_daypart).astype('category')
    dummies = pd.get_dummies(dp, prefix=f'{prefix}_daypart', dtype='int8')
    out = pd.concat([out, dummies], axis=1)
    return out

# ---------- Transform (apply saved maps) ----------
def transform_officials_with_map(df: pd.DataFrame, mapping: Dict[str, int],
                                 official_cols: Iterable[str] = OFFICIAL_COLS) -> pd.DataFrame:
    """Return a copy of `df` with an integer '<col>_code' column per official column.

    Names are looked up in `mapping`; names not found (and missing values) get
    the code stored under 'UNK' (0 when the mapping has no 'UNK' entry). A
    column absent from `df` yields a constant code column with that same value.
    """
    encoded = df.copy()
    unknown_code = mapping.get('UNK', 0)
    for col in official_cols:
        code_col = f'{col}_code'
        if col not in encoded.columns:
            encoded[code_col] = unknown_code
            continue
        names = encoded[col].astype('string')
        encoded[code_col] = names.map(mapping).fillna(unknown_code).astype('int32')
    return encoded

def transform_with_map(series: pd.Series, mapping: Dict[str, int], fill_value: str = 'UNK') -> pd.Series:
    """Encode `series` with a prefit mapping and return int32 codes.

    Missing values are replaced by `fill_value` before the lookup; any label
    absent from `mapping` receives the code of `fill_value` (0 if the mapping
    has no such entry).
    """
    fallback_code = mapping.get(fill_value, 0)
    labels = series.astype('string').fillna(fill_value)
    codes = labels.map(mapping)
    return codes.fillna(fallback_code).astype('int32')

# ---------- Public Inference Entry ----------
def load_enc_maps(path: str) -> Dict[str, Dict[str, int]]:
    """Load the saved encoder maps from the JSON file at `path`.

    The file is read as UTF-8 explicitly: the platform default encoding could
    otherwise garble non-ASCII keys on some systems.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def build_time_officials_conference_features_inference(
    df: pd.DataFrame,
    enc_maps: Dict[str, Dict[str, int]],
    *,
    add_et_features: bool = True,
    make_conference_dummies: bool = False
) -> Tuple[pd.DataFrame, Dict[str, Dict[str, int]]]:
    """
    Inference-time feature builder (no encoder is refit here).

    Steps, applied to a copy of `df`:
      1. make sure 'date_key' exists (derived from 'date' when absent);
      2. add 'tipoff_utc' and the 'utc_*' time features;
      3. if `add_et_features`, add 'et_*' time features and day flags in LOCAL_TZ;
      4. encode officials with enc_maps['official_map'];
      5. encode 'conf_home' / 'conf_away' (when present) with their maps, and
         optionally append one-hot conference dummies.

    Returns: (features_df, enc_maps) -- the maps are passed back unchanged.
    Raises:  KeyError when neither 'date' nor 'date_key' is available.
    """
    default_map = {'UNK': 0}
    out = df.copy()

    # 1) date key needed for time parsing
    if 'date_key' not in out.columns:
        if 'date' not in out.columns:
            raise KeyError("Expected 'date' or 'date_key' in inference dataframe.")
        out['date_key'] = _ensure_date_key_str(out['date'])

    # 2) UTC tip-off and its time features
    out['tipoff_utc'] = _build_tipoff_utc(out)
    out = pd.concat([out, _time_features_from_dt(out['tipoff_utc'], prefix='utc')], axis=1)

    # 3) local-time features and day flags
    if add_et_features:
        tipoff_et = out['tipoff_utc'].dt.tz_convert(LOCAL_TZ)
        out = pd.concat([out, _time_features_from_dt(tipoff_et, prefix='et')], axis=1)
        out = _add_day_flags(tipoff_et, out, prefix='et')

    # 4) officials share a single map
    out = transform_officials_with_map(out, enc_maps.get('official_map', {'UNK': 0}), OFFICIAL_COLS)

    # 5) conferences: home first, then away
    for conf_col, map_name in (('conf_home', 'conf_home_map'), ('conf_away', 'conf_away_map')):
        if conf_col not in out.columns:
            continue
        side_map = enc_maps.get(map_name, default_map)
        out[f'{conf_col}_code'] = transform_with_map(out[conf_col], side_map)
        if make_conference_dummies:
            one_hot = pd.get_dummies(out[conf_col].astype('string').fillna('UNK'),
                                     prefix=conf_col, dtype='int8')
            out = pd.concat([out, one_hot], axis=1)

    return out, enc_maps

# ---------- Optional: align to training feature set ----------
def align_to_training_features(df_features: pd.DataFrame, train_feature_cols: list) -> pd.DataFrame:
    """
    Reindex to the exact training feature set:
      - add any missing columns (filled with 0),
      - drop any extra columns,
      - keep the same ordering as training.
    Non-numeric columns are converted with pd.to_numeric; values that cannot
    be parsed become 0.

    Uses pandas' dtype check rather than np.issubdtype, which raises TypeError
    on pandas extension dtypes (e.g. 'Int64', 'string', 'category').
    """
    X = df_features.reindex(columns=train_feature_cols, fill_value=0)
    for c in X.columns:
        if not pd.api.types.is_numeric_dtype(X[c].dtype):
            X[c] = pd.to_numeric(X[c], errors='coerce').fillna(0)
    return X

# 1) Load the encoder maps saved at training time
enc_maps = load_enc_maps("data/train/enc_maps.json")

# 2) Apply to new games dataframe (must have: 'date' or 'date_key', and 'time_utc')
features_1, _ = build_time_officials_conference_features_inference(
    pd.read_csv("daily-games/daily.csv"), enc_maps,
    add_et_features=True,
    make_conference_dummies=True
)

# 3) A daypart dummy may be absent when no game in the slate falls in it. Add
#    ONLY the absent ones as zeros. The previous line unconditionally set the
#    afternoon and morning flags to 0, erasing real values, and did not cover a
#    missing evening/late column at all.
for daypart_col in ('et_daypart_afternoon', 'et_daypart_evening',
                    'et_daypart_late', 'et_daypart_morning'):
    if daypart_col not in features_1.columns:
        features_1[daypart_col] = 0

# 4) Keep the feature columns in their fixed order
features_1_cols = ['game_id', 'utc_seconds_since_midnight', 'utc_hour_sin', 'utc_hour_cos',
       'et_hour', 'et_minute', 'et_second', 'et_seconds_since_midnight',
       'et_hour_sin', 'et_hour_cos', 'et_is_weekend', 'et_is_primetime',
       'et_daypart_afternoon', 'et_daypart_evening', 'et_daypart_late',
       'et_daypart_morning', 'official_1_code', 'official_2_code',
       'official_3_code', 'conf_home_code', 'conf_away_code']
features_1 = features_1[features_1_cols]

# Sanity check: nothing outside the expected feature list slipped through.
assert len(set(list(features_1.columns)) - set(features_1_cols)) == 0

### FEATURE SET 2/3 USING 2026 GAME INFORMATION AND TORVIK RATINGS

In [8]:
import pandas as pd
import numpy as np

# --- base dataframe ---
# Copy of merged_df restricted to game info plus the *_home / *_away rating
# columns (box-score stats, officials and conf_* are left out).
df = merged_df[['game_id', 'season', 'date', 'date_utc', 'time_utc',
       'neutral_site', 'home', 'away', 'home_1h', 'away_1h', 'home_2h',
       'away_2h', 'home_score', 'away_score', 'home_margin',
       'away_margin','rank_home', 'games_home',
       'adj_off_eff_home', 'adj_def_eff_home', 'barthag_home',
       'efg_pct_home', 'efgd_pct_home', 'tor_home', 'tord_home',
       'orb_home', 'drb_home', 'ftr_home', 'ftrd_home', 'two_pt_pct_home',
       'two_pt_def_pct_home', 'three_pt_pct_home',
       'three_pt_def_pct_home', 'three_pt_rt_home',
       'three_pt_def_rt_home', 'adj_tempo_home', 'wab_home', 'rank_away',
       'games_away', 'adj_off_eff_away', 'adj_def_eff_away',
       'barthag_away', 'efg_pct_away', 'efgd_pct_away', 'tor_away',
       'tord_away', 'orb_away', 'drb_away', 'ftr_away', 'ftrd_away',
       'two_pt_pct_away', 'two_pt_def_pct_away', 'three_pt_pct_away',
       'three_pt_def_pct_away', 'three_pt_rt_away',
       'three_pt_def_rt_away', 'adj_tempo_away', 'wab_away']].copy()

# --- 1) datetime for chronological sort (UTC preferred if available) ---
# game_dt = date_utc + ' ' + time_utc parsed as UTC (a missing time is treated
# as 00:00:00); falls back to date_utc alone, then to the plain 'date' column.
# Values that cannot be parsed become NaT.
if 'date_utc' in df.columns and df['date_utc'].notna().any():
    if 'time_utc' in df.columns:
        df['game_dt'] = pd.to_datetime(
            df['date_utc'].astype(str).str.strip() + ' ' +
            df['time_utc'].fillna('00:00:00').astype(str).str.strip(),
            errors='coerce', utc=True
        )
    else:
        df['game_dt'] = pd.to_datetime(df['date_utc'], errors='coerce', utc=True)
else:
    df['game_dt'] = pd.to_datetime(df['date'], errors='coerce')

# --- 2) bases from *_home / *_away ---
# suffix_cols: every column ending in _home or _away.
# bases: the sorted, de-duplicated names left after removing that last suffix.
suffix_cols = [c for c in df.columns if c.endswith('_home') or c.endswith('_away')]
bases = sorted({c.rsplit('_', 1)[0] for c in suffix_cols})

# --- 3) map columns to team/opp perspective ---
# For the home team's row: <base>_home -> <base>, <base>_away -> opp_<base>.
# For the away team's row the roles are swapped.
home_to_team = {f'{b}_home': b for b in bases}
away_to_team = {f'{b}_away': b for b in bases}
home_to_opp  = {f'{b}_away': f'opp_{b}' for b in bases}
away_to_opp  = {f'{b}_home': f'opp_{b}' for b in bases}

# Identifier columns carried into both views (only those that exist in df).
id_cols = ['game_id','season','date','game_dt','neutral_site']
id_cols = [c for c in id_cols if c in df.columns]

# Home perspective: one row per game with home as 'team', away as 'opponent'.
home_view = df[id_cols + ['home','away'] + suffix_cols].copy()
home_view = home_view.rename(columns={'home':'team','away':'opponent'})
home_view = home_view.rename(columns={**home_to_team, **home_to_opp})
home_view['is_home'] = 1

# Away perspective: same games with away as 'team', home as 'opponent'.
away_view = df[id_cols + ['home','away'] + suffix_cols].copy()
away_view = away_view.rename(columns={'away':'team','home':'opponent'})
away_view = away_view.rename(columns={**away_to_team, **away_to_opp})
away_view['is_home'] = 0

# Long format: two rows per game, ordered by team, season, time, then game id,
# with a fresh 0..n-1 index.
team_games = pd.concat([home_view, away_view], ignore_index=True, sort=False)
team_games = team_games.sort_values(['team','season','game_dt','game_id'], ignore_index=True)

# --- 4) coerce numeric fields used below ---
def _to_num(s: pd.Series) -> pd.Series:
    if s.dtype == 'O':
        s = s.astype(str).str.strip().str.rstrip('%')
    return pd.to_numeric(s, errors='coerce')

# Collect every team-side and opponent-side rating column present in team_games.
numeric_needed = set()
for b in bases:
    if b in team_games.columns: numeric_needed.add(b)
    ob = f'opp_{b}'
    if ob in team_games.columns: numeric_needed.add(ob)
# No-op when 'rank' is one of the bases (the loop above already added opp_rank).
if 'opp_rank' in team_games.columns: numeric_needed.add('opp_rank')

# Coerce those columns to numeric in one pass (unparseable values -> NaN).
if numeric_needed:
    team_games[list(numeric_needed)] = team_games[list(numeric_needed)].apply(_to_num)

# --- 5) cumulative opponent rank (inclusive + "pre" if you still use it) ---
if 'opp_rank' in team_games.columns:
    g = team_games.groupby(['team','season'], dropna=False)['opp_rank']
    # Running mean including the current row: running sum / rows seen so far.
    # NOTE(review): cumcount counts rows with a NaN opp_rank too, so the
    # denominator may exceed the number of values summed -- confirm intended.
    team_games['opp_rank_cummean_incl'] = g.cumsum() / (g.cumcount() + 1)  # includes current
    cs = g.cumsum()
    cnt = g.cumcount()
    # cs.shift(1) is a plain (ungrouped) shift. The first row of each group has
    # cnt == 0 -> NaN denominator, so the value leaking in from the previous
    # group is discarded; later rows pick up the prior row of the same group
    # only as long as each (team, season) group occupies consecutive rows
    # (presumably guaranteed by an earlier sort on team/season -- verify).
    team_games['opp_rank_cummean_pre'] = cs.shift(1) / cnt.replace(0, np.nan)  # prior-only

# --- 6) latest row per (team, season): the last one after chronological sorting ---
chronological = team_games.sort_values(['team','season','game_dt','game_id'])
latest = (
    chronological
    .groupby(['team','season'], as_index=False, sort=False)
    .tail(1)
    .reset_index(drop=True)
)

# --- 7) prefix the current team / opponent values with lag1_ (training names) ---
team_feature_cols = [c for c in bases if c in latest.columns]
opp_feature_cols  = [f'opp_{b}' for b in bases if f'opp_{b}' in latest.columns]

rename_map = {c: f'lag1_{c}' for c in team_feature_cols + opp_feature_cols}

latest_renamed = latest.rename(columns=rename_map)

# --- 8) features_2: identifiers, team ratings, opponent ratings, opponent-rank means ---
lag_stat_names = [
    'adj_def_eff', 'adj_off_eff', 'adj_tempo', 'barthag',
    'drb', 'efg_pct', 'efgd_pct', 'ftr', 'ftrd',
    'games', 'orb', 'rank', 'three_pt_def_pct',
    'three_pt_def_rt', 'three_pt_pct', 'three_pt_rt',
    'tor', 'tord', 'two_pt_def_pct', 'two_pt_pct', 'wab',
]
wanted_cols = (
    ['game_id', 'team', 'opponent']
    + [f'lag1_{stat}' for stat in lag_stat_names]
    + [f'lag1_opp_{stat}' for stat in lag_stat_names]
    + ['opp_rank_cummean_incl', 'opp_rank_cummean_pre']
)

# keep only those that exist (some sources may lack a few)
final_cols = [c for c in wanted_cols if c in latest_renamed.columns]
features_2 = latest_renamed[final_cols].copy()

# Guard: features_2 must not contain anything outside the expected list.
unexpected_cols = set(list(features_2.columns)) - set(wanted_cols)
assert len(unexpected_cols) == 0

In [19]:
import pandas as pd
import numpy as np

def build_cbb_features_inference(raw_df, windows=(1,3,5,10), ewm_halflife=5):
    """Return the most recent rolling-form feature row for every team-season.

    The game-level frame (one row per game, with paired ``<stat>_home`` /
    ``<stat>_away`` columns) is reshaped to one row per team per game.  Rolling
    means, a rolling standard deviation and an exponentially weighted mean are
    then computed inside each (team, season) group, and the last row of every
    group is returned.  Nothing is shifted: each row's features include that
    row's own game.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Must contain ``game_id``, ``season``, ``home`` and ``away``.  Used when
        present: ``date_utc`` (+ ``time_utc``) or ``date``, ``neutral_site``,
        ``{home,away}_score``, ``{home,away}_margin``, ``{home,away}_1h``,
        ``{home,away}_2h`` and any ``<stat>_home`` / ``<stat>_away`` pair.
        The frame is copied, never modified.
    windows : iterable of int, default (1, 3, 5, 10)
        Rolling-window lengths in games.  Every window produces its own
        ``..._w<N>`` columns in the result; repeated values are ignored.
    ewm_halflife : number, default 5
        Half-life (in games) of the exponentially weighted means; it appears
        in the output names as ``ewm_..._hl<halflife>``.

    Returns
    -------
    pandas.DataFrame
        One row per (team, season): ``game_id`` and ``team`` of the team's last
        row, ``rest_days``, and the rolling features that could be computed
        from the columns supplied.  Box-score features are limited to the
        fixed stat list in ``box_stats`` below; the rolling standard deviation
        always uses a 5-game window.
    """
    df = raw_df.copy()

    # Repeated window sizes would emit the same output column twice.
    windows = tuple(dict.fromkeys(windows))
    group_keys = ['team', 'season']
    std_window = 5  # the rolling std is always computed over 5 games

    # --- 0) Build a proper datetime (UTC if date_utc/time_utc are available) ---
    if 'date_utc' in df.columns and df['date_utc'].notna().any():
        if 'time_utc' in df.columns:
            df['game_dt'] = pd.to_datetime(
                df['date_utc'].astype(str).str.strip() + ' ' +
                df['time_utc'].fillna('00:00:00').astype(str).str.strip(),
                errors='coerce', utc=True
            )
        else:
            df['game_dt'] = pd.to_datetime(df['date_utc'], errors='coerce', utc=True)
    else:
        # No UTC date: fall back to `date`, or to an all-NaT datetime column so
        # the `.dt` accessor below still works.
        if 'date' in df.columns:
            date_source = df['date']
        else:
            date_source = pd.Series(pd.NaT, index=df.index, dtype='datetime64[ns]')
        df['game_dt'] = pd.to_datetime(date_source, errors='coerce')

    # Keep a simple sortable date for later (optional)
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
    else:
        game_dt = df['game_dt']
        # tz_convert raises on tz-naive data, so only convert when a tz is set.
        if game_dt.dt.tz is not None:
            game_dt = game_dt.dt.tz_convert('UTC')
        df['date'] = game_dt.dt.date

    # --- 1) Identify base stat names (strip the 5-char _home/_away suffix) ---
    stat_bases = sorted({c[:-5] for c in df.columns if c.endswith(('_home', '_away'))})

    # --- 2) Long-format: one row per team per game (home & away views) ---
    def _make_team_rows(side):
        """Return the games seen from the `side` team's perspective."""
        assert side in ('home','away')
        other = 'away' if side == 'home' else 'home'

        # base stat name -> (team column, opponent column); only complete pairs
        base_map = {
            b: (f'{b}_{side}', f'{b}_{other}')
            for b in stat_bases
            if f'{b}_{side}' in df.columns and f'{b}_{other}' in df.columns
        }

        out = pd.DataFrame({
            'game_id': df['game_id'],
            'season' : df['season'],
            'date'   : df['date'],
            'game_dt': df['game_dt'],
            'team'   : df[side],
            'opponent': df[other],
            'is_home': np.full(len(df), 1 if side == 'home' else 0, dtype=np.int8),
        })

        # Paired <side>_<x> columns that get standardized team/opponent names.
        paired = [
            ('score',  'team_score',  'opp_score'),
            ('margin', 'team_margin', 'opp_margin'),
        ]
        for suffix, team_name, opp_name in paired:
            if f'{side}_{suffix}' in df.columns and f'{other}_{suffix}' in df.columns:
                out[team_name] = pd.to_numeric(df[f'{side}_{suffix}'], errors='coerce')
                out[opp_name]  = pd.to_numeric(df[f'{other}_{suffix}'], errors='coerce')

        if 'neutral_site' in df.columns:
            out['neutral_site'] = pd.Series(df['neutral_site']).fillna(0).astype(int)

        # copy per-game stats into standardized columns: <base> and opp_<base>
        for b, (tc, oc) in base_map.items():
            out[b] = pd.to_numeric(df[tc], errors='coerce')
            out[f'opp_{b}'] = pd.to_numeric(df[oc], errors='coerce')

        # standardized 1H / 2H points
        for half in ('1h', '2h'):
            if f'{side}_{half}' in df.columns and f'{other}_{half}' in df.columns:
                out[f'points_{half}']     = pd.to_numeric(df[f'{side}_{half}'], errors='coerce')
                out[f'opp_points_{half}'] = pd.to_numeric(df[f'{other}_{half}'], errors='coerce')

        return out

    team_games = (
        pd.concat([_make_team_rows('home'), _make_team_rows('away')], ignore_index=True)
        .sort_values(group_keys + ['game_dt', 'game_id'])
        .reset_index(drop=True)
    )

    # --- 3) Rest days (gap to the team's previous game; 7 when there is none) ---
    team_games['prev_game_dt'] = team_games.groupby(group_keys)['game_dt'].shift(1)
    team_games['rest_days'] = (
        (team_games['game_dt'] - team_games['prev_game_dt']).dt.days.fillna(7)
    )

    # All source columns exist from here on, so the groupings are built once and
    # reused.  New feature columns are collected in `derived` and attached in a
    # single concat: inserting ~200 columns one at a time fragments the frame.
    by_team = team_games.groupby(group_keys)
    derived = {}

    def _roll(grouped, col, w):
        return grouped[col].transform(lambda s: s.rolling(w, min_periods=1).mean())

    def _rstd(grouped, col, w):
        return grouped[col].transform(lambda s: s.rolling(w, min_periods=2).std())

    def _ewm(grouped, col, hl):
        return grouped[col].transform(
            lambda s: s.ewm(halflife=hl, min_periods=1, adjust=False).mean()
        )

    for w in windows:
        derived[f'ra_rest_days_w{w}'] = _roll(by_team, 'rest_days', w)

    # --- 4) Rolling/EWM features for team & allowed (NO shift for inference) ---
    full_bases = [b for b in stat_bases if b in team_games.columns]

    for b in full_bases:
        # (source column, name prefix): the team's own stat, then the opponent's
        sources = [(b, '')]
        if f'opp_{b}' in team_games.columns:
            sources.append((f'opp_{b}', 'allowed_'))
        for src, prefix in sources:
            for w in windows:
                derived[f'ra_{prefix}{b}_w{w}'] = _roll(by_team, src, w)
            derived[f'rstd_{prefix}{b}_w{std_window}'] = _rstd(by_team, src, std_window)
            derived[f'ewm_{prefix}{b}_hl{ewm_halflife}'] = _ewm(by_team, src, ewm_halflife)

    # 4b) 1H/2H rolling, allowed
    for b in ('points_1h', 'points_2h'):
        if b not in team_games.columns:
            continue
        for w in windows:
            derived[f'ra_{b}_w{w}'] = _roll(by_team, b, w)
        if f'opp_{b}' in team_games.columns:
            for w in windows:
                derived[f'ra_allowed_{b}_w{w}'] = _roll(by_team, f'opp_{b}', w)

    # --- 5) Venue effects: margin within the same is_home value, and overall ---
    if 'team_margin' in team_games.columns:
        by_team_venue = team_games.groupby(group_keys + ['is_home'])
        for w in windows:
            derived[f'ra_margin_homeonly_w{w}'] = _roll(by_team_venue, 'team_margin', w)
            derived[f'ra_margin_w{w}'] = _roll(by_team, 'team_margin', w)

    # --- 6) Recent scoring form ---
    if 'team_score' in team_games.columns and 'opp_score' in team_games.columns:
        for w in windows:
            pf = _roll(by_team, 'team_score', w)
            pa = _roll(by_team, 'opp_score', w)
            derived[f'ra_points_for_w{w}']     = pf
            derived[f'ra_points_against_w{w}'] = pa
            derived[f'ra_point_diff_w{w}']     = pf - pa

    if derived:
        # A derived name replaces any same-named existing column.
        overlap = [c for c in derived if c in team_games.columns]
        team_games = pd.concat(
            [team_games.drop(columns=overlap),
             pd.DataFrame(derived, index=team_games.index)],
            axis=1,
        )

    # --- 7) Build the "pregame" (inference) feature table ---
    # Raw single-game columns are excluded so only aggregated features remain.
    leak_cols = ['points_1h','opp_points_1h','points_2h','opp_points_2h',
                 'team_score','opp_score','team_margin','opp_margin','prev_game_dt']
    raw_stat_cols = full_bases + [f'opp_{b}' for b in full_bases] + leak_cols
    raw_stat_cols = [c for c in raw_stat_cols if c in team_games.columns]

    feature_cols = [c for c in team_games.columns if c not in (raw_stat_cols + ['opponent'])]
    pregame_team_features = team_games[feature_cols + ['opponent']].copy()

    # --- 8) Latest snapshot per team-season (includes the most recent game) ---
    latest_snapshot = (
        pregame_team_features
        .sort_values(group_keys + ['game_dt', 'game_id'])
        .groupby(group_keys, as_index=False, sort=False)
        .tail(1)
        .reset_index(drop=True)
    )

    # If you want one row per team (across seasons):
    # latest_snapshot = latest_snapshot.sort_values(['date','team']).drop_duplicates('team', keep='last')

    # --- 9) Output column list, generated from `windows` / `ewm_halflife` ---
    # With the default arguments this is exactly the original hand-written
    # list, in the same order.
    box_stats = ('assists', 'blocks', 'defensiveRebounds', 'fastBreakPoints',
                 'flagrantFouls', 'fouls', 'offensiveRebounds', 'pointsInPaint',
                 'steals', 'technicalFouls', 'totalRebounds', 'turnoverPoints',
                 'turnovers')

    wanted = ['game_id', 'team', 'rest_days']
    wanted += [f'ra_rest_days_w{w}' for w in windows]
    for b in box_stats:
        for w in windows:
            wanted += [f'ra_{b}_w{w}', f'ra_allowed_{b}_w{w}']
        wanted += [f'rstd_{b}_w{std_window}', f'ewm_{b}_hl{ewm_halflife}',
                   f'rstd_allowed_{b}_w{std_window}', f'ewm_allowed_{b}_hl{ewm_halflife}']
    for b in ('points_1h', 'points_2h'):
        wanted += [f'ra_{b}_w{w}' for w in windows]
        wanted += [f'ra_allowed_{b}_w{w}' for w in windows]
    wanted += [f'ra_margin_homeonly_w{w}' for w in windows]
    for w in windows:
        wanted += [f'ra_points_for_w{w}', f'ra_points_against_w{w}', f'ra_point_diff_w{w}']
    wanted += [f'ra_margin_w{w}' for w in windows]

    keep = [c for c in wanted if c in latest_snapshot.columns]
    features_3 = latest_snapshot[['team'] + keep] if 'team' not in keep else latest_snapshot[keep]

    return features_3

# -----------------------
# USAGE
# -----------------------
# Box-score stats that are passed in as <stat>_home / <stat>_away pairs.
_f3_stats = ['assists', 'fouls', 'technicalFouls', 'flagrantFouls',
             'totalRebounds', 'offensiveRebounds', 'defensiveRebounds',
             'pointsInPaint', 'turnovers', 'turnoverPoints', 'steals',
             'blocks', 'fastBreakPoints']
_f3_input_cols = (
    ['game_id', 'season', 'date', 'date_utc', 'time_utc', 'neutral_site', 'home', 'away',
     'home_1h', 'away_1h', 'home_2h', 'away_2h', 'home_score', 'away_score',
     'home_margin', 'away_margin']
    + [f'{stat}_home' for stat in _f3_stats]
    + [f'{stat}_away' for stat in _f3_stats]
)

features_3 = build_cbb_features_inference(merged_df[_f3_input_cols])

# features_3: one row per team-season, with the rolling/EWM features.
# The set of permitted column names is generated from the naming scheme
# (default windows 1/3/5/10, std window 5, half-life 5) rather than spelled
# out name by name; it contains the same 199 names as the former literal.
_f3_windows = (1, 3, 5, 10)
_f3_expected = (
    {'game_id', 'team', 'rest_days'}
    # plain rolling means that have no "allowed" counterpart
    | {f'ra_{name}_w{w}'
       for w in _f3_windows
       for name in ('rest_days', 'margin_homeonly', 'margin',
                    'points_for', 'points_against', 'point_diff')}
    # rolling means for / allowed: box-score stats and 1H/2H points
    | {f'ra_{prefix}{stat}_w{w}'
       for w in _f3_windows
       for stat in _f3_stats + ['points_1h', 'points_2h']
       for prefix in ('', 'allowed_')}
    # rolling std and EWM exist for the box-score stats only
    | {f'{kind}_{prefix}{stat}_{tag}'
       for kind, tag in (('rstd', 'w5'), ('ewm', 'hl5'))
       for stat in _f3_stats
       for prefix in ('', 'allowed_')}
)
_f3_unexpected = set(features_3.columns) - _f3_expected
assert not _f3_unexpected, f"features_3 has unexpected columns: {sorted(_f3_unexpected)}"

### INFERENCE DATAFRAME

In [22]:
# Today's games plus the game-level features (features_1), keyed on game_id.
game_info = pd.read_csv("daily-games/daily.csv").merge(
    features_1, on="game_id", how="left"
)

# --- HOME TEAM MERGE (features_2) ---
# Every feature column gets a _home suffix; the join keys keep their names.
home_merge = features_2.copy().rename(
    columns=lambda col: col if col in ["game_id", "team", "opponent"] else f"{col}_home"
)
merged_home = (
    game_info
    .merge(
        home_merge,
        how="left",
        left_on=["game_id", "home"],
        right_on=["game_id", "team"],
        validate="1:1",
    )
    .drop(columns=["team", "opponent"])
)

# --- AWAY TEAM MERGE ---
away_merge = features_2.copy().rename(
    columns=lambda col: col if col in ["game_id", "team", "opponent"] else f"{col}_away"
)
game_info = (
    merged_home
    .merge(
        away_merge,
        how="left",
        left_on=["game_id", "away"],
        right_on=["game_id", "team"],
        validate="1:1",
    )
    .drop(columns=["team", "opponent"])
)

# Same pattern for the rolling-form features (features_3), which carry no
# 'opponent' column.
key_cols = ['game_id', 'team']

# --- HOME merge ---
home_feats = features_3.copy().rename(
    columns=lambda col: col if col in key_cols else f"{col}_home"
)
out = (
    game_info
    .merge(
        home_feats,
        how='left',
        left_on=['game_id', 'home'],
        right_on=['game_id', 'team'],
        validate='1:1',
    )
    .drop(columns=['team'])
)

# --- AWAY merge ---
away_feats = features_3.copy().rename(
    columns=lambda col: col if col in key_cols else f"{col}_away"
)
game_info = (
    out
    .merge(
        away_feats,
        how='left',
        left_on=['game_id', 'away'],
        right_on=['game_id', 'team'],
        validate='1:1',
    )
    .drop(columns=['team'])
    # games whose home or away team name is missing cannot be scored
    .dropna(subset=["home", "away"])
)

game_info[['utc_seconds_since_midnight', 'utc_hour_sin',
       'utc_hour_cos', 'et_hour', 'et_minute', 'et_second',
       'et_seconds_since_midnight', 'et_hour_sin', 'et_hour_cos',
       'et_is_weekend', 'et_is_primetime', 'et_daypart_afternoon',
       'et_daypart_evening', 'et_daypart_late', 'et_daypart_morning',
       'official_1_code', 'official_2_code', 'official_3_code',
       'conf_home_code', 'conf_away_code', 'lag1_adj_def_eff_home',
       'lag1_adj_off_eff_home', 'lag1_adj_tempo_home',
       'lag1_barthag_home', 'lag1_drb_home', 'lag1_efg_pct_home',
       'lag1_efgd_pct_home', 'lag1_ftr_home', 'lag1_ftrd_home',
       'lag1_games_home', 'lag1_orb_home', 'lag1_rank_home',
       'lag1_three_pt_def_pct_home', 'lag1_three_pt_def_rt_home',
       'lag1_three_pt_pct_home', 'lag1_three_pt_rt_home', 'lag1_tor_home',
       'lag1_tord_home', 'lag1_two_pt_def_pct_home',
       'lag1_two_pt_pct_home', 'lag1_wab_home',
       'lag1_opp_adj_def_eff_home', 'lag1_opp_adj_off_eff_home',
       'lag1_opp_adj_tempo_home', 'lag1_opp_barthag_home',
       'lag1_opp_drb_home', 'lag1_opp_efg_pct_home',
       'lag1_opp_efgd_pct_home', 'lag1_opp_ftr_home',
       'lag1_opp_ftrd_home', 'lag1_opp_games_home', 'lag1_opp_orb_home',
       'lag1_opp_rank_home', 'lag1_opp_three_pt_def_pct_home',
       'lag1_opp_three_pt_def_rt_home', 'lag1_opp_three_pt_pct_home',
       'lag1_opp_three_pt_rt_home', 'lag1_opp_tor_home',
       'lag1_opp_tord_home', 'lag1_opp_two_pt_def_pct_home',
       'lag1_opp_two_pt_pct_home', 'lag1_opp_wab_home',
       'opp_rank_cummean_incl_home', 'opp_rank_cummean_pre_home',
       'lag1_adj_def_eff_away', 'lag1_adj_off_eff_away',
       'lag1_adj_tempo_away', 'lag1_barthag_away', 'lag1_drb_away',
       'lag1_efg_pct_away', 'lag1_efgd_pct_away', 'lag1_ftr_away',
       'lag1_ftrd_away', 'lag1_games_away', 'lag1_orb_away',
       'lag1_rank_away', 'lag1_three_pt_def_pct_away',
       'lag1_three_pt_def_rt_away', 'lag1_three_pt_pct_away',
       'lag1_three_pt_rt_away', 'lag1_tor_away', 'lag1_tord_away',
       'lag1_two_pt_def_pct_away', 'lag1_two_pt_pct_away',
       'lag1_wab_away', 'lag1_opp_adj_def_eff_away',
       'lag1_opp_adj_off_eff_away', 'lag1_opp_adj_tempo_away',
       'lag1_opp_barthag_away', 'lag1_opp_drb_away',
       'lag1_opp_efg_pct_away', 'lag1_opp_efgd_pct_away',
       'lag1_opp_ftr_away', 'lag1_opp_ftrd_away', 'lag1_opp_games_away',
       'lag1_opp_orb_away', 'lag1_opp_rank_away',
       'lag1_opp_three_pt_def_pct_away', 'lag1_opp_three_pt_def_rt_away',
       'lag1_opp_three_pt_pct_away', 'lag1_opp_three_pt_rt_away',
       'lag1_opp_tor_away', 'lag1_opp_tord_away',
       'lag1_opp_two_pt_def_pct_away', 'lag1_opp_two_pt_pct_away',
       'lag1_opp_wab_away', 'opp_rank_cummean_incl_away',
       'opp_rank_cummean_pre_away', 'rest_days_home',
       'ra_rest_days_w1_home', 'ra_rest_days_w3_home',
       'ra_rest_days_w5_home', 'ra_rest_days_w10_home',
       'ra_assists_w1_home', 'ra_allowed_assists_w1_home',
       'ra_assists_w3_home', 'ra_allowed_assists_w3_home',
       'ra_assists_w5_home', 'ra_allowed_assists_w5_home',
       'ra_assists_w10_home', 'ra_allowed_assists_w10_home',
       'rstd_assists_w5_home', 'ewm_assists_hl5_home',
       'rstd_allowed_assists_w5_home', 'ewm_allowed_assists_hl5_home',
       'ra_blocks_w1_home', 'ra_allowed_blocks_w1_home',
       'ra_blocks_w3_home', 'ra_allowed_blocks_w3_home',
       'ra_blocks_w5_home', 'ra_allowed_blocks_w5_home',
       'ra_blocks_w10_home', 'ra_allowed_blocks_w10_home',
       'rstd_blocks_w5_home', 'ewm_blocks_hl5_home',
       'rstd_allowed_blocks_w5_home', 'ewm_allowed_blocks_hl5_home',
       'ra_defensiveRebounds_w1_home',
       'ra_allowed_defensiveRebounds_w1_home',
       'ra_defensiveRebounds_w3_home',
       'ra_allowed_defensiveRebounds_w3_home',
       'ra_defensiveRebounds_w5_home',
       'ra_allowed_defensiveRebounds_w5_home',
       'ra_defensiveRebounds_w10_home',
       'ra_allowed_defensiveRebounds_w10_home',
       'rstd_defensiveRebounds_w5_home', 'ewm_defensiveRebounds_hl5_home',
       'rstd_allowed_defensiveRebounds_w5_home',
       'ewm_allowed_defensiveRebounds_hl5_home',
       'ra_fastBreakPoints_w1_home', 'ra_allowed_fastBreakPoints_w1_home',
       'ra_fastBreakPoints_w3_home', 'ra_allowed_fastBreakPoints_w3_home',
       'ra_fastBreakPoints_w5_home', 'ra_allowed_fastBreakPoints_w5_home',
       'ra_fastBreakPoints_w10_home',
       'ra_allowed_fastBreakPoints_w10_home',
       'rstd_fastBreakPoints_w5_home', 'ewm_fastBreakPoints_hl5_home',
       'rstd_allowed_fastBreakPoints_w5_home',
       'ewm_allowed_fastBreakPoints_hl5_home', 'ra_flagrantFouls_w1_home',
       'ra_allowed_flagrantFouls_w1_home', 'ra_flagrantFouls_w3_home',
       'ra_allowed_flagrantFouls_w3_home', 'ra_flagrantFouls_w5_home',
       'ra_allowed_flagrantFouls_w5_home', 'ra_flagrantFouls_w10_home',
       'ra_allowed_flagrantFouls_w10_home', 'rstd_flagrantFouls_w5_home',
       'ewm_flagrantFouls_hl5_home', 'rstd_allowed_flagrantFouls_w5_home',
       'ewm_allowed_flagrantFouls_hl5_home', 'ra_fouls_w1_home',
       'ra_allowed_fouls_w1_home', 'ra_fouls_w3_home',
       'ra_allowed_fouls_w3_home', 'ra_fouls_w5_home',
       'ra_allowed_fouls_w5_home', 'ra_fouls_w10_home',
       'ra_allowed_fouls_w10_home', 'rstd_fouls_w5_home',
       'ewm_fouls_hl5_home', 'rstd_allowed_fouls_w5_home',
       'ewm_allowed_fouls_hl5_home', 'ra_offensiveRebounds_w1_home',
       'ra_allowed_offensiveRebounds_w1_home',
       'ra_offensiveRebounds_w3_home',
       'ra_allowed_offensiveRebounds_w3_home',
       'ra_offensiveRebounds_w5_home',
       'ra_allowed_offensiveRebounds_w5_home',
       'ra_offensiveRebounds_w10_home',
       'ra_allowed_offensiveRebounds_w10_home',
       'rstd_offensiveRebounds_w5_home', 'ewm_offensiveRebounds_hl5_home',
       'rstd_allowed_offensiveRebounds_w5_home',
       'ewm_allowed_offensiveRebounds_hl5_home',
       'ra_pointsInPaint_w1_home', 'ra_allowed_pointsInPaint_w1_home',
       'ra_pointsInPaint_w3_home', 'ra_allowed_pointsInPaint_w3_home',
       'ra_pointsInPaint_w5_home', 'ra_allowed_pointsInPaint_w5_home',
       'ra_pointsInPaint_w10_home', 'ra_allowed_pointsInPaint_w10_home',
       'rstd_pointsInPaint_w5_home', 'ewm_pointsInPaint_hl5_home',
       'rstd_allowed_pointsInPaint_w5_home',
       'ewm_allowed_pointsInPaint_hl5_home', 'ra_steals_w1_home',
       'ra_allowed_steals_w1_home', 'ra_steals_w3_home',
       'ra_allowed_steals_w3_home', 'ra_steals_w5_home',
       'ra_allowed_steals_w5_home', 'ra_steals_w10_home',
       'ra_allowed_steals_w10_home', 'rstd_steals_w5_home',
       'ewm_steals_hl5_home', 'rstd_allowed_steals_w5_home',
       'ewm_allowed_steals_hl5_home', 'ra_technicalFouls_w1_home',
       'ra_allowed_technicalFouls_w1_home', 'ra_technicalFouls_w3_home',
       'ra_allowed_technicalFouls_w3_home', 'ra_technicalFouls_w5_home',
       'ra_allowed_technicalFouls_w5_home', 'ra_technicalFouls_w10_home',
       'ra_allowed_technicalFouls_w10_home',
       'rstd_technicalFouls_w5_home', 'ewm_technicalFouls_hl5_home',
       'rstd_allowed_technicalFouls_w5_home',
       'ewm_allowed_technicalFouls_hl5_home', 'ra_totalRebounds_w1_home',
       'ra_allowed_totalRebounds_w1_home', 'ra_totalRebounds_w3_home',
       'ra_allowed_totalRebounds_w3_home', 'ra_totalRebounds_w5_home',
       'ra_allowed_totalRebounds_w5_home', 'ra_totalRebounds_w10_home',
       'ra_allowed_totalRebounds_w10_home', 'rstd_totalRebounds_w5_home',
       'ewm_totalRebounds_hl5_home', 'rstd_allowed_totalRebounds_w5_home',
       'ewm_allowed_totalRebounds_hl5_home', 'ra_turnoverPoints_w1_home',
       'ra_allowed_turnoverPoints_w1_home', 'ra_turnoverPoints_w3_home',
       'ra_allowed_turnoverPoints_w3_home', 'ra_turnoverPoints_w5_home',
       'ra_allowed_turnoverPoints_w5_home', 'ra_turnoverPoints_w10_home',
       'ra_allowed_turnoverPoints_w10_home',
       'rstd_turnoverPoints_w5_home', 'ewm_turnoverPoints_hl5_home',
       'rstd_allowed_turnoverPoints_w5_home',
       'ewm_allowed_turnoverPoints_hl5_home', 'ra_turnovers_w1_home',
       'ra_allowed_turnovers_w1_home', 'ra_turnovers_w3_home',
       'ra_allowed_turnovers_w3_home', 'ra_turnovers_w5_home',
       'ra_allowed_turnovers_w5_home', 'ra_turnovers_w10_home',
       'ra_allowed_turnovers_w10_home', 'rstd_turnovers_w5_home',
       'ewm_turnovers_hl5_home', 'rstd_allowed_turnovers_w5_home',
       'ewm_allowed_turnovers_hl5_home', 'ra_points_1h_w1_home',
       'ra_points_1h_w3_home', 'ra_points_1h_w5_home',
       'ra_points_1h_w10_home', 'ra_allowed_points_1h_w1_home',
       'ra_allowed_points_1h_w3_home', 'ra_allowed_points_1h_w5_home',
       'ra_allowed_points_1h_w10_home', 'ra_points_2h_w1_home',
       'ra_points_2h_w3_home', 'ra_points_2h_w5_home',
       'ra_points_2h_w10_home', 'ra_allowed_points_2h_w1_home',
       'ra_allowed_points_2h_w3_home', 'ra_allowed_points_2h_w5_home',
       'ra_allowed_points_2h_w10_home', 'ra_margin_homeonly_w1_home',
       'ra_margin_homeonly_w3_home', 'ra_margin_homeonly_w5_home',
       'ra_margin_homeonly_w10_home', 'ra_points_for_w1_home',
       'ra_points_against_w1_home', 'ra_point_diff_w1_home',
       'ra_points_for_w3_home', 'ra_points_against_w3_home',
       'ra_point_diff_w3_home', 'ra_points_for_w5_home',
       'ra_points_against_w5_home', 'ra_point_diff_w5_home',
       'ra_points_for_w10_home', 'ra_points_against_w10_home',
       'ra_point_diff_w10_home', 'ra_margin_w1_home', 'ra_margin_w3_home',
       'ra_margin_w5_home', 'ra_margin_w10_home', 'rest_days_away',
       'ra_rest_days_w1_away', 'ra_rest_days_w3_away',
       'ra_rest_days_w5_away', 'ra_rest_days_w10_away',
       'ra_assists_w1_away', 'ra_allowed_assists_w1_away',
       'ra_assists_w3_away', 'ra_allowed_assists_w3_away',
       'ra_assists_w5_away', 'ra_allowed_assists_w5_away',
       'ra_assists_w10_away', 'ra_allowed_assists_w10_away',
       'rstd_assists_w5_away', 'ewm_assists_hl5_away',
       'rstd_allowed_assists_w5_away', 'ewm_allowed_assists_hl5_away',
       'ra_blocks_w1_away', 'ra_allowed_blocks_w1_away',
       'ra_blocks_w3_away', 'ra_allowed_blocks_w3_away',
       'ra_blocks_w5_away', 'ra_allowed_blocks_w5_away',
       'ra_blocks_w10_away', 'ra_allowed_blocks_w10_away',
       'rstd_blocks_w5_away', 'ewm_blocks_hl5_away',
       'rstd_allowed_blocks_w5_away', 'ewm_allowed_blocks_hl5_away',
       'ra_defensiveRebounds_w1_away',
       'ra_allowed_defensiveRebounds_w1_away',
       'ra_defensiveRebounds_w3_away',
       'ra_allowed_defensiveRebounds_w3_away',
       'ra_defensiveRebounds_w5_away',
       'ra_allowed_defensiveRebounds_w5_away',
       'ra_defensiveRebounds_w10_away',
       'ra_allowed_defensiveRebounds_w10_away',
       'rstd_defensiveRebounds_w5_away', 'ewm_defensiveRebounds_hl5_away',
       'rstd_allowed_defensiveRebounds_w5_away',
       'ewm_allowed_defensiveRebounds_hl5_away',
       'ra_fastBreakPoints_w1_away', 'ra_allowed_fastBreakPoints_w1_away',
       'ra_fastBreakPoints_w3_away', 'ra_allowed_fastBreakPoints_w3_away',
       'ra_fastBreakPoints_w5_away', 'ra_allowed_fastBreakPoints_w5_away',
       'ra_fastBreakPoints_w10_away',
       'ra_allowed_fastBreakPoints_w10_away',
       'rstd_fastBreakPoints_w5_away', 'ewm_fastBreakPoints_hl5_away',
       'rstd_allowed_fastBreakPoints_w5_away',
       'ewm_allowed_fastBreakPoints_hl5_away', 'ra_flagrantFouls_w1_away',
       'ra_allowed_flagrantFouls_w1_away', 'ra_flagrantFouls_w3_away',
       'ra_allowed_flagrantFouls_w3_away', 'ra_flagrantFouls_w5_away',
       'ra_allowed_flagrantFouls_w5_away', 'ra_flagrantFouls_w10_away',
       'ra_allowed_flagrantFouls_w10_away', 'rstd_flagrantFouls_w5_away',
       'ewm_flagrantFouls_hl5_away', 'rstd_allowed_flagrantFouls_w5_away',
       'ewm_allowed_flagrantFouls_hl5_away', 'ra_fouls_w1_away',
       'ra_allowed_fouls_w1_away', 'ra_fouls_w3_away',
       'ra_allowed_fouls_w3_away', 'ra_fouls_w5_away',
       'ra_allowed_fouls_w5_away', 'ra_fouls_w10_away',
       'ra_allowed_fouls_w10_away', 'rstd_fouls_w5_away',
       'ewm_fouls_hl5_away', 'rstd_allowed_fouls_w5_away',
       'ewm_allowed_fouls_hl5_away', 'ra_offensiveRebounds_w1_away',
       'ra_allowed_offensiveRebounds_w1_away',
       'ra_offensiveRebounds_w3_away',
       'ra_allowed_offensiveRebounds_w3_away',
       'ra_offensiveRebounds_w5_away',
       'ra_allowed_offensiveRebounds_w5_away',
       'ra_offensiveRebounds_w10_away',
       'ra_allowed_offensiveRebounds_w10_away',
       'rstd_offensiveRebounds_w5_away', 'ewm_offensiveRebounds_hl5_away',
       'rstd_allowed_offensiveRebounds_w5_away',
       'ewm_allowed_offensiveRebounds_hl5_away',
       'ra_pointsInPaint_w1_away', 'ra_allowed_pointsInPaint_w1_away',
       'ra_pointsInPaint_w3_away', 'ra_allowed_pointsInPaint_w3_away',
       'ra_pointsInPaint_w5_away', 'ra_allowed_pointsInPaint_w5_away',
       'ra_pointsInPaint_w10_away', 'ra_allowed_pointsInPaint_w10_away',
       'rstd_pointsInPaint_w5_away', 'ewm_pointsInPaint_hl5_away',
       'rstd_allowed_pointsInPaint_w5_away',
       'ewm_allowed_pointsInPaint_hl5_away', 'ra_steals_w1_away',
       'ra_allowed_steals_w1_away', 'ra_steals_w3_away',
       'ra_allowed_steals_w3_away', 'ra_steals_w5_away',
       'ra_allowed_steals_w5_away', 'ra_steals_w10_away',
       'ra_allowed_steals_w10_away', 'rstd_steals_w5_away',
       'ewm_steals_hl5_away', 'rstd_allowed_steals_w5_away',
       'ewm_allowed_steals_hl5_away', 'ra_technicalFouls_w1_away',
       'ra_allowed_technicalFouls_w1_away', 'ra_technicalFouls_w3_away',
       'ra_allowed_technicalFouls_w3_away', 'ra_technicalFouls_w5_away',
       'ra_allowed_technicalFouls_w5_away', 'ra_technicalFouls_w10_away',
       'ra_allowed_technicalFouls_w10_away',
       'rstd_technicalFouls_w5_away', 'ewm_technicalFouls_hl5_away',
       'rstd_allowed_technicalFouls_w5_away',
       'ewm_allowed_technicalFouls_hl5_away', 'ra_totalRebounds_w1_away',
       'ra_allowed_totalRebounds_w1_away', 'ra_totalRebounds_w3_away',
       'ra_allowed_totalRebounds_w3_away', 'ra_totalRebounds_w5_away',
       'ra_allowed_totalRebounds_w5_away', 'ra_totalRebounds_w10_away',
       'ra_allowed_totalRebounds_w10_away', 'rstd_totalRebounds_w5_away',
       'ewm_totalRebounds_hl5_away', 'rstd_allowed_totalRebounds_w5_away',
       'ewm_allowed_totalRebounds_hl5_away', 'ra_turnoverPoints_w1_away',
       'ra_allowed_turnoverPoints_w1_away', 'ra_turnoverPoints_w3_away',
       'ra_allowed_turnoverPoints_w3_away', 'ra_turnoverPoints_w5_away',
       'ra_allowed_turnoverPoints_w5_away', 'ra_turnoverPoints_w10_away',
       'ra_allowed_turnoverPoints_w10_away',
       'rstd_turnoverPoints_w5_away', 'ewm_turnoverPoints_hl5_away',
       'rstd_allowed_turnoverPoints_w5_away',
       'ewm_allowed_turnoverPoints_hl5_away', 'ra_turnovers_w1_away',
       'ra_allowed_turnovers_w1_away', 'ra_turnovers_w3_away',
       'ra_allowed_turnovers_w3_away', 'ra_turnovers_w5_away',
       'ra_allowed_turnovers_w5_away', 'ra_turnovers_w10_away',
       'ra_allowed_turnovers_w10_away', 'rstd_turnovers_w5_away',
       'ewm_turnovers_hl5_away', 'rstd_allowed_turnovers_w5_away',
       'ewm_allowed_turnovers_hl5_away', 'ra_points_1h_w1_away',
       'ra_points_1h_w3_away', 'ra_points_1h_w5_away',
       'ra_points_1h_w10_away', 'ra_allowed_points_1h_w1_away',
       'ra_allowed_points_1h_w3_away', 'ra_allowed_points_1h_w5_away',
       'ra_allowed_points_1h_w10_away', 'ra_points_2h_w1_away',
       'ra_points_2h_w3_away', 'ra_points_2h_w5_away',
       'ra_points_2h_w10_away', 'ra_allowed_points_2h_w1_away',
       'ra_allowed_points_2h_w3_away', 'ra_allowed_points_2h_w5_away',
       'ra_allowed_points_2h_w10_away', 'ra_margin_homeonly_w1_away',
       'ra_margin_homeonly_w3_away', 'ra_margin_homeonly_w5_away',
       'ra_margin_homeonly_w10_away', 'ra_points_for_w1_away',
       'ra_points_against_w1_away', 'ra_point_diff_w1_away',
       'ra_points_for_w3_away', 'ra_points_against_w3_away',
       'ra_point_diff_w3_away', 'ra_points_for_w5_away',
       'ra_points_against_w5_away', 'ra_point_diff_w5_away',
       'ra_points_for_w10_away', 'ra_points_against_w10_away',
       'ra_point_diff_w10_away', 'ra_margin_w1_away', 'ra_margin_w3_away',
       'ra_margin_w5_away', 'ra_margin_w10_away']]

Unnamed: 0,utc_seconds_since_midnight,utc_hour_sin,utc_hour_cos,et_hour,et_minute,et_second,et_seconds_since_midnight,et_hour_sin,et_hour_cos,et_is_weekend,...,ra_points_for_w5_away,ra_points_against_w5_away,ra_point_diff_w5_away,ra_points_for_w10_away,ra_points_against_w10_away,ra_point_diff_w10_away,ra_margin_w1_away,ra_margin_w3_away,ra_margin_w5_away,ra_margin_w10_away
0,1800,0.0,1.0,19,30,0,4664,-0.965926,0.258819,0,...,,,,,,,,,,
1,7200,0.5,0.866025,21,0,0,10064,-0.707107,0.7071068,0,...,,,,,,,,,,
3,12600,0.707107,0.707107,22,30,0,15464,-0.5,0.8660254,0,...,,,,,,,,,,
4,0,0.0,1.0,19,0,0,2864,-0.965926,0.258819,0,...,,,,,,,,,,
8,5400,0.258819,0.965926,20,30,0,8264,-0.866025,0.5,0,...,,,,,,,,,,
11,6300,0.258819,0.965926,20,45,0,9164,-0.866025,0.5,0,...,,,,,,,,,,
12,0,0.0,1.0,19,0,0,2864,-0.965926,0.258819,0,...,,,,,,,,,,
14,3600,0.258819,0.965926,20,0,0,6464,-0.866025,0.5,0,...,,,,,,,,,,
16,10800,0.707107,0.707107,22,0,0,13664,-0.5,0.8660254,0,...,,,,,,,,,,
17,3600,0.258819,0.965926,20,0,0,6464,-0.866025,0.5,0,...,,,,,,,,,,
