# ELO and Logistic Regression Model
This notebook loads NBA game data from the `coach_risk sensitivity/data` folder, computes Elo ratings for each team, and fits a logistic regression model with L1 (LASSO) regularization.

In [ ]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression

# Folder holding the input CSV files, relative to the working directory.
DATA_FOLDER = os.path.join('coach_risk sensitivity', 'data')
# Collect the input CSVs.
# - 'elo_history.csv' is written into this same folder later in the notebook,
#   so it is skipped here; otherwise a re-run would load its own output as input.
# - os.listdir returns names in arbitrary order and a later cell takes the
#   first loaded frame, so the names are sorted to make that choice stable.
csv_files = sorted(
    f for f in os.listdir(DATA_FOLDER)
    if f.endswith('.csv') and f != 'elo_history.csv'
)
# Map each file name to its parsed DataFrame.
frames = {f: pd.read_csv(os.path.join(DATA_FOLDER, f)) for f in csv_files}
frames

In [ ]:
# Select the working dataset: the first loaded frame, or an empty DataFrame
# when no CSV files were loaded.
df = next(iter(frames.values()), pd.DataFrame())
df.head()

In [ ]:
def compute_elo(df, k=20, home_field_advantage=0):
    """Replay the games in ``df`` in row order and compute Elo ratings.

    Every team starts at 1500. For each game the home side's expected score
    is derived from the rating gap, and both ratings are moved by
    ``k * (actual - expected)``.

    Args:
        df: Game table with ``HomeTeam``, ``AwayTeam`` and ``HomeWin``
            columns. ``HomeWin`` is used directly as the home side's score
            (the away side gets ``1 - HomeWin``). A ``Date`` column, if
            present, is copied into the history. Rows are processed in their
            existing order.
        k: Step size of each rating update.
        home_field_advantage: Rating points credited to the home team when
            computing the expected score; a positive value raises the home
            team's expected score.

    Returns:
        A ``(ratings, history)`` tuple. ``ratings`` maps each team to its
        final rating. ``history`` is a DataFrame with one row per game and
        the columns ``Date``, ``HomeTeam``, ``AwayTeam``, ``HomeElo`` and
        ``AwayElo``; the two Elo columns hold the ratings *after* that
        game's update. An empty ``df`` yields ``({}, <empty history>)``.
    """
    history_columns = ['Date', 'HomeTeam', 'AwayTeam', 'HomeElo', 'AwayElo']
    # An empty frame may have no columns at all, so bail out before indexing.
    if df.empty:
        return {}, pd.DataFrame(columns=history_columns)

    teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()
    ratings = {team: 1500 for team in teams}
    # 'Date' is optional; fall back to None for every game when it is absent.
    dates = df['Date'] if 'Date' in df.columns else [None] * len(df)
    history = []
    for date, home, away, result in zip(
        dates, df['HomeTeam'], df['AwayTeam'], df['HomeWin']
    ):
        home_rating = ratings.get(home, 1500)
        away_rating = ratings.get(away, 1500)
        # The advantage is added to the home rating, i.e. subtracted from the
        # away-minus-home gap, so a positive value favours the home team.
        rating_gap = away_rating - home_rating - home_field_advantage
        exp_home = 1 / (1 + 10 ** (rating_gap / 400))
        exp_away = 1 - exp_home
        ratings[home] = home_rating + k * (result - exp_home)
        ratings[away] = away_rating + k * ((1 - result) - exp_away)
        history.append({
            'Date': date,
            'HomeTeam': home,
            'AwayTeam': away,
            'HomeElo': ratings[home],
            'AwayElo': ratings[away],
        })
    return ratings, pd.DataFrame(history, columns=history_columns)

# Run the Elo pass over the selected dataset using the function's default
# parameters; keep both the final ratings and the per-game history.
elo_ratings, elo_history = compute_elo(df)
# Preview the first rows of the per-game rating history.
elo_history.head()

In [ ]:
# Destination for the per-game Elo history.
# NOTE(review): this is the same folder the loading cell reads its CSV
# inputs from — confirm the generated file is not meant to be an input.
OUTPUT_ELO_PATH = os.path.join('coach_risk sensitivity', 'data', 'elo_history.csv')
# index=False leaves the DataFrame's row index out of the written file.
elo_history.to_csv(OUTPUT_ELO_PATH, index=False)

In [ ]:
# Build the feature matrix X and target y, then fit an L1-penalised logistic
# regression when there is data to fit.
# NOTE(review): HomeElo/AwayElo in elo_history are recorded after each game's
# rating update, so EloProb is derived from post-game ratings — confirm that
# is intended for a model whose target is HomeWin.
if df.empty:
    # Nothing loaded: keep empty placeholders so the cell still runs.
    X = pd.DataFrame()
    y = pd.Series(dtype=float)
else:
    # Attach the Elo ratings (aligned on the row index), then the home win
    # probability implied by the rating gap.
    df = df.assign(EloHome=elo_history['HomeElo'], EloAway=elo_history['AwayElo'])
    df['EloProb'] = 1 / (1 + 10 ** ((df['EloAway'] - df['EloHome']) / 400))
    # Base features followed by their pairwise and three-way interactions.
    X = df[['PointDiff', 'SecondsRemaining', 'EloProb']].copy()
    X = X.assign(
        PointDiff_EloProb=X['PointDiff'] * X['EloProb'],
        SecondsRemaining_EloProb=X['SecondsRemaining'] * X['EloProb'],
        PointDiff_SecondsRemaining=X['PointDiff'] * X['SecondsRemaining'],
        PointDiff_SecondsRemaining_EloProb=(
            X['PointDiff'] * X['SecondsRemaining'] * X['EloProb']
        ),
    )
    y = df['HomeWin']
# liblinear is used as the solver for the L1 (LASSO) penalty.
model = LogisticRegression(solver='liblinear', penalty='l1')
if not X.empty:
    model.fit(X, y)
model