In [88]:
import pandas as pd
import numpy as np
from pathlib import Path
import selenium
from datetime import date
import sys
sys.path.append('..')
from src.etl import *
from src.data.update_data import *

import optuna
%load_ext autoreload
%autoreload 2
import joblib

from sklearn.multioutput import MultiOutputRegressor
import lightgbm as lgb



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [81]:
Path.home().joinpath('NBA_Model_v1')

WindowsPath('C:/Users/Jordan Nishimura/NBA_Model_v1')

1) Update Data
2) Preprocess and reload into SQL DB
3) Pull Updated Preprocessed Data from SQL DB
4) Train Model on fully updated data
5) Pull Days Matchups
6) Predict on Days Matchups

## Update Data

In [82]:
db_path = Path.home() / 'NBA_Model_v1' / 'data' / 'nba.db'
season = 2021
update_all_data(db_path=db_path, season=season)



TypeError: update_all_data() got an unexpected keyword argument 'db_path'

## Preprocess and Reload into SQL DB

In [73]:
%run ..\\src\\etl.py

Loading raw team boxscore data from sql database...
Loading betting data from sql database...
Cleaning Data...
Merging Boxscore and Betting Data...
Aggregating over last 5, 10, and 20 game windows
adding rest days
creating matchups between Home and Away team aggregated stats
Resorting by date
dropping nulls
loading table back into sql db as team_stats_ewa_matchup


In [74]:
def season_to_string(x):
    return str(x) + '-' + str(x+1)[-2:]

def get_training_data_all(target, con):   

    df = pd.read_sql('SELECT * FROM team_stats_ewa_matchup', con=con)
    df = df.drop(columns=['index'])

    df = df.sort_values('GAME_DATE')

    df = df.dropna()

    columns_to_drop = ['SEASON', 'HOME_TEAM_ABBREVIATION', 'GAME_DATE', 'GAME_ID', 'MATCHUP',
                        'HOME_HOME_GAME', 'HOME_TEAM_SCORE', 'HOME_ML', 'HOME_SPREAD',
                        'HOME_ATS_DIFF', 'HOME_TEAM_COVERED', 'HOME_POINT_DIFF',
                        'HOME_WL', 'AWAY_ML', 'AWAY_TEAM_SCORE',
                        'HOME_PTS_L5', 'HOME_PTS_L10', 'HOME_PTS_L20',
                        'HOME_PLUS_MINUS_L5', 'HOME_PLUS_MINUS_L10', 'HOME_PLUS_MINUS_L20',
                        'HOME_NET_RATING_L5', 'HOME_NET_RATING_L10', 'HOME_NET_RATING_L20',
                        'HOME_POSS_L5', 'HOME_POSS_L10', 'HOME_POSS_L20',
                        'HOME_PTS_opp_L5', 'HOME_PTS_opp_L10', 'HOME_PTS_opp_L20',
                        'HOME_PLUS_MINUS_opp_L5', 'HOME_PLUS_MINUS_opp_L10', 'HOME_PLUS_MINUS_opp_L20',
                        'HOME_NET_RATING_opp_L5', 'HOME_NET_RATING_opp_L10', 'HOME_NET_RATING_opp_L20',
                        'HOME_POSS_opp_L5', 'HOME_POSS_opp_L10', 'HOME_POSS_opp_L20',
                        'HOME_REB_L5', 'HOME_REB_L10', 'HOME_REB_L20',  
                        'HOME_REB_opp_L5', 'HOME_REB_opp_L10', 'HOME_REB_opp_L20',       
                        'AWAY_PTS_L5', 'AWAY_PTS_L10', 'AWAY_PTS_L20',
                        'AWAY_PLUS_MINUS_L5', 'AWAY_PLUS_MINUS_L10', 'AWAY_PLUS_MINUS_L20',
                        'AWAY_NET_RATING_L5', 'AWAY_NET_RATING_L10', 'AWAY_NET_RATING_L20',
                        'AWAY_POSS_L5', 'AWAY_POSS_L10', 'AWAY_POSS_L20',
                        'AWAY_PTS_opp_L5', 'AWAY_PTS_opp_L10', 'AWAY_PTS_opp_L20',
                        'AWAY_PLUS_MINUS_opp_L5', 'AWAY_PLUS_MINUS_opp_L10', 'AWAY_PLUS_MINUS_opp_L20',
                        'AWAY_NET_RATING_opp_L5', 'AWAY_NET_RATING_opp_L10', 'AWAY_NET_RATING_opp_L20',
                        'AWAY_POSS_opp_L5', 'AWAY_POSS_opp_L10', 'AWAY_POSS_opp_L20',
                        'AWAY_REB_L5', 'AWAY_REB_L10', 'AWAY_REB_L20',
                        'AWAY_REB_opp_L5', 'AWAY_REB_opp_L10', 'AWAY_REB_opp_L20']

    X_train = df.drop(columns=columns_to_drop)
    y_train = df[target]

    return X_train, y_train


In [79]:
X_train.columns

Index(['HOME_FG2M_L5', 'HOME_FG2A_L5', 'HOME_FG3M_L5', 'HOME_FG3A_L5',
       'HOME_FTM_L5', 'HOME_FTA_L5', 'HOME_OREB_L5', 'HOME_DREB_L5',
       'HOME_AST_L5', 'HOME_STL_L5',
       ...
       'AWAY_TS_PCT_L20', 'AWAY_TS_PCT_opp_L20', 'AWAY_EFG_PCT_L20',
       'AWAY_EFG_PCT_opp_L20', 'AWAY_AST_RATIO_L20', 'AWAY_AST_RATIO_opp_L20',
       'AWAY_TOV_PCT_L20', 'AWAY_TOV_PCT_opp_L20', 'AWAY_PIE_L20',
       'AWAY_REST'],
      dtype='object', length=566)

In [83]:
target = ['HOME_TEAM_SCORE', 'AWAY_TEAM_SCORE']
db_filepath = Path.home().joinpath('NBA_model_v1', 'data', 'nba.db')
connection = sqlite3.connect(db_filepath)

X_train, y_train = get_training_data_all(target = target, con=connection)

In [84]:
X_train.shape

(10955, 566)

In [77]:
#load study with best hyperparameters
study_name = str(Path.home().joinpath('NBA_model_v1', 'models', 'hyperparameter_tuning', 'LGBMRegressor'))    
storage_name = "sqlite:///{}.db".format(study_name)

study = optuna.load_study(study_name = study_name, storage = storage_name)

params = study.best_params
print(params)

# instantiate model with hyperparameters
lgbr_model = MultiOutputRegressor(lgb.LGBMRegressor(**params))

## train model on full data

lgbr_model.fit(X_train, y_train)




{'boosting_type': 'gbdt', 'colsample_bytree': 0.4960110708575806, 'learning_rate': 0.10511063149508226, 'max_depth': 58, 'min_child_weight': 0.947131341685127, 'num_leaves': 11, 'reg_alpha': 0.36636398433425144, 'reg_lambda': 9.061364427698557, 'subsample': 0.8107222779801828}


In [78]:
#load study with best hyperparameters
from sklearn import pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler




study_name = str(Path.home().joinpath('NBA_model_v1', 'models', 'hyperparameter_tuning', 'SGDRegressor_ScorePredictor'))    
storage_name = "sqlite:///{}.db".format(study_name)

study = optuna.load_study(study_name = study_name, storage = storage_name)

params = study.best_params
print(params)

# instantiate model with hyperparameters
sgd_model = MultiOutputRegressor(Pipeline([('scaler', StandardScaler()),
                                           ('sgd', SGDRegressor(**params, max_iter=10000))
                                           ]
                                          )
                                 )

## train model on full data

sgd_model.fit(X_train, y_train)



{'alpha': 0.035149206342098345, 'epsilon': 5.05778119284027, 'l1_ratio': 0.949791773845651, 'loss': 'huber'}


NameError: name 'Pipeline' is not defined

In [None]:
HOME_TEAM = 'SAS'
AWAY_TEAM = 'LAL'


In [85]:
import numpy as np
import pandas as pd
import sqlite3
from pathlib import Path
import warnings


def load_team_data(conn, start_season, end_season):
    """Loads basic, advanced, and scoring boxscores 
    from sqlite db and merges them into one dataframe"""
    

    basic = pd.read_sql("SELECT * FROM team_basic_boxscores", conn)
    adv = pd.read_sql("SELECT * FROM team_advanced_boxscores", conn)
    scoring = pd.read_sql("SELECT * FROM team_scoring_boxscores", conn)
    tracking = pd.read_sql("SELECT * FROM team_tracking_boxscores", conn)

    basic = basic.loc[basic['SEASON'].between(start_season, end_season)]
    basic[['GAME_ID', 'TEAM_ID']] = basic[['GAME_ID', 'TEAM_ID']].astype(str)
    adv[['GAME_ID', 'TEAM_ID']] = adv[['GAME_ID', 'TEAM_ID']].astype(str)
    scoring[['GAME_ID', 'TEAM_ID']] = scoring[['GAME_ID', 'TEAM_ID']].astype(str)
    tracking[['GAME_ID', 'TEAM_ID']] = tracking[['GAME_ID', 'TEAM_ID']].astype(str)

    df = pd.merge(basic, adv, how='left', on=[
                    'GAME_ID', 'TEAM_ID'], suffixes=['', '_y'])
    df = pd.merge(df, scoring, how='left', on=[
                  'GAME_ID', 'TEAM_ID'], suffixes=['', '_y'])
    
    df = pd.merge(df, tracking, how='left', on=['GAME_ID', 'TEAM_ID'],
                  suffixes=['', '_y'])
    

    df = df.drop(columns=['TEAM_NAME_y', 'TEAM_CITY',
                          'TEAM_ABBREVIATION_y',
                          'TEAM_CITY_y', 'MIN_y',
                          'FG_PCT_y', 'AST_y'])
    
    return df


def clean_team_data(df):
    """This function cleans the team_data
    1) Changes W/L to 1/0 
    2) Changes franchise abbreviations to their most 
    recent abbreviation for consistency
    3) Converts GAME_DATE to datetime object
    4) Creates a binary column 'HOME_GAME'
    5) Removes 3 games where advanced stats were not collected
    """
    df = df.copy()
    df['WL'] = (df['WL'] == 'W').astype(int)

    abbr_mapping = {'NJN': 'BKN',
                    'CHH': 'CHA',
                    'VAN': 'MEM',
                    'NOH': 'NOP',
                    'NOK': 'NOP',
                    'SEA': 'OKC'}

    df['TEAM_ABBREVIATION'] = df['TEAM_ABBREVIATION'].replace(abbr_mapping)
    df['MATCHUP'] = df['MATCHUP'].str.replace('NJN', 'BKN')
    df['MATCHUP'] = df['MATCHUP'].str.replace('CHH', 'CHA')
    df['MATCHUP'] = df['MATCHUP'].str.replace('VAN', 'MEM')
    df['MATCHUP'] = df['MATCHUP'].str.replace('NOH', 'NOP')
    df['MATCHUP'] = df['MATCHUP'].str.replace('NOK', 'NOP')
    df['MATCHUP'] = df['MATCHUP'].str.replace('SEA', 'OKC')

    df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])

    df['HOME_GAME'] = df['MATCHUP'].str.contains('vs').astype(int)

    return df


# from src.data.make_team_dataset import prep_for_aggregation

def prep_for_aggregation(df):
    """This function...
    1) Removes categories that are percentages,
    as we will be averaging them and do not want to average 
    percentages. 
    2) Converts shooting percentage stats into raw values"""
    df = df.copy()

    df = df.drop(columns=['FT_PCT', 'FG_PCT', 'FG3_PCT', 'DREB_PCT',
                          'OREB_PCT', 'REB_PCT', 'AST_PCT', 'AST_TOV',
                          'AST_RATIO', 'E_TM_TOV_PCT', 'TM_TOV_PCT',
                          'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT',
                          'PACE_PER40', 'MIN', 'PIE', 'CFG_PCT', 'UFG_PCT',
                          'DFG_PCT', 'E_OFF_RATING', 'E_DEF_RATING', 'E_NET_RATING'])

    df['FG2M'] = df['FGM'] - df['FG3M']
    df['FG2A'] = df['FGA'] - df['FG3A']
    df['PTS_2PT_MR'] = (df['PTS'] * df['PCT_PTS_2PT_MR']).astype('int8')
    df['PTS_FB'] = (df['PTS'] * df['PCT_PTS_FB']).astype('int8')
    df['PTS_OFF_TOV'] = (df['PTS'] * df['PCT_PTS_OFF_TOV']).astype('int8')
    df['PTS_PAINT'] = (df['PTS'] * df['PCT_PTS_PAINT']).astype('int8')
    df['AST_2PM'] = (df['FG2M'] * df['PCT_AST_2PM']).astype('int8')
    df['AST_3PM'] = (df['FG3M'] * df['PCT_AST_3PM']).astype('int8')
    df['UAST_2PM'] = (df['FG2M'] * df['PCT_UAST_2PM']).astype('int8')
    df['UAST_3PM'] = (df['FG3M'] * df['PCT_UAST_3PM']).astype('int8')

    df['POINT_DIFF'] = df['PLUS_MINUS']
    df['RECORD'] = df['WL']
    df['TEAM_SCORE'] = df['PTS']
    
    df = df.drop(columns = ['PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_2PT',
                          'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB',
                          'PCT_PTS_FT','PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 
                          'PCT_AST_2PM', 'PCT_UAST_2PM','PCT_AST_3PM',
                          'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM',
                          'E_PACE'])
    
    ## Reorder Columns
    
    df = df[['SEASON', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'TEAM_SCORE', 'WL', 'POINT_DIFF', 'HOME_GAME', 'RECORD',
       'FG2M', 'FG2A', 'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB', 'REB', 
       'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
       'PLUS_MINUS', 'OFF_RATING', 'DEF_RATING', 'NET_RATING', 'PACE',
       'POSS', 'DIST', 'ORBC', 'DRBC', 'RBC', 'TCHS', 'SAST', 'FTAST', 'PASS',
       'CFGM', 'CFGA', 'UFGM', 'UFGA', 'DFGM', 'DFGA', 'PTS_2PT_MR', 'PTS_FB', 'PTS_OFF_TOV', 'PTS_PAINT', 'AST_2PM',
       'AST_3PM', 'UAST_2PM', 'UAST_3PM']]

    return df


def load_betting_data(conn):
    spreads = pd.read_sql("SELECT * FROM spreads", conn)
    moneylines = pd.read_sql("SELECT * FROM moneylines", conn)

    return spreads, moneylines


def convert_american_to_decimal(x):
    return np.where(x>0, (100+x)/100, 1+(100.0/-x))          


def clean_moneyline_df(df):
    abbr_mapping = {'Boston': 'BOS', 'Portland': 'POR',
                    'L.A. Lakers': 'LAL', 'Brooklyn': 'BKN',
                    'Cleveland': 'CLE', 'Toronto': 'TOR',
                    'Philadelphia': 'PHI', 'Memphis': 'MEM',
                    'Minnesota': 'MIN', 'New Orleans': 'NOP',
                    'Oklahoma City': 'OKC', 'Dallas': 'DAL',
                    'San Antonio': 'SAS', 'Denver': 'DEN',
                    'Golden State': 'GSW', 'L.A. Clippers': 'LAC',
                    'Orlando': 'ORL', 'Utah': 'UTA',
                    'Charlotte': 'CHA', 'Detroit': 'DET',
                    'Miami': 'MIA', 'Phoenix': 'PHX',
                    'Atlanta': 'ATL', 'New York': 'NYK',
                    'Indiana': 'IND', 'Chicago': 'CHI',
                    'Houston': 'HOU', 'Milwaukee': 'MIL',
                    'Sacramento': 'SAC', 'Washington': 'WAS'}

    df['HOME_TEAM'] = df['HOME_TEAM'].replace(abbr_mapping)
    df['AWAY_TEAM'] = df['AWAY_TEAM'].replace(abbr_mapping)

    away_mls = df['AWAY_ML'].str.split(",", expand=True)
    home_mls = df['HOME_ML'].str.split(",", expand=True)

    away_mls = away_mls.replace('-', np.nan).replace('', np.nan)
    away_mls = away_mls.fillna(value=np.nan)
    away_mls = away_mls.astype(float)

    home_mls = home_mls.replace('-', np.nan).replace('', np.nan)
    home_mls = home_mls.fillna(value=np.nan)
    home_mls = home_mls.astype(float)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
    
        highest_away_ml = away_mls.apply(lambda row: np.nanmax(
            abs(row)) if np.nanmax(row) > 0 else -np.nanmax(abs(row)), axis=1)
        highest_away_ml = convert_american_to_decimal(highest_away_ml)
        highest_away_ml = pd.DataFrame(
            highest_away_ml, columns=['HIGHEST_AWAY_ML'])

        highest_home_ml = home_mls.apply(lambda row: np.nanmax(
            abs(row)) if np.nanmax(row) > 0 else -np.nanmax(abs(row)), axis=1)
        highest_home_ml = convert_american_to_decimal(highest_home_ml)
        highest_home_ml = pd.DataFrame(
            highest_home_ml, columns=['HIGHEST_HOME_ML'])

    moneylines = pd.concat(
        [df.iloc[:, :4], highest_home_ml, highest_away_ml], axis=1)
    
    moneylines['GM_DATE'] = pd.to_datetime(moneylines['GM_DATE'])

    return moneylines

def clean_spreads_df(df):
    abbr_mapping = {'Boston': 'BOS', 'Portland': 'POR',
                    'L.A. Lakers': 'LAL', 'Brooklyn': 'BKN',
                    'Cleveland': 'CLE', 'Toronto': 'TOR',
                    'Philadelphia': 'PHI', 'Memphis': 'MEM',
                    'Minnesota': 'MIN', 'New Orleans': 'NOP',
                    'Oklahoma City': 'OKC', 'Dallas': 'DAL',
                    'San Antonio': 'SAS', 'Denver': 'DEN',
                    'Golden State': 'GSW', 'L.A. Clippers': 'LAC',
                    'Orlando': 'ORL', 'Utah': 'UTA',
                    'Charlotte': 'CHA', 'Detroit': 'DET',
                    'Miami': 'MIA', 'Phoenix': 'PHX',
                    'Atlanta': 'ATL', 'New York': 'NYK',
                    'Indiana': 'IND', 'Chicago': 'CHI',
                    'Houston': 'HOU', 'Milwaukee': 'MIL',
                    'Sacramento': 'SAC', 'Washington': 'WAS'}

    df['HOME_TEAM'] = df['HOME_TEAM'].replace(abbr_mapping)
    df['AWAY_TEAM'] = df['AWAY_TEAM'].replace(abbr_mapping)

    away_spreads = df['AWAY_SPREAD'].str.split(",", expand=True)
    home_spreads = df['HOME_SPREAD'].str.split(",", expand=True)

    for col in away_spreads.columns:
        away_spreads[col] = away_spreads[col].str[:-4]
        away_spreads[col] = away_spreads[col].str.replace('½', '.5')
        away_spreads[col] = away_spreads[col].str.replace('PK', '0')

        away_spreads[col] = away_spreads[col].astype(str).apply(
            lambda x: x if x == '' else (x[:-1] if x[-1] == '-' else x))

    away_spreads = away_spreads.replace('-', np.nan)
    away_spreads = away_spreads.replace('', np.nan)
    away_spreads = away_spreads.replace('None', np.nan)
    away_spreads = away_spreads.fillna(value=np.nan)

    away_spreads = away_spreads.astype(float)

    for col in home_spreads.columns:
        home_spreads[col] = home_spreads[col].str[:-4]
        home_spreads[col] = home_spreads[col].str.replace('½', '.5')
        home_spreads[col] = home_spreads[col].str.replace('PK', '0')

        home_spreads[col] = home_spreads[col].astype(str).apply(
            lambda x: x if x == '' else (x[:-1] if x[-1] == '-' else x))

    home_spreads = home_spreads.replace('-', np.nan).replace('', np.nan).replace('None', np.nan)
    home_spreads = home_spreads.fillna(value=np.nan)

    home_spreads = home_spreads.astype(float)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)

        highest_away_spread = away_spreads.apply(
            lambda row: -np.nanmax(abs(row)) if np.nanmax(row) < 0 else np.nanmax(abs(row)), axis=1)
        highest_away_spread = pd.DataFrame(
            highest_away_spread, columns=['HIGHEST_AWAY_SPREAD'])

        highest_home_spread = home_spreads.apply(
            lambda row: -np.nanmax(abs(row)) if np.nanmax(row) < 0 else np.nanmax(abs(row)), axis=1)
        highest_home_spread = pd.DataFrame(
            highest_home_spread, columns=['HIGHEST_HOME_SPREAD'])

    spreads = pd.concat(
        [df.iloc[:, :4], highest_home_spread, highest_away_spread], axis=1)
    spreads['GM_DATE'] = pd.to_datetime(spreads['GM_DATE'])

    return spreads


def merge_betting_and_boxscore_data(clean_spreads, clean_mls, clean_boxscores):
    clean_boxscores['HOME_TEAM'] = clean_boxscores['MATCHUP'].apply(
        lambda x: x[:3] if 'vs' in x else x[-3:])
    clean_boxscores['AWAY_TEAM'] = clean_boxscores['MATCHUP'].apply(
        lambda x: x[:3] if '@' in x else x[-3:])

    temp = pd.merge(clean_mls, clean_spreads, on=[
                    'SEASON', 'GM_DATE', 'HOME_TEAM', 'AWAY_TEAM'])

    merged_df = pd.merge(clean_boxscores, temp, how='left', 
                         left_on=['SEASON', 'HOME_TEAM', 'AWAY_TEAM', 'GAME_DATE'],
                         right_on=['SEASON', 'HOME_TEAM', 'AWAY_TEAM', 'GM_DATE'])

    merged_df['ML'] = merged_df.apply(lambda row: row['HIGHEST_HOME_ML'] if row['HOME_GAME'] == 1
                                      else row['HIGHEST_AWAY_ML'], axis=1)

    merged_df['SPREAD'] = merged_df.apply(lambda row: row['HIGHEST_HOME_SPREAD'] if row['HOME_GAME'] == 1
                                          else -row['HIGHEST_HOME_SPREAD'], axis=1)

    merged_df = merged_df.drop(columns=['HOME_TEAM', 'AWAY_TEAM', 'GM_DATE',
                                        'HIGHEST_HOME_ML', 'HIGHEST_AWAY_ML',
                                        'HIGHEST_HOME_SPREAD', 'HIGHEST_AWAY_SPREAD'])

    merged_df['ATS_DIFF'] = merged_df['POINT_DIFF'] + merged_df['SPREAD']

    merged_df['TEAM_COVERED'] = (merged_df['ATS_DIFF'] > 0).astype(int)
    

    return merged_df


def normalize_per_100_poss(df):
    df = df.copy(deep=True)
    
    df.iloc[:, 12:27] = 100*df.iloc[:, 12:27].div(df['PACE'], axis=0) 
    df.iloc[:,  34:-4] = 100*df.iloc[:, 34:-4].div(df['PACE'], axis=0) 
    
    return df


def create_matchups(df):
    """This function makes each row a matchup between 
    team and opp"""
    df = df.copy()
    

    matchups = pd.merge(df, df.iloc[:, :-4], on=['GAME_ID'], suffixes=['', '_opp'])
    matchups = matchups.loc[matchups['TEAM_ABBREVIATION'] != matchups['TEAM_ABBREVIATION_opp']]

    matchups = matchups.drop(columns = ['SEASON_opp', 'TEAM_ABBREVIATION_opp', 'GAME_DATE_opp',
                                        'MATCHUP_opp', 'HOME_GAME_opp', 'TEAM_NAME_opp', 
                                        'TEAM_ID_opp', 'WL_opp']
                             )
    
    matchups
    
    return matchups


def build_team_avg_stats_df(df: pd.DataFrame, span = 10) -> pd.DataFrame:    
    """This function finds the average for each team and opp statistic up to (and NOT including) the given date.
    """
    
    df = df.copy(deep=True)

    df = df.sort_values(['TEAM_ABBREVIATION', 'GAME_DATE']).reset_index(drop=True)

    

    drop_cols = ['TEAM_ID', 'TEAM_NAME', 'GAME_ID', 'MATCHUP', 
                 'HOME_GAME', 'TEAM_SCORE', 'ML', 'SPREAD', 
                'GAME_DATE', 'POINT_DIFF', 'WL', 'TEAM_SCORE_opp',
                'POINT_DIFF_opp', 'RECORD', 'RECORD_opp', 'TEAM_COVERED']

    stats = df.drop(columns=drop_cols)

    avg_stat_holder = []

    for stat in stats.columns[2:]:
        avg_stats = stats.groupby(['TEAM_ABBREVIATION'])[stat].ewm(span=span).mean().reset_index(drop=True)
        avg_stat_holder.append(avg_stats)
    
    
    matchup_info = df[['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE',
                          'GAME_ID', 'MATCHUP', 'HOME_GAME', 'TEAM_SCORE',
                          'ML', 'SPREAD', 'ATS_DIFF', 'RECORD', 'TEAM_COVERED', 
                          'POINT_DIFF', 'WL']]   

    avg_stats = pd.concat(avg_stat_holder, axis=1)
    

    avg_stats = avg_stats.rename(columns={'ATS_DIFF':'AVG_ATS_DIFF'})
    
    avg_stats = pd.concat([matchup_info, avg_stats], axis=1)
    
    avg_stats['WIN_PCT'] = avg_stats.groupby(['TEAM_ABBREVIATION'])['RECORD'].rolling(window=span).mean().values
    avg_stats['COVER_PCT'] = avg_stats.groupby(['TEAM_ABBREVIATION'])['TEAM_COVERED'].rolling(window=span).mean().values

    avg_stats = avg_stats.drop(columns='RECORD')

    avg_stats = avg_stats.sort_values(['TEAM_ABBREVIATION', 'GAME_DATE'])
    # avg_stats.iloc[:, 14:] = avg_stats.iloc[:, 14:].shift(1).where(avg_stats['TEAM_ABBREVIATION'].eq(avg_stats['TEAM_ABBREVIATION'].shift()))

    avg_stats = avg_stats.add_suffix('_L{}'.format(span))
    
    avg_stats = avg_stats.rename(columns = {'SEASON_L{}'.format(span):'SEASON',
                                           'TEAM_ABBREVIATION_L{}'.format(span):'TEAM_ABBREVIATION',
                                           'GAME_DATE_L{}'.format(span):'GAME_DATE',
                                           'GAME_ID_L{}'.format(span):'GAME_ID',
                                           'MATCHUP_L{}'.format(span): 'MATCHUP', 
                                           'HOME_GAME_L{}'.format(span): 'HOME_GAME', 
                                           'TEAM_SCORE_L{}'.format(span):'TEAM_SCORE',
                                           'ML_L{}'.format(span):'ML', 
                                           'SPREAD_L{}'.format(span):'SPREAD',
                                           'ATS_DIFF_L{}'.format(span):'ATS_DIFF',
                                           'RECORD_L{}'.format(span):'RECORD', 
                                           'TEAM_COVERED_L{}'.format(span):'TEAM_COVERED',
                                           'POINT_DIFF_L{}'.format(span):'POINT_DIFF',
                                           'WL_L{}'.format(span):'WL'})
    
    return avg_stats


def add_percentage_features(df, span):
    """Add the following features for both team and opp:
    OREB_PCT, DREB_PCT, REB_PCT, TS_PCT, EFG_PCT, AST_RATIO, TOV_PCT, PIE.
    """
    
    df = df.copy()
    
    df['OREB_PCT_L{}'.format(span)] = df['OREB_L{}'.format(span)] / (df['OREB_L{}'.format(span)] + df['DREB_opp_L{}'.format(span)])
    df['OREB_PCT_opp_L{}'.format(span)] = df['OREB_opp_L{}'.format(span)] / (df['OREB_opp_L{}'.format(span)] + df['DREB_L{}'.format(span)])

    df['DREB_PCT_L{}'.format(span)] = df['DREB_L{}'.format(span)] / (df['DREB_L{}'.format(span)] + df['OREB_opp_L{}'.format(span)])
    df['DREB_PCT_opp_L{}'.format(span)] = df['DREB_opp_L{}'.format(span)] / (df['DREB_opp_L{}'.format(span)] + df['OREB_L{}'.format(span)])

    df['REB_PCT_L{}'.format(span)] = df['REB_L{}'.format(span)] / (df['REB_L{}'.format(span)] + df['REB_opp_L{}'.format(span)])
    df['REB_PCT_opp_L{}'.format(span)] = df['REB_opp_L{}'.format(span)] / (df['REB_opp_L{}'.format(span)] + df['REB_L{}'.format(span)])

    df['TS_PCT_L{}'.format(span)] = df['PTS_L{}'.format(span)] / ((2*(df['FG2A_L{}'.format(span)] + df['FG3A_L{}'.format(span)]) + 0.44*df['FTA_L{}'.format(span)]))
    
    df['TS_PCT_opp_L{}'.format(span)] = df['PTS_opp_L{}'.format(span)] / ((2*(df['FG2A_opp_L{}'.format(span)] + df['FG3A_opp_L{}'.format(span)]) + 0.44*df['FTA_opp_L{}'.format(span)]))

    df['EFG_PCT_L{}'.format(span)] = (df['FG2M_L{}'.format(span)] + 1.5*df['FG3M_L{}'.format(span)]) / (df['FG2A_L{}'.format(span)]
                                                                    + df['FG3A_L{}'.format(span)])
    df['EFG_PCT_opp_L{}'.format(span)] = (df['FG2M_opp_L{}'.format(span)] + 1.5*df['FG3M_opp_L{}'.format(span)]) / (df['FG2A_opp_L{}'.format(span)] 
                                                                 + df['FG3A_opp_L{}'.format(span)])

    df['AST_RATIO_L{}'.format(span)] = (df['AST_L{}'.format(span)] * 100) / df['PACE_L{}'.format(span)]
    df['AST_RATIO_opp_L{}'.format(span)] = (df['AST_opp_L{}'.format(span)] * 100) / df['PACE_opp_L{}'.format(span)]

    df['TOV_PCT_L{}'.format(span)] = 100*df['TOV_L{}'.format(span)] / (df['FG2A_L{}'.format(span)] 
                                               + df['FG3A_L{}'.format(span)] 
                                               + 0.44*df['FTA_L{}'.format(span)] 
                                               + df['TOV_L{}'.format(span)])
    
    df['TOV_PCT_opp_L{}'.format(span)] = 100*df['TOV_opp_L{}'.format(span)] / (df['FG2A_opp_L{}'.format(span)] 
                                             + df['FG3A_opp_L{}'.format(span)] 
                                             + 0.44*df['FTA_opp_L{}'.format(span)] 
                                             + df['TOV_opp_L{}'.format(span)])
    
    
    df['PIE_L{}'.format(span)] = ((df['PTS_L{}'.format(span)] + df['FG2M_L{}'.format(span)] + df['FG3M_L{}'.format(span)] + df['FTM_L{}'.format(span)] 
                 - df['FG2A_L{}'.format(span)] - df['FG3A_L{}'.format(span)] - df['FTA_L{}'.format(span)] 
                 + df['DREB_L{}'.format(span)] + df['OREB_L{}'.format(span)]/2
                + df['AST_L{}'.format(span)] + df['STL_L{}'.format(span)] + df['BLK_L{}'.format(span)]/2
                - df['PF_L{}'.format(span)] - df['TOV_L{}'.format(span)]) 
                 / (df['PTS_L{}'.format(span)] + df['PTS_opp_L{}'.format(span)] + df['FG2M_L{}'.format(span)] + df['FG2M_opp_L{}'.format(span)]
                   + df['FG3M_L{}'.format(span)] + df['FG3M_opp_L{}'.format(span)] + df['FTM_L{}'.format(span)] + df['FTM_opp_L{}'.format(span)]
                   - df['FG2A_L{}'.format(span)] - df['FG2A_opp_L{}'.format(span)] - df['FG3A_L{}'.format(span)] - df['FG3A_opp_L{}'.format(span)] 
                    - df['FTA_L{}'.format(span)] - df['FTA_opp_L{}'.format(span)] + df['DREB_L{}'.format(span)] + df['DREB_opp_L{}'.format(span)]
                    + (df['OREB_L{}'.format(span)]+df['OREB_opp_L{}'.format(span)])/2 + df['AST_L{}'.format(span)] + df['AST_opp_L{}'.format(span)]
                    + df['STL_L{}'.format(span)] + df['STL_opp_L{}'.format(span)] + (df['BLK_L{}'.format(span)] + df['BLK_opp_L{}'.format(span)])/2
                    - df['PF_L{}'.format(span)] - df['PF_opp_L{}'.format(span)] - df['TOV_L{}'.format(span)] - df['TOV_opp_L{}'.format(span)]))
        
    return df


def add_rest_days_for_model(df):
    df['REST'] = np.nan
    for team in df['TEAM_ABBREVIATION'].unique():
        team_df = df.loc[df['TEAM_ABBREVIATION'] == team].sort_values('GAME_DATE')
        idx = team_df.index
        team_df['REST'] = (team_df['GAME_DATE'].shift(-1) - team_df['GAME_DATE']) / np.timedelta64(1, 'D')
        team_df.at[max(idx), 'REST'] = (pd.to_datetime(date.today()) - team_df.at[max(idx), 'GAME_DATE']) / np.timedelta64(1, 'D')

        df.loc[idx, 'REST'] = team_df['REST']
        df.loc[df['REST'] >= 8, 'REST'] = 8
            
    return df


def season_to_string(x):
    return str(x) + '-' + str(x+1)[-2:]


def load_and_process_data(start_season, end_season):
    start_season = season_to_string(start_season)
    end_season = season_to_string(end_season)

    db_filepath = Path.home().joinpath('NBA_model_v1', 'data', 'nba.db')

    conn = sqlite3.connect(db_filepath)
    
    print("Loading raw team boxscore data from sql database...")
    
    df = load_team_data(conn, start_season, end_season)
    print("Loading betting data from sql database...")
    spreads, moneylines = load_betting_data(conn)
    
    print("Cleaning Data...")
    df = clean_team_data(df)
    df = prep_for_aggregation(df)

    clean_mls = clean_moneyline_df(df = moneylines)
    clean_spreads = clean_spreads_df(df = spreads)
    
    print("Merging Boxscore and Betting Data...")
    merged_df = merge_betting_and_boxscore_data(
        clean_spreads, clean_mls, clean_boxscores = df)
    
    
    stats_per_100 = normalize_per_100_poss(merged_df)

    print("Aggregating over last 5, 10, and 20 game windows")
    
    matchups = create_matchups(stats_per_100)
    
    team_stats_ewa_5 = build_team_avg_stats_df(matchups, span=5)
    team_stats_ewa_5 = add_percentage_features(team_stats_ewa_5, span=5)

    team_stats_ewa_10 = build_team_avg_stats_df(matchups, span=10)
    team_stats_ewa_10 = add_percentage_features(team_stats_ewa_10, span=10)

    team_stats_ewa_20 = build_team_avg_stats_df(matchups, span=20)
    team_stats_ewa_20 = add_percentage_features(team_stats_ewa_20, span=20)


    temp = pd.merge(team_stats_ewa_5, team_stats_ewa_10, how='inner',
                    on=['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE',
                        'GAME_ID', 'MATCHUP', 'HOME_GAME', 'TEAM_SCORE',
                        'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED', 
                        'POINT_DIFF', 'WL'])

    df_full = pd.merge(temp, team_stats_ewa_20, how='inner', 
                       on=['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE',
                            'GAME_ID', 'MATCHUP', 'HOME_GAME', 'TEAM_SCORE',
                            'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED', 
                            'POINT_DIFF', 'WL'])

    df_full = df_full.sort_values(['GAME_DATE', 'GAME_ID', 'HOME_GAME'])
    
    
    columns_to_drop = ['PTS_L5', 'PTS_L10', 'PTS_L20',
                        'PLUS_MINUS_L5', 'PLUS_MINUS_L10', 'PLUS_MINUS_L20',
                        'NET_RATING_L5', 'NET_RATING_L10', 'NET_RATING_L20',
                        'POSS_L5', 'POSS_L10', 'POSS_L20',
                        'REB_L5', 'REB_L10', 'REB_L20',
                        'REB_opp_L5', 'REB_opp_L10', 'REB_opp_L20',
                        'PTS_opp_L5', 'PTS_opp_L10', 'PTS_opp_L20',
                        'PLUS_MINUS_opp_L5', 'PLUS_MINUS_opp_L10', 'PLUS_MINUS_opp_L20',
                        'NET_RATING_opp_L5', 'NET_RATING_opp_L10', 'NET_RATING_opp_L20',
                        'POSS_opp_L5', 'POSS_opp_L10', 'POSS_opp_L20']
    
    df_full = df_full.drop(columns = columns_to_drop)
    
    print("adding rest days")
    df_full = add_rest_days_for_model(df_full)
    
    return df_full
    

def make_matchup_row(home_team, away_team, df):
    
    print("creating matchups between Home and Away team aggregated stats")

    matchup_info_cols = ['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE', 'GAME_ID', 'MATCHUP',
        'HOME_GAME', 'TEAM_SCORE', 'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED',
        'POINT_DIFF', 'WL']

    most_recent_home_stats = df.loc[df['TEAM_ABBREVIATION'] == home_team].tail(1).drop(columns=matchup_info_cols).values
    most_recent_away_stats = df.loc[df['TEAM_ABBREVIATION'] == away_team].tail(1).drop(columns=matchup_info_cols).values

    matchup_row = pd.DataFrame(np.concatenate([most_recent_home_stats, most_recent_away_stats], axis=1), columns=X_train.columns)
        
    return matchup_row

In [86]:
df_full = load_and_process_data(2013, 2021)

Loading raw team boxscore data from sql database...
Loading betting data from sql database...
Cleaning Data...
Merging Boxscore and Betting Data...
Aggregating over last 5, 10, and 20 game windows
adding rest days


In [94]:
lgbr_filepath = Path.home().joinpath('NBA_model_v1', 'models', 'LGBRegressor.sav')
sgdr_filepath = Path.home().joinpath('NBA_model_v1', 'models', 'SGDRegressor_ScorePredictor.sav')
lgbc_filepath = Path.home().joinpath('NBA_model_v1', 'models', 'LGBMClassifier.sav')
sgdc_hinge_filepath = Path.home().joinpath('NBA_model_v1', 'models', 'SGDClassifierHinge_WinPredictor.sav')
sgdc_logloss_filepath = Path.home().joinpath('NBA_model_v1', 'models', 'SGDClassifierLogLoss_WinPredictor.sav')

LGBRegressor = joblib.load(lgbr_filepath)
SGDRegressor = joblib.load(sgdr_filepath)
LGBClassifier = joblib.load(lgbc_filepath)
SGDClassifier_Hinge = joblib.load(sgdc_hinge_filepath)
SGDClassifier_LogLoss = joblib.load(sgdc_logloss_filepath)

In [95]:
print(SGDClassifier_LogLoss)

Pipeline(steps=[('scaler', StandardScaler()),
                ('sgd',
                 SGDClassifier(alpha=0.009368071103349671,
                               l1_ratio=0.455607493192275, loss='log_loss',
                               shuffle=False))])


In [90]:
home_team = 'BOS'
away_team = 'PHI'

row = make_matchup_row(home_team, away_team, df = df_full)

creating matchups between Home and Away team aggregated stats


In [97]:
row

Unnamed: 0,HOME_FG2M_L5,HOME_FG2A_L5,HOME_FG3M_L5,HOME_FG3A_L5,HOME_FTM_L5,HOME_FTA_L5,HOME_OREB_L5,HOME_DREB_L5,HOME_AST_L5,HOME_STL_L5,HOME_BLK_L5,HOME_TOV_L5,HOME_PF_L5,HOME_OFF_RATING_L5,HOME_DEF_RATING_L5,HOME_PACE_L5,HOME_DIST_L5,HOME_ORBC_L5,HOME_DRBC_L5,HOME_RBC_L5,HOME_TCHS_L5,HOME_SAST_L5,HOME_FTAST_L5,HOME_PASS_L5,HOME_CFGM_L5,HOME_CFGA_L5,HOME_UFGM_L5,HOME_UFGA_L5,HOME_DFGM_L5,HOME_DFGA_L5,HOME_PTS_2PT_MR_L5,HOME_PTS_FB_L5,HOME_PTS_OFF_TOV_L5,HOME_PTS_PAINT_L5,HOME_AST_2PM_L5,HOME_AST_3PM_L5,HOME_UAST_2PM_L5,HOME_UAST_3PM_L5,HOME_AVG_ATS_DIFF_L5,HOME_FG2M_opp_L5,HOME_FG2A_opp_L5,HOME_FG3M_opp_L5,HOME_FG3A_opp_L5,HOME_FTM_opp_L5,HOME_FTA_opp_L5,HOME_OREB_opp_L5,HOME_DREB_opp_L5,HOME_AST_opp_L5,HOME_STL_opp_L5,HOME_BLK_opp_L5,HOME_TOV_opp_L5,HOME_PF_opp_L5,HOME_OFF_RATING_opp_L5,HOME_DEF_RATING_opp_L5,HOME_PACE_opp_L5,HOME_DIST_opp_L5,HOME_ORBC_opp_L5,HOME_DRBC_opp_L5,HOME_RBC_opp_L5,HOME_TCHS_opp_L5,HOME_SAST_opp_L5,HOME_FTAST_opp_L5,HOME_PASS_opp_L5,HOME_CFGM_opp_L5,HOME_CFGA_opp_L5,HOME_UFGM_opp_L5,HOME_UFGA_opp_L5,HOME_DFGM_opp_L5,HOME_DFGA_opp_L5,HOME_PTS_2PT_MR_opp_L5,HOME_PTS_FB_opp_L5,HOME_PTS_OFF_TOV_opp_L5,HOME_PTS_PAINT_opp_L5,HOME_AST_2PM_opp_L5,HOME_AST_3PM_opp_L5,HOME_UAST_2PM_opp_L5,HOME_UAST_3PM_opp_L5,HOME_WIN_PCT_L5,HOME_COVER_PCT_L5,HOME_OREB_PCT_L5,HOME_OREB_PCT_opp_L5,HOME_DREB_PCT_L5,HOME_DREB_PCT_opp_L5,HOME_REB_PCT_L5,HOME_REB_PCT_opp_L5,HOME_TS_PCT_L5,HOME_TS_PCT_opp_L5,HOME_EFG_PCT_L5,HOME_EFG_PCT_opp_L5,HOME_AST_RATIO_L5,HOME_AST_RATIO_opp_L5,HOME_TOV_PCT_L5,HOME_TOV_PCT_opp_L5,HOME_PIE_L5,HOME_FG2M_L10,HOME_FG2A_L10,HOME_FG3M_L10,HOME_FG3A_L10,HOME_FTM_L10,HOME_FTA_L10,HOME_OREB_L10,HOME_DREB_L10,HOME_AST_L10,HOME_STL_L10,HOME_BLK_L10,HOME_TOV_L10,HOME_PF_L10,HOME_OFF_RATING_L10,HOME_DEF_RATING_L10,HOME_PACE_L10,HOME_DIST_L10,HOME_ORBC_L10,HOME_DRBC_L10,HOME_RBC_L10,HOME_TCHS_L10,HOME_SAST_L10,HOME_FTAST_L10,HOME_PASS_L10,HOME_CFGM_L10,HOME_CFGA_L10,HOME_UFGM_L10,HOME_UFGA_L10,HOME_DFGM_L10,HOME_DFGA_L10,HOME_PTS_2PT_MR_L10,HOME_PTS_FB_L10,HOME_PTS_OFF_TOV_L10,HOME_PTS_PAINT_L10,HOME_AST_2PM_L10,HOME_AST_3PM_L10,HOME_UAST_2PM_L10,HOME_UAST_3PM_L10,HOME_AVG_ATS_DIFF_L10,HOME_FG2M_opp_L10,HOME_FG2A_opp_L10,HOME_FG3M_opp_L10,HOME_FG3A_opp_L10,HOME_FTM_opp_L10,HOME_FTA_opp_L10,HOME_OREB_opp_L10,HOME_DREB_opp_L10,HOME_AST_opp_L10,HOME_STL_opp_L10,HOME_BLK_opp_L10,HOME_TOV_opp_L10,HOME_PF_opp_L10,HOME_OFF_RATING_opp_L10,HOME_DEF_RATING_opp_L10,HOME_PACE_opp_L10,HOME_DIST_opp_L10,HOME_ORBC_opp_L10,HOME_DRBC_opp_L10,HOME_RBC_opp_L10,HOME_TCHS_opp_L10,HOME_SAST_opp_L10,HOME_FTAST_opp_L10,HOME_PASS_opp_L10,HOME_CFGM_opp_L10,HOME_CFGA_opp_L10,HOME_UFGM_opp_L10,HOME_UFGA_opp_L10,HOME_DFGM_opp_L10,HOME_DFGA_opp_L10,HOME_PTS_2PT_MR_opp_L10,HOME_PTS_FB_opp_L10,HOME_PTS_OFF_TOV_opp_L10,HOME_PTS_PAINT_opp_L10,HOME_AST_2PM_opp_L10,HOME_AST_3PM_opp_L10,HOME_UAST_2PM_opp_L10,HOME_UAST_3PM_opp_L10,HOME_WIN_PCT_L10,HOME_COVER_PCT_L10,HOME_OREB_PCT_L10,HOME_OREB_PCT_opp_L10,HOME_DREB_PCT_L10,HOME_DREB_PCT_opp_L10,HOME_REB_PCT_L10,HOME_REB_PCT_opp_L10,HOME_TS_PCT_L10,HOME_TS_PCT_opp_L10,HOME_EFG_PCT_L10,HOME_EFG_PCT_opp_L10,HOME_AST_RATIO_L10,HOME_AST_RATIO_opp_L10,HOME_TOV_PCT_L10,HOME_TOV_PCT_opp_L10,HOME_PIE_L10,HOME_FG2M_L20,HOME_FG2A_L20,HOME_FG3M_L20,HOME_FG3A_L20,HOME_FTM_L20,HOME_FTA_L20,HOME_OREB_L20,HOME_DREB_L20,HOME_AST_L20,HOME_STL_L20,HOME_BLK_L20,HOME_TOV_L20,...,AWAY_REB_PCT_L5,AWAY_REB_PCT_opp_L5,AWAY_TS_PCT_L5,AWAY_TS_PCT_opp_L5,AWAY_EFG_PCT_L5,AWAY_EFG_PCT_opp_L5,AWAY_AST_RATIO_L5,AWAY_AST_RATIO_opp_L5,AWAY_TOV_PCT_L5,AWAY_TOV_PCT_opp_L5,AWAY_PIE_L5,AWAY_FG2M_L10,AWAY_FG2A_L10,AWAY_FG3M_L10,AWAY_FG3A_L10,AWAY_FTM_L10,AWAY_FTA_L10,AWAY_OREB_L10,AWAY_DREB_L10,AWAY_AST_L10,AWAY_STL_L10,AWAY_BLK_L10,AWAY_TOV_L10,AWAY_PF_L10,AWAY_OFF_RATING_L10,AWAY_DEF_RATING_L10,AWAY_PACE_L10,AWAY_DIST_L10,AWAY_ORBC_L10,AWAY_DRBC_L10,AWAY_RBC_L10,AWAY_TCHS_L10,AWAY_SAST_L10,AWAY_FTAST_L10,AWAY_PASS_L10,AWAY_CFGM_L10,AWAY_CFGA_L10,AWAY_UFGM_L10,AWAY_UFGA_L10,AWAY_DFGM_L10,AWAY_DFGA_L10,AWAY_PTS_2PT_MR_L10,AWAY_PTS_FB_L10,AWAY_PTS_OFF_TOV_L10,AWAY_PTS_PAINT_L10,AWAY_AST_2PM_L10,AWAY_AST_3PM_L10,AWAY_UAST_2PM_L10,AWAY_UAST_3PM_L10,AWAY_AVG_ATS_DIFF_L10,AWAY_FG2M_opp_L10,AWAY_FG2A_opp_L10,AWAY_FG3M_opp_L10,AWAY_FG3A_opp_L10,AWAY_FTM_opp_L10,AWAY_FTA_opp_L10,AWAY_OREB_opp_L10,AWAY_DREB_opp_L10,AWAY_AST_opp_L10,AWAY_STL_opp_L10,AWAY_BLK_opp_L10,AWAY_TOV_opp_L10,AWAY_PF_opp_L10,AWAY_OFF_RATING_opp_L10,AWAY_DEF_RATING_opp_L10,AWAY_PACE_opp_L10,AWAY_DIST_opp_L10,AWAY_ORBC_opp_L10,AWAY_DRBC_opp_L10,AWAY_RBC_opp_L10,AWAY_TCHS_opp_L10,AWAY_SAST_opp_L10,AWAY_FTAST_opp_L10,AWAY_PASS_opp_L10,AWAY_CFGM_opp_L10,AWAY_CFGA_opp_L10,AWAY_UFGM_opp_L10,AWAY_UFGA_opp_L10,AWAY_DFGM_opp_L10,AWAY_DFGA_opp_L10,AWAY_PTS_2PT_MR_opp_L10,AWAY_PTS_FB_opp_L10,AWAY_PTS_OFF_TOV_opp_L10,AWAY_PTS_PAINT_opp_L10,AWAY_AST_2PM_opp_L10,AWAY_AST_3PM_opp_L10,AWAY_UAST_2PM_opp_L10,AWAY_UAST_3PM_opp_L10,AWAY_WIN_PCT_L10,AWAY_COVER_PCT_L10,AWAY_OREB_PCT_L10,AWAY_OREB_PCT_opp_L10,AWAY_DREB_PCT_L10,AWAY_DREB_PCT_opp_L10,AWAY_REB_PCT_L10,AWAY_REB_PCT_opp_L10,AWAY_TS_PCT_L10,AWAY_TS_PCT_opp_L10,AWAY_EFG_PCT_L10,AWAY_EFG_PCT_opp_L10,AWAY_AST_RATIO_L10,AWAY_AST_RATIO_opp_L10,AWAY_TOV_PCT_L10,AWAY_TOV_PCT_opp_L10,AWAY_PIE_L10,AWAY_FG2M_L20,AWAY_FG2A_L20,AWAY_FG3M_L20,AWAY_FG3A_L20,AWAY_FTM_L20,AWAY_FTA_L20,AWAY_OREB_L20,AWAY_DREB_L20,AWAY_AST_L20,AWAY_STL_L20,AWAY_BLK_L20,AWAY_TOV_L20,AWAY_PF_L20,AWAY_OFF_RATING_L20,AWAY_DEF_RATING_L20,AWAY_PACE_L20,AWAY_DIST_L20,AWAY_ORBC_L20,AWAY_DRBC_L20,AWAY_RBC_L20,AWAY_TCHS_L20,AWAY_SAST_L20,AWAY_FTAST_L20,AWAY_PASS_L20,AWAY_CFGM_L20,AWAY_CFGA_L20,AWAY_UFGM_L20,AWAY_UFGA_L20,AWAY_DFGM_L20,AWAY_DFGA_L20,AWAY_PTS_2PT_MR_L20,AWAY_PTS_FB_L20,AWAY_PTS_OFF_TOV_L20,AWAY_PTS_PAINT_L20,AWAY_AST_2PM_L20,AWAY_AST_3PM_L20,AWAY_UAST_2PM_L20,AWAY_UAST_3PM_L20,AWAY_AVG_ATS_DIFF_L20,AWAY_FG2M_opp_L20,AWAY_FG2A_opp_L20,AWAY_FG3M_opp_L20,AWAY_FG3A_opp_L20,AWAY_FTM_opp_L20,AWAY_FTA_opp_L20,AWAY_OREB_opp_L20,AWAY_DREB_opp_L20,AWAY_AST_opp_L20,AWAY_STL_opp_L20,AWAY_BLK_opp_L20,AWAY_TOV_opp_L20,AWAY_PF_opp_L20,AWAY_OFF_RATING_opp_L20,AWAY_DEF_RATING_opp_L20,AWAY_PACE_opp_L20,AWAY_DIST_opp_L20,AWAY_ORBC_opp_L20,AWAY_DRBC_opp_L20,AWAY_RBC_opp_L20,AWAY_TCHS_opp_L20,AWAY_SAST_opp_L20,AWAY_FTAST_opp_L20,AWAY_PASS_opp_L20,AWAY_CFGM_opp_L20,AWAY_CFGA_opp_L20,AWAY_UFGM_opp_L20,AWAY_UFGA_opp_L20,AWAY_DFGM_opp_L20,AWAY_DFGA_opp_L20,AWAY_PTS_2PT_MR_opp_L20,AWAY_PTS_FB_opp_L20,AWAY_PTS_OFF_TOV_opp_L20,AWAY_PTS_PAINT_opp_L20,AWAY_AST_2PM_opp_L20,AWAY_AST_3PM_opp_L20,AWAY_UAST_2PM_opp_L20,AWAY_UAST_3PM_opp_L20,AWAY_WIN_PCT_L20,AWAY_COVER_PCT_L20,AWAY_OREB_PCT_L20,AWAY_OREB_PCT_opp_L20,AWAY_DREB_PCT_L20,AWAY_DREB_PCT_opp_L20,AWAY_REB_PCT_L20,AWAY_REB_PCT_opp_L20,AWAY_TS_PCT_L20,AWAY_TS_PCT_opp_L20,AWAY_EFG_PCT_L20,AWAY_EFG_PCT_opp_L20,AWAY_AST_RATIO_L20,AWAY_AST_RATIO_opp_L20,AWAY_TOV_PCT_L20,AWAY_TOV_PCT_opp_L20,AWAY_PIE_L20,AWAY_REST
0,22.972936,50.302636,13.145775,34.324345,16.187315,21.187565,10.532736,35.13471,25.304569,6.467036,6.305064,19.284327,17.720297,101.570325,108.637707,95.278437,16.758943,25.829815,55.166471,80.62517,425.487727,3.271374,2.344109,298.905883,14.486567,35.423341,21.632143,49.203638,14.398237,24.946201,6.801928,10.61958,19.265781,38.478847,13.428227,10.408996,8.826331,1.783257,-8.007103,25.056052,48.890194,15.503273,44.116114,12.013749,14.137682,11.302183,33.557043,24.83298,10.997292,4.992057,14.527672,23.04914,108.637707,101.570325,95.278437,17.160614,26.910855,58.174772,83.119377,402.322714,2.404799,1.748888,276.476975,16.268955,35.024922,24.243422,57.887491,14.418757,24.643144,10.234675,12.167648,20.780375,39.115847,12.123744,12.100958,12.321196,2.479388,0.2,0.2,0.238893,0.243388,0.756612,0.761107,0.504464,0.495536,0.568779,0.565124,0.504468,0.519437,26.558548,26.063589,17.030534,12.771068,0.478826,23.148089,48.799501,13.736415,35.97458,17.665196,22.844526,10.018472,35.866618,25.449937,6.361282,6.430337,17.586406,18.880727,105.161091,107.538327,95.415325,16.788273,24.199227,57.545902,80.840733,425.320128,3.132057,2.457115,300.670692,14.597714,34.072109,22.286778,50.701486,14.561907,24.66335,6.967296,11.262467,17.758724,38.610887,13.244668,11.041322,9.202885,1.842398,-3.674895,25.617996,50.354085,14.254375,41.634013,13.529063,16.565257,10.958695,33.473226,23.567307,10.045788,4.655299,14.143797,22.622755,107.538327,105.161091,95.415325,17.236832,26.87269,55.890343,80.575108,399.356999,2.619482,2.017241,274.183734,16.916784,35.987244,22.884278,55.858154,14.202344,23.452672,10.681091,12.215249,19.840504,39.801216,12.201573,10.68036,12.758837,2.778131,0.5,0.5,0.230354,0.234034,0.765966,0.769646,0.508045,0.491955,0.585583,0.562195,0.51611,0.510931,26.672798,24.699709,15.644588,12.470218,0.502839,23.949644,47.891997,14.220773,37.398858,18.473444,23.491697,9.671561,36.045574,25.853612,6.47226,6.447512,16.215639,...,0.455463,0.544537,0.570964,0.591507,0.50744,0.528862,23.484036,24.849375,14.436383,12.617437,0.441927,25.655723,49.19945,13.022378,36.236007,18.382305,21.661949,8.01187,32.568785,22.896729,7.469171,4.302584,15.968829,21.775179,108.630987,112.146687,93.311396,17.045876,22.694476,52.424562,72.569008,414.279603,3.46224,1.679314,293.462145,15.842944,32.144143,22.798277,53.217551,17.46031,23.996564,5.559046,13.774137,13.635853,44.702729,12.6051,9.918591,12.131048,2.585949,-4.245431,31.903611,56.242173,10.176443,34.134339,18.389585,23.152289,10.762005,33.988591,23.096029,8.090799,4.403528,13.800378,20.075382,112.146687,108.630987,93.311396,17.588061,25.747277,57.090863,81.384264,411.66819,2.17476,2.656689,288.033734,18.997332,34.582145,23.082722,55.711947,16.612582,23.694123,11.14202,14.003499,21.069281,51.89958,14.11379,7.897312,16.8761,1.408886,0.4,0.4,0.190757,0.248369,0.751631,0.809243,0.475566,0.524434,0.60288,0.590375,0.528929,0.521909,24.537977,24.751563,14.394691,12.067076,0.477527,26.454115,49.466302,13.183535,35.620305,20.214526,23.965507,8.362239,33.487039,24.114582,7.476561,4.567592,15.335078,21.482868,112.314088,112.095417,94.353158,17.118358,22.572087,53.211115,73.198355,415.207348,3.586423,2.160644,294.534621,16.077438,31.789977,23.517014,53.210235,17.173554,24.064331,6.099096,13.656943,14.547099,45.799012,13.381441,10.235306,12.160523,2.435537,-2.032736,31.027127,55.91054,10.714274,34.02899,18.583299,23.577935,10.584957,32.976779,23.587184,7.77508,4.469258,13.41386,20.821955,112.095417,112.314088,94.353158,17.5913,25.086028,55.221784,78.641137,407.821826,2.259075,2.597766,284.871439,18.633197,34.811233,23.108204,55.021261,16.766567,23.859924,10.969835,14.655989,19.482659,50.315163,14.249265,8.396781,15.888513,1.573176,0.55,0.4,0.202284,0.240174,0.759826,0.797716,0.489975,0.510025,0.623476,0.59279,0.543322,0.523669,25.55779,24.998828,13.819555,11.794719,0.502509,8.0


In [96]:
print(LGBRegressor.predict(row))
print(SGDRegressor.predict(row))
print(LGBClassifier.predict_proba(row))
print(SGDClassifier_Hinge.predict(row))
print(SGDClassifier_LogLoss.predict_proba(row))

[[100.79764099  96.95598337]]
[[101.72996143 101.17784761]]
[[0.58214313 0.41785687]]
[0]
[[0.48772945 0.51227055]]


In [220]:
home_team = 'PHI'
away_team = 'BOS'

row = make_matchup_row(home_team, away_team, df = df_full)
lgbr_model.predict(row)

creating matchups between Home and Away team aggregated stats


array([[102.70060833, 100.81850387]])

In [221]:
home_team = 'LAL'
away_team = 'OKC'

row = make_matchup_row(home_team, away_team, df = df_full)
lgbr_model.predict(row)

creating matchups between Home and Away team aggregated stats


array([[113.77664723, 114.94091434]])

In [198]:
lgbr_model.predict(row)

array([[113.77664723, 114.94091434]])

In [106]:
home_team = 'LAL'
away_team = 'SAS'
start_season = 2013
end_season = 2021

start_season = season_to_string(start_season)
end_season = season_to_string(end_season)

db_filepath = Path.home().joinpath('NBA_model_v1', 'data', 'nba.db')

conn = sqlite3.connect(db_filepath)

print("Loading raw team boxscore data from sql database...")

df = load_team_data(conn, start_season, end_season)
print("Loading betting data from sql database...")
spreads, moneylines = load_betting_data(conn)

print("Cleaning Data...")

df = clean_team_data(df)
df = prep_for_aggregation(df)

clean_mls = clean_moneyline_df(df = moneylines)
clean_spreads = clean_spreads_df(df = spreads)


print("Merging Boxscore and Betting Data...")
merged_df = merge_betting_and_boxscore_data(
    clean_spreads, clean_mls, clean_boxscores = df)


stats_per_100 = normalize_per_100_poss(merged_df)

print("Aggregating over last 5, 10, and 20 game windows")

matchups = create_matchups(stats_per_100)

team_stats_ewa_5 = build_team_avg_stats_df(matchups, span=5)
team_stats_ewa_5 = add_percentage_features(team_stats_ewa_5, span=5)

team_stats_ewa_10 = build_team_avg_stats_df(matchups, span=10)
team_stats_ewa_10 = add_percentage_features(team_stats_ewa_10, span=10)

team_stats_ewa_20 = build_team_avg_stats_df(matchups, span=20)
team_stats_ewa_20 = add_percentage_features(team_stats_ewa_20, span=20)


temp = pd.merge(team_stats_ewa_5, team_stats_ewa_10, how='inner',
                on=['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE',
                    'GAME_ID', 'MATCHUP', 'HOME_GAME', 'TEAM_SCORE',
                    'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED', 
                    'POINT_DIFF', 'WL'])

df_full = pd.merge(temp, team_stats_ewa_20, how='inner', 
                    on=['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE',
                        'GAME_ID', 'MATCHUP', 'HOME_GAME', 'TEAM_SCORE',
                        'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED', 
                        'POINT_DIFF', 'WL'])

df_full = df_full.sort_values(['GAME_DATE', 'GAME_ID', 'HOME_GAME'])


columns_to_drop = ['PTS_L5', 'PTS_L10', 'PTS_L20',
                    'PLUS_MINUS_L5', 'PLUS_MINUS_L10', 'PLUS_MINUS_L20',
                    'NET_RATING_L5', 'NET_RATING_L10', 'NET_RATING_L20',
                    'POSS_L5', 'POSS_L10', 'POSS_L20',
                    'REB_L5', 'REB_L10', 'REB_L20',
                    'REB_opp_L5', 'REB_opp_L10', 'REB_opp_L20',
                    'PTS_opp_L5', 'PTS_opp_L10', 'PTS_opp_L20',
                    'PLUS_MINUS_opp_L5', 'PLUS_MINUS_opp_L10', 'PLUS_MINUS_opp_L20',
                    'NET_RATING_opp_L5', 'NET_RATING_opp_L10', 'NET_RATING_opp_L20',
                    'POSS_opp_L5', 'POSS_opp_L10', 'POSS_opp_L20']


df_full = df_full.drop(columns = columns_to_drop)

print("adding rest days")
df_full = add_rest_days(df_full)

print("creating matchups between Home and Away team aggregated stats")

matchup_info_cols = ['SEASON', 'TEAM_ABBREVIATION', 'GAME_DATE', 'GAME_ID', 'MATCHUP',
       'HOME_GAME', 'TEAM_SCORE', 'ML', 'SPREAD', 'ATS_DIFF', 'TEAM_COVERED',
       'POINT_DIFF', 'WL']

most_recent_home_stats = df_full.loc[df_full['TEAM_ABBREVIATION'] == home_team].tail(1).drop(columns=matchup_info_cols).reset_index(drop=True)
most_recent_home_stats = most_recent_home_stats.add_prefix('HOME_')
most_recent_away_stats = df_full.loc[df_full['TEAM_ABBREVIATION'] == away_team].tail(1).drop(columns=matchup_info_cols).reset_index(drop=True)
most_recent_away_stats = most_recent_away_stats.add_prefix('AWAY_')

matchup_row = pd.concat([most_recent_home_stats, most_recent_away_stats], axis=1)

Loading raw team boxscore data from sql database...
Loading betting data from sql database...
Cleaning Data...
Merging Boxscore and Betting Data...
Aggregating over last 5, 10, and 20 game windows
adding rest days
creating matchups between Home and Away team aggregated stats


In [107]:
matchup_row

Unnamed: 0,HOME_FG2M_L5,HOME_FG2A_L5,HOME_FG3M_L5,HOME_FG3A_L5,HOME_FTM_L5,HOME_FTA_L5,HOME_OREB_L5,HOME_DREB_L5,HOME_AST_L5,HOME_STL_L5,HOME_BLK_L5,HOME_TOV_L5,HOME_PF_L5,HOME_OFF_RATING_L5,HOME_DEF_RATING_L5,HOME_PACE_L5,HOME_DIST_L5,HOME_ORBC_L5,HOME_DRBC_L5,HOME_RBC_L5,HOME_TCHS_L5,HOME_SAST_L5,HOME_FTAST_L5,HOME_PASS_L5,HOME_CFGM_L5,HOME_CFGA_L5,HOME_UFGM_L5,HOME_UFGA_L5,HOME_DFGM_L5,HOME_DFGA_L5,HOME_PTS_2PT_MR_L5,HOME_PTS_FB_L5,HOME_PTS_OFF_TOV_L5,HOME_PTS_PAINT_L5,HOME_AST_2PM_L5,HOME_AST_3PM_L5,HOME_UAST_2PM_L5,HOME_UAST_3PM_L5,HOME_AVG_ATS_DIFF_L5,HOME_FG2M_opp_L5,HOME_FG2A_opp_L5,HOME_FG3M_opp_L5,HOME_FG3A_opp_L5,HOME_FTM_opp_L5,HOME_FTA_opp_L5,HOME_OREB_opp_L5,HOME_DREB_opp_L5,HOME_AST_opp_L5,HOME_STL_opp_L5,HOME_BLK_opp_L5,HOME_TOV_opp_L5,HOME_PF_opp_L5,HOME_OFF_RATING_opp_L5,HOME_DEF_RATING_opp_L5,HOME_PACE_opp_L5,HOME_DIST_opp_L5,HOME_ORBC_opp_L5,HOME_DRBC_opp_L5,HOME_RBC_opp_L5,HOME_TCHS_opp_L5,HOME_SAST_opp_L5,HOME_FTAST_opp_L5,HOME_PASS_opp_L5,HOME_CFGM_opp_L5,HOME_CFGA_opp_L5,HOME_UFGM_opp_L5,HOME_UFGA_opp_L5,HOME_DFGM_opp_L5,HOME_DFGA_opp_L5,HOME_PTS_2PT_MR_opp_L5,HOME_PTS_FB_opp_L5,HOME_PTS_OFF_TOV_opp_L5,HOME_PTS_PAINT_opp_L5,HOME_AST_2PM_opp_L5,HOME_AST_3PM_opp_L5,HOME_UAST_2PM_opp_L5,HOME_UAST_3PM_opp_L5,HOME_WIN_PCT_L5,HOME_COVER_PCT_L5,HOME_OREB_PCT_L5,HOME_OREB_PCT_opp_L5,HOME_DREB_PCT_L5,HOME_DREB_PCT_opp_L5,HOME_REB_PCT_L5,HOME_REB_PCT_opp_L5,HOME_TS_PCT_L5,HOME_TS_PCT_opp_L5,HOME_EFG_PCT_L5,HOME_EFG_PCT_opp_L5,HOME_AST_RATIO_L5,HOME_AST_RATIO_opp_L5,HOME_TOV_PCT_L5,HOME_TOV_PCT_opp_L5,HOME_PIE_L5,HOME_FG2M_L10,HOME_FG2A_L10,HOME_FG3M_L10,HOME_FG3A_L10,HOME_FTM_L10,HOME_FTA_L10,HOME_OREB_L10,HOME_DREB_L10,HOME_AST_L10,HOME_STL_L10,HOME_BLK_L10,HOME_TOV_L10,HOME_PF_L10,HOME_OFF_RATING_L10,HOME_DEF_RATING_L10,HOME_PACE_L10,HOME_DIST_L10,HOME_ORBC_L10,HOME_DRBC_L10,HOME_RBC_L10,HOME_TCHS_L10,HOME_SAST_L10,HOME_FTAST_L10,HOME_PASS_L10,HOME_CFGM_L10,HOME_CFGA_L10,HOME_UFGM_L10,HOME_UFGA_L10,HOME_DFGM_L10,HOME_DFGA_L10,HOME_PTS_2PT_MR_L10,HOME_PTS_FB_L10,HOME_PTS_OFF_TOV_L10,HOME_PTS_PAINT_L10,HOME_AST_2PM_L10,HOME_AST_3PM_L10,HOME_UAST_2PM_L10,HOME_UAST_3PM_L10,HOME_AVG_ATS_DIFF_L10,HOME_FG2M_opp_L10,HOME_FG2A_opp_L10,HOME_FG3M_opp_L10,HOME_FG3A_opp_L10,HOME_FTM_opp_L10,HOME_FTA_opp_L10,HOME_OREB_opp_L10,HOME_DREB_opp_L10,HOME_AST_opp_L10,HOME_STL_opp_L10,HOME_BLK_opp_L10,HOME_TOV_opp_L10,HOME_PF_opp_L10,HOME_OFF_RATING_opp_L10,HOME_DEF_RATING_opp_L10,HOME_PACE_opp_L10,HOME_DIST_opp_L10,HOME_ORBC_opp_L10,HOME_DRBC_opp_L10,HOME_RBC_opp_L10,HOME_TCHS_opp_L10,HOME_SAST_opp_L10,HOME_FTAST_opp_L10,HOME_PASS_opp_L10,HOME_CFGM_opp_L10,HOME_CFGA_opp_L10,HOME_UFGM_opp_L10,HOME_UFGA_opp_L10,HOME_DFGM_opp_L10,HOME_DFGA_opp_L10,HOME_PTS_2PT_MR_opp_L10,HOME_PTS_FB_opp_L10,HOME_PTS_OFF_TOV_opp_L10,HOME_PTS_PAINT_opp_L10,HOME_AST_2PM_opp_L10,HOME_AST_3PM_opp_L10,HOME_UAST_2PM_opp_L10,HOME_UAST_3PM_opp_L10,HOME_WIN_PCT_L10,HOME_COVER_PCT_L10,HOME_OREB_PCT_L10,HOME_OREB_PCT_opp_L10,HOME_DREB_PCT_L10,HOME_DREB_PCT_opp_L10,HOME_REB_PCT_L10,HOME_REB_PCT_opp_L10,HOME_TS_PCT_L10,HOME_TS_PCT_opp_L10,HOME_EFG_PCT_L10,HOME_EFG_PCT_opp_L10,HOME_AST_RATIO_L10,HOME_AST_RATIO_opp_L10,HOME_TOV_PCT_L10,HOME_TOV_PCT_opp_L10,HOME_PIE_L10,HOME_FG2M_L20,HOME_FG2A_L20,HOME_FG3M_L20,HOME_FG3A_L20,HOME_FTM_L20,HOME_FTA_L20,HOME_OREB_L20,HOME_DREB_L20,HOME_AST_L20,HOME_STL_L20,HOME_BLK_L20,HOME_TOV_L20,...,AWAY_REB_PCT_L5,AWAY_REB_PCT_opp_L5,AWAY_TS_PCT_L5,AWAY_TS_PCT_opp_L5,AWAY_EFG_PCT_L5,AWAY_EFG_PCT_opp_L5,AWAY_AST_RATIO_L5,AWAY_AST_RATIO_opp_L5,AWAY_TOV_PCT_L5,AWAY_TOV_PCT_opp_L5,AWAY_PIE_L5,AWAY_FG2M_L10,AWAY_FG2A_L10,AWAY_FG3M_L10,AWAY_FG3A_L10,AWAY_FTM_L10,AWAY_FTA_L10,AWAY_OREB_L10,AWAY_DREB_L10,AWAY_AST_L10,AWAY_STL_L10,AWAY_BLK_L10,AWAY_TOV_L10,AWAY_PF_L10,AWAY_OFF_RATING_L10,AWAY_DEF_RATING_L10,AWAY_PACE_L10,AWAY_DIST_L10,AWAY_ORBC_L10,AWAY_DRBC_L10,AWAY_RBC_L10,AWAY_TCHS_L10,AWAY_SAST_L10,AWAY_FTAST_L10,AWAY_PASS_L10,AWAY_CFGM_L10,AWAY_CFGA_L10,AWAY_UFGM_L10,AWAY_UFGA_L10,AWAY_DFGM_L10,AWAY_DFGA_L10,AWAY_PTS_2PT_MR_L10,AWAY_PTS_FB_L10,AWAY_PTS_OFF_TOV_L10,AWAY_PTS_PAINT_L10,AWAY_AST_2PM_L10,AWAY_AST_3PM_L10,AWAY_UAST_2PM_L10,AWAY_UAST_3PM_L10,AWAY_AVG_ATS_DIFF_L10,AWAY_FG2M_opp_L10,AWAY_FG2A_opp_L10,AWAY_FG3M_opp_L10,AWAY_FG3A_opp_L10,AWAY_FTM_opp_L10,AWAY_FTA_opp_L10,AWAY_OREB_opp_L10,AWAY_DREB_opp_L10,AWAY_AST_opp_L10,AWAY_STL_opp_L10,AWAY_BLK_opp_L10,AWAY_TOV_opp_L10,AWAY_PF_opp_L10,AWAY_OFF_RATING_opp_L10,AWAY_DEF_RATING_opp_L10,AWAY_PACE_opp_L10,AWAY_DIST_opp_L10,AWAY_ORBC_opp_L10,AWAY_DRBC_opp_L10,AWAY_RBC_opp_L10,AWAY_TCHS_opp_L10,AWAY_SAST_opp_L10,AWAY_FTAST_opp_L10,AWAY_PASS_opp_L10,AWAY_CFGM_opp_L10,AWAY_CFGA_opp_L10,AWAY_UFGM_opp_L10,AWAY_UFGA_opp_L10,AWAY_DFGM_opp_L10,AWAY_DFGA_opp_L10,AWAY_PTS_2PT_MR_opp_L10,AWAY_PTS_FB_opp_L10,AWAY_PTS_OFF_TOV_opp_L10,AWAY_PTS_PAINT_opp_L10,AWAY_AST_2PM_opp_L10,AWAY_AST_3PM_opp_L10,AWAY_UAST_2PM_opp_L10,AWAY_UAST_3PM_opp_L10,AWAY_WIN_PCT_L10,AWAY_COVER_PCT_L10,AWAY_OREB_PCT_L10,AWAY_OREB_PCT_opp_L10,AWAY_DREB_PCT_L10,AWAY_DREB_PCT_opp_L10,AWAY_REB_PCT_L10,AWAY_REB_PCT_opp_L10,AWAY_TS_PCT_L10,AWAY_TS_PCT_opp_L10,AWAY_EFG_PCT_L10,AWAY_EFG_PCT_opp_L10,AWAY_AST_RATIO_L10,AWAY_AST_RATIO_opp_L10,AWAY_TOV_PCT_L10,AWAY_TOV_PCT_opp_L10,AWAY_PIE_L10,AWAY_FG2M_L20,AWAY_FG2A_L20,AWAY_FG3M_L20,AWAY_FG3A_L20,AWAY_FTM_L20,AWAY_FTA_L20,AWAY_OREB_L20,AWAY_DREB_L20,AWAY_AST_L20,AWAY_STL_L20,AWAY_BLK_L20,AWAY_TOV_L20,AWAY_PF_L20,AWAY_OFF_RATING_L20,AWAY_DEF_RATING_L20,AWAY_PACE_L20,AWAY_DIST_L20,AWAY_ORBC_L20,AWAY_DRBC_L20,AWAY_RBC_L20,AWAY_TCHS_L20,AWAY_SAST_L20,AWAY_FTAST_L20,AWAY_PASS_L20,AWAY_CFGM_L20,AWAY_CFGA_L20,AWAY_UFGM_L20,AWAY_UFGA_L20,AWAY_DFGM_L20,AWAY_DFGA_L20,AWAY_PTS_2PT_MR_L20,AWAY_PTS_FB_L20,AWAY_PTS_OFF_TOV_L20,AWAY_PTS_PAINT_L20,AWAY_AST_2PM_L20,AWAY_AST_3PM_L20,AWAY_UAST_2PM_L20,AWAY_UAST_3PM_L20,AWAY_AVG_ATS_DIFF_L20,AWAY_FG2M_opp_L20,AWAY_FG2A_opp_L20,AWAY_FG3M_opp_L20,AWAY_FG3A_opp_L20,AWAY_FTM_opp_L20,AWAY_FTA_opp_L20,AWAY_OREB_opp_L20,AWAY_DREB_opp_L20,AWAY_AST_opp_L20,AWAY_STL_opp_L20,AWAY_BLK_opp_L20,AWAY_TOV_opp_L20,AWAY_PF_opp_L20,AWAY_OFF_RATING_opp_L20,AWAY_DEF_RATING_opp_L20,AWAY_PACE_opp_L20,AWAY_DIST_opp_L20,AWAY_ORBC_opp_L20,AWAY_DRBC_opp_L20,AWAY_RBC_opp_L20,AWAY_TCHS_opp_L20,AWAY_SAST_opp_L20,AWAY_FTAST_opp_L20,AWAY_PASS_opp_L20,AWAY_CFGM_opp_L20,AWAY_CFGA_opp_L20,AWAY_UFGM_opp_L20,AWAY_UFGA_opp_L20,AWAY_DFGM_opp_L20,AWAY_DFGA_opp_L20,AWAY_PTS_2PT_MR_opp_L20,AWAY_PTS_FB_opp_L20,AWAY_PTS_OFF_TOV_opp_L20,AWAY_PTS_PAINT_opp_L20,AWAY_AST_2PM_opp_L20,AWAY_AST_3PM_opp_L20,AWAY_UAST_2PM_opp_L20,AWAY_UAST_3PM_opp_L20,AWAY_WIN_PCT_L20,AWAY_COVER_PCT_L20,AWAY_OREB_PCT_L20,AWAY_OREB_PCT_opp_L20,AWAY_DREB_PCT_L20,AWAY_DREB_PCT_opp_L20,AWAY_REB_PCT_L20,AWAY_REB_PCT_opp_L20,AWAY_TS_PCT_L20,AWAY_TS_PCT_opp_L20,AWAY_EFG_PCT_L20,AWAY_EFG_PCT_opp_L20,AWAY_AST_RATIO_L20,AWAY_AST_RATIO_opp_L20,AWAY_TOV_PCT_L20,AWAY_TOV_PCT_opp_L20,AWAY_PIE_L20,AWAY_REST
0,29.598352,53.032355,8.836455,29.109184,23.71382,31.905133,7.354883,34.745477,21.104468,7.071961,3.477168,13.355004,17.041755,112.150127,112.750179,103.005114,17.50083,16.563352,52.124248,66.285601,385.812615,2.558406,4.251585,269.667966,16.677863,30.443097,22.996798,51.698441,20.981652,29.187215,9.341931,16.740348,16.918777,51.088201,14.140919,6.709147,16.192941,1.255792,3.388021,33.226268,56.880015,10.794969,31.514311,14.251388,17.562956,8.577051,34.433078,28.450887,7.524976,3.604473,12.94311,22.370539,112.750179,112.150127,103.005114,17.910909,22.381343,53.898062,73.462069,416.75511,4.885709,2.765671,298.121387,17.759715,32.019048,26.249902,56.352039,20.731325,29.448804,9.492262,10.500231,15.788497,56.263211,18.630534,9.047361,13.814411,1.193045,0.2,0.4,0.176005,0.197981,0.802019,0.823995,0.494655,0.505345,0.627517,0.612893,0.521697,0.559071,20.488757,27.620849,12.192476,11.867322,0.47886,30.514195,54.093622,9.794849,30.512487,20.736454,28.084813,8.101502,33.870247,22.387815,6.756771,3.340736,13.474073,18.609531,112.478914,116.640867,101.906097,17.356484,18.941844,50.390501,66.479575,386.151842,2.718142,3.929967,268.566356,17.162805,31.425396,23.900806,53.180598,20.196459,28.025713,9.774759,14.756955,16.064386,51.444166,14.525579,7.421949,16.06506,1.527202,1.09029,33.018708,55.921621,11.70965,32.871648,15.981773,20.152691,9.069365,34.16433,28.330549,7.774141,3.553243,12.229418,20.6346,116.640867,112.478914,101.906097,17.748324,22.080015,53.251488,72.61188,411.294993,4.936867,2.929982,291.555564,18.131235,32.138237,26.547993,56.556901,20.699902,29.819833,10.414068,12.717901,16.234885,54.637506,17.704966,9.824739,14.566557,1.381964,0.2,0.5,0.19168,0.211212,0.788788,0.80832,0.492595,0.507405,0.62047,0.628296,0.534317,0.569674,21.969063,27.800641,12.200632,11.128795,0.464423,30.590365,54.12868,10.871616,32.444998,18.657747,25.433229,8.802549,33.26639,23.27828,6.736077,3.677204,13.618369,...,0.48552,0.51448,0.602393,0.564906,0.541093,0.491562,25.905228,23.654808,11.236726,10.70696,0.532069,29.32364,54.64486,11.85011,32.944141,20.212221,26.559534,8.824024,36.764328,26.877511,6.753505,3.931142,12.672963,18.6011,112.956136,108.827581,100.01925,18.631162,21.210962,62.631717,81.555357,417.434125,3.54624,2.689437,296.407204,17.759333,35.244754,22.81967,52.33996,15.999779,27.962756,8.059483,12.784178,15.740108,48.224113,15.780001,10.363304,12.17253,1.026757,5.437991,26.069259,52.013155,13.007021,38.144691,17.556713,22.78794,11.620226,34.954074,23.764939,7.33454,3.398527,12.474926,21.670391,108.827581,112.956136,100.01925,18.302578,26.335403,59.877107,83.310077,423.112218,2.305959,3.106775,299.873619,17.133504,35.876725,21.942774,54.276449,16.59942,24.533516,6.537973,10.916709,14.44352,44.786252,12.090621,11.015694,13.325996,1.735169,0.7,0.9,0.201563,0.240164,0.759836,0.798437,0.494651,0.505349,0.605896,0.571162,0.537725,0.505555,26.872339,23.760365,11.320386,11.073127,0.535736,29.933396,56.123245,11.852972,33.384928,19.247432,25.030372,10.070462,35.410364,27.254562,7.00106,4.28419,12.779294,18.201023,113.812942,110.905425,100.353702,18.669576,23.491891,59.888405,81.12319,417.450755,3.4607,2.643976,294.953625,18.735016,36.83977,22.833782,52.645506,16.582183,28.140496,8.678049,12.667836,16.149655,49.644174,16.196305,10.388566,12.743279,0.968997,3.400112,27.760667,54.103882,12.793663,36.250085,17.247029,22.26147,11.578316,34.810201,23.719471,7.322165,3.962279,12.891714,20.828356,110.905425,113.812942,100.353702,18.333961,26.39975,59.2499,82.820963,415.761166,2.373999,2.846321,292.639978,18.006574,36.914415,22.547328,53.42466,16.988694,25.688868,7.990644,11.42775,15.238114,46.587298,12.532166,10.38881,14.519834,2.001886,0.5,0.6,0.224383,0.246406,0.753594,0.775617,0.49506,0.50494,0.601159,0.583452,0.533056,0.519636,27.158502,23.635871,11.279083,11.404486,0.525536,1.0


## Automatically load results into google sheets

In [111]:
import gspread 
import df2gspread as d2g

In [112]:
path_to_results = Path().home().joinpath('NBA_Model_v1', 'results', 'betting_predictions_2022.csv')
pd.read_csv(path_to_results)

Unnamed: 0,home_team,away_team,game_date,home_spread,home_moneylines,away_moneylines,sgd_home_score_pred,sgd_away_score_pred,lgb_home_score_pred,lgb_away_score_pred,home_win_prob_sgd_hinge,home_win_prob_sgd_logloss,away_win_prob_sgd_logloss,home_win_prob_lgbc,away_win_prob_lgbc
0,DET,ORL,2022-10-19,-3.0,1.689655,2.25,113.409544,111.058845,107.90339,108.97123,1,0.623637,0.376363,0.487241,0.512759
1,IND,WAS,2022-10-19,1.0,1.952381,1.869565,115.204471,116.127576,115.438471,114.960548,1,0.462378,0.537622,0.359084,0.640916
2,BKN,NOP,2022-10-19,-3.0,1.666667,2.3,110.883008,108.779141,111.617562,105.747313,0,0.707338,0.292662,0.545411,0.454589
3,MEM,NYK,2022-10-19,-4.0,1.571429,2.5,104.937682,109.503584,106.732776,105.929732,0,0.396311,0.603689,0.392375,0.607625
4,ATL,HOU,2022-10-19,-9.5,1.222222,4.6,115.801929,110.365446,116.699032,108.988385,1,0.73086,0.26914,0.794196,0.205804
5,MIA,CHI,2022-10-19,-7.5,1.338983,3.45,104.525873,102.368288,104.244848,98.118047,1,0.63749,0.36251,0.664986,0.335014
6,TOR,CLE,2022-10-19,-2.5,1.714286,2.2,111.18509,111.511306,110.699297,112.730757,1,0.560377,0.439623,0.525458,0.474542
7,MIN,OKC,2022-10-19,-10.5,1.178571,5.3,114.470001,109.015252,113.238209,110.976124,1,0.75112,0.24888,0.699785,0.300215
8,SAS,CHA,2022-10-19,0.0,1.909091,1.909091,114.13096,117.758485,115.072217,120.363324,0,0.487087,0.512913,0.345158,0.654842
9,UTA,DEN,2022-10-19,7.0,3.25,1.377358,109.031661,108.730047,110.424503,106.791883,1,0.673097,0.326903,0.706642,0.293358


In [114]:
gc = gspread.oauth()

sh = gc.open("Example spreadsheet")

print(sh.sheet1.get('A1'))

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Jordan Nishimura\\AppData\\Roaming\\gspread\\credentials.json'