In [44]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashplayerstats, leaguedashteamclutch, leaguegamelog, boxscoreplayertrackv2, playercareerstats, teamplayerdashboard, leaguestandings
from nba_api.stats.static import players, teams
import time
pd.set_option('display.max_columns', None)
from IPython import display as ICD
import warnings
warnings.filterwarnings('ignore')

In [45]:
abbrv_mapping = {'DEN':"1610612743", 
           'SAS':"1610612759", 
           'IND':"1610612754", 
           'MEM':"1610612763", 
           'NOP':"1610612740", 
           'CHI':"1610612741", 
           'MIN':"1610612750", 
           'MIA':"1610612748", 
           'ATL':"1610612737",
           'ORL':"1610612753", 
           'SAC':"1610612758", 
           'UTA':"1610612762", 
           'CHA':"1610612766", 
           'WAS':"1610612764", 
           'HOU':"1610612745", 
           'PHX':"1610612756", 
           'OKC':"1610612760", 
           'POR':"1610612757",
           'LAC':"1610612746", 
           'GSW':"1610612744", 
           'PHI':"1610612755", 
           'LAL':"1610612747", 
           'BKN':"1610612751", 
           'TOR':"1610612761", 
           'BOS':"1610612738", 
           'MIL':"1610612749", 
           'DET':"1610612765",
           'CLE':"1610612739", 
           'NYK':"1610612752",
           'DAL':"1610612742"}

name_mapping = {'Nuggets':"1610612743", 
           'Spurs':"1610612759", 
           'Pacers':"1610612754", 
           'Memphis':"1610612763", 
           'Pelicans':"1610612740", 
           'Bulls':"1610612741", 
           'Timberwolves':"1610612750", 
           'Heat':"1610612748", 
           'Hawks':"1610612737",
           'Magic':"1610612753", 
           'Kings':"1610612758", 
           'Jazz':"1610612762", 
           'Hornets':"1610612766", 
           'Wizards':"1610612764", 
           'Rockets':"1610612745", 
           'Suns':"1610612756", 
           'Thunder':"1610612760", 
           'Trail Blazers':"1610612757",
           'Clippers':"1610612746", 
           'Warriors':"1610612744", 
           '76ers':"1610612755", 
           'Lakers':"1610612747", 
           'Nets':"1610612751", 
           'Raptors':"1610612761", 
           'Celtics':"1610612738", 
           'Bucks':"1610612749", 
           'Pistons':"1610612765",
           'Cavaliers':"1610612739", 
           'Knicks':"1610612752",
           'Mavericks':"1610612742"}

In [46]:
def clean_all_playoff_games(season_name):
    '''
    clean_all_playoff_games - add a few columns and remove irrelevant ones
    '''
    all_games = leaguegamelog.LeagueGameLog(season_type_all_star='Playoffs', season=season_name).get_data_frames()[0]
    all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE']) 
    all_games = all_games.sort_values('GAME_DATE',ascending = False)
    all_games['OpponentTeamID'] = all_games['MATCHUP'].apply(lambda x: int(abbrv_mapping[x[-3:]]))
    all_games['OPTS'] = all_games['PTS'] - all_games['PLUS_MINUS']
    all_games = all_games[['GAME_ID','GAME_DATE','TEAM_ID','OpponentTeamID','MATCHUP','PTS','OPTS']]
    return all_games.rename(columns = {'GAME_ID':'GameID','GAME_DATE':'GameDate', 'TEAM_ID':'TeamID'})

def clean_all_games(season_name):
    '''
    clean_all_playoff_games - add a few columns and remove irrelevant ones
    '''
    all_games = leaguegamelog.LeagueGameLog(season=season_name).get_data_frames()[0]
    all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE']) 
    all_games = all_games.sort_values('GAME_DATE',ascending = False)
    all_games['OpponentTeamID'] = all_games['MATCHUP'].apply(lambda x: int(abbrv_mapping[x[-3:]]))
    all_games['OPTS'] = all_games['PTS'] - all_games['PLUS_MINUS']
    all_games = all_games[['GAME_ID','GAME_DATE','TEAM_ID','OpponentTeamID','MATCHUP','PTS','OPTS']]
    return all_games.rename(columns = {'GAME_ID':'GameID','GAME_DATE':'GameDate', 'TEAM_ID':'TeamID'})

all_playoff_games = clean_all_playoff_games('2019-20')
all_regseason_playoff_games = clean_all_games('2019-20')
all_playoff_games

Unnamed: 0,GameID,GameDate,TeamID,OpponentTeamID,MATCHUP,PTS,OPTS


In [47]:
player_rotation = pd.read_csv('players_rotation.csv')
players_df = pd.read_csv('players.csv')
first_game_missing_mins = player_rotation.groupby('TEAM_ID').sum()['MIN'] / players_df.groupby('TEAM_ID').sum()['MIN']

In [48]:
def missing_mins(curr_game_id, team_id, season_name):
    '''
    missing_mins: gets the fraction of the season's minutes played by injured/inactive/traded players
    '''
    time.sleep(0.5)
    # get the season stats of the team
    season_stats = teamplayerdashboard.TeamPlayerDashboard(team_id, season=season_name).get_data_frames()[1]
    
    # get the game currently being assessed
    curr_game = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=curr_game_id).get_data_frames()[0]
    
    player_game_stats = curr_game[curr_game['TEAM_ID'] == int(team_id)]
    combined = pd.merge(season_stats, player_game_stats, how='left', on='PLAYER_ID')
    
    total_mins = combined['MIN_x'].sum()
    missing_mins = combined[(combined['MIN_y'] == "0:00") | (combined['MIN_y'].isna())]['MIN_x'].sum()
    return (total_mins - missing_mins) / total_mins

In [49]:
games = pd.read_csv('predictions/games.csv')
games['Date'] = games['Date'].astype('datetime64')
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual
0,2020-08-17,,,,Nuggets,Jazz,,,,,,
1,2020-08-17,,,,Raptors,Nets,,,,,,
2,2020-08-17,,,,Celtics,76ers,,,,,,
3,2020-08-17,,,,Clippers,Mavericks,,,,,,
4,2020-08-18,,,,Bucks,Magic,,,,,,
5,2020-08-18,,,,Pacers,Heat,,,,,,
6,2020-08-18,,,,Rockets,Thunder,,,,,,
7,2020-08-18,,,,Lakers,Trail Blazers,,,,,,


In [50]:
def get_gameid(row):
    if(not pd.isna(row['GameID'])):
        return row['GameID']
    temp = all_playoff_games[all_playoff_games['TeamID'] == row['TeamID']]
    temp = all_playoff_games[all_playoff_games['OpponentTeamID'] == row['OpponentTeamID']]
    temp = temp[temp['GameDate'].apply(lambda x: x.date() == row['Date'].date())]
    if(len(temp) != 0):
        return temp['GameID'].iloc[0]
    return row['GameID']
def get_game_info(row, attribute, alt_attribute):
    if(pd.isna(row['GameID'])):
        return row[alt_attribute]
    try:
        return all_playoff_games[(all_playoff_games['GameID'] == row['GameID']) & (all_playoff_games['TeamID'] == row['TeamID'])][attribute].iloc[0]
    except:
        return row[alt_attribute]
games['TeamID'] = games['TeamName'].apply(lambda x: int(name_mapping[x]))
games['OpponentTeamID'] = games['OpponentTeamName'].apply(lambda x: int(name_mapping[x]))
games['GameID'] = games.apply(lambda x: get_gameid(x), axis = 1)
games['Date'] = games.apply(lambda x: get_game_info(x, 'GameDate', 'Date'), axis = 1)
games['TeamPointsActual'] = games.apply(lambda x: get_game_info(x, 'PTS', 'TeamPoints'), axis = 1)
games['OpponentTeamPointsActual'] = games.apply(lambda x: get_game_info(x, 'OPTS', 'OpponentTeamPoints'), axis = 1)
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual
0,2020-08-17,,1610612743,1610612762,Nuggets,Jazz,,,,,,
1,2020-08-17,,1610612761,1610612751,Raptors,Nets,,,,,,
2,2020-08-17,,1610612738,1610612755,Celtics,76ers,,,,,,
3,2020-08-17,,1610612746,1610612742,Clippers,Mavericks,,,,,,
4,2020-08-18,,1610612749,1610612753,Bucks,Magic,,,,,,
5,2020-08-18,,1610612754,1610612748,Pacers,Heat,,,,,,
6,2020-08-18,,1610612745,1610612760,Rockets,Thunder,,,,,,
7,2020-08-18,,1610612747,1610612757,Lakers,Trail Blazers,,,,,,


In [51]:
# creating the games dataframe
games_copy = games[pd.isna(games['TeamWinProba'])][['Date','TeamID', 'OpponentTeamID']]
games_copy_reversed = games_copy.copy()
games_copy_reversed['TeamID'] = games_copy['OpponentTeamID']
games_copy_reversed['OpponentTeamID'] = games_copy['TeamID']

all_games = pd.concat([games_copy, games_copy_reversed]).sort_index().reset_index(drop=True)

reg_season_ratings = leaguestandings.LeagueStandings(season='2019-20', season_type = 'Regular Season').get_data_frames()[0]
reg_season_ratings = reg_season_ratings[['TeamID','PointsPG','OppPointsPG']]

#ratings
all_games = pd.merge(all_games, reg_season_ratings, how='left', on='TeamID')
all_games = pd.merge(all_games, reg_season_ratings, how='left', left_on='OpponentTeamID', right_on='TeamID', suffixes=['','Opponent'])

all_games['SeriesGP'] = all_games.apply(lambda x: 1 + len(all_playoff_games[(all_playoff_games['TeamID'] == x['OpponentTeamID']) & (all_playoff_games['OpponentTeamID'] == x['TeamID'])]), axis = 1) 
all_games['PrevPTS'] = all_games.apply(lambda x: all_regseason_playoff_games[all_regseason_playoff_games['TeamID'] == x['TeamID']]['PTS'].iloc[0], axis = 1) 
all_games['PrevOPTS'] = all_games.apply(lambda x: all_regseason_playoff_games[all_regseason_playoff_games['TeamID'] == x['TeamID']]['OPTS'].iloc[0], axis = 1) 
#all_games['MissingMins'] = all_games.apply(lambda x: missing_mins(all_regseason_playoff_games[all_regseason_playoff_games['TeamID'] == x['TeamID']]['GameID'].iloc[0], x['TeamID'], '2019-20'), axis = 1)
first_game_missing_mins_copy = first_game_missing_mins.to_frame().reset_index()
first_game_missing_mins_copy.columns = ['TeamID', 'MissingMins']
all_games = pd.merge(all_games, first_game_missing_mins_copy, how='left', on='TeamID')


playoff_experience = pd.read_csv('playoff_experience.csv')
all_games = pd.merge(all_games, playoff_experience, how = 'left', on = 'TeamID')
all_games


Unnamed: 0,Date,TeamID,OpponentTeamID,PointsPG,OppPointsPG,TeamIDOpponent,PointsPGOpponent,OppPointsPGOpponent,SeriesGP,PrevPTS,PrevOPTS,MissingMins,Age,PlayoffMins
0,2020-08-17,1610612743,1610612762,111.3,109.2,1610612762,111.3,108.8,1,109,117,0.786909,391.0,6895.763333
1,2020-08-17,1610612762,1610612743,111.3,108.8,1610612743,111.3,109.2,1,118,112,0.87188,396.0,6328.35
2,2020-08-17,1610612761,1610612751,112.8,106.5,1610612751,111.8,112.3,1,117,109,0.801402,397.0,13204.62
3,2020-08-17,1610612751,1610612761,111.8,112.3,1610612761,112.8,106.5,1,133,134,0.773914,404.0,7254.545
4,2020-08-17,1610612738,1610612755,113.7,107.3,1610612755,110.7,108.4,1,90,96,0.803989,379.0,5772.186667
5,2020-08-17,1610612755,1610612738,110.7,108.4,1610612738,113.7,107.3,1,134,96,0.774972,403.0,7951.311667
6,2020-08-17,1610612746,1610612742,116.3,109.9,1610612742,117.0,112.1,1,107,103,0.763245,412.0,13322.215
7,2020-08-17,1610612742,1610612746,117.0,112.1,1610612746,116.3,109.9,1,102,128,0.756717,415.0,3609.745
8,2020-08-18,1610612749,1610612753,118.7,108.6,1610612753,107.3,108.3,1,106,119,0.726385,446.0,16320.056667
9,2020-08-18,1610612753,1610612749,107.3,108.3,1610612749,118.7,108.6,1,133,127,0.756219,393.0,4178.705


In [52]:
# bias boosted monte carlo simulation

In [53]:
import random
import pickle
import numpy as np
from sklearn.linear_model import LinearRegression

In [54]:
pts_model = pickle.load( open( "pts_model", "rb" ) )
opts_model = pickle.load( open( "opts_model", "rb" ) )
playoff_data = pd.read_csv('first_round_games.csv')
offense_cols = ['PlayoffMins','Age','SeriesGP','MissingMins','PointsPG','OppPointsPGOpponent','PrevPTS']
defense_cols = ['PlayoffMins','Age','SeriesGP','MissingMins','OppPointsPG','PointsPGOpponent','PrevOPTS']

In [55]:
def get_weight(x, col_value):
    return 1/(0.1+abs((x-col_value)/col_value))

def get_rand_sample(df, col_name, col_value, sample_col_name, N):
    weights = df[col_name].apply(lambda x: get_weight(x,col_value))
    to_return = random.choices(population=list(df[sample_col_name]), 
                               weights=weights,
                               k=N)
    return to_return

def get_predictions(playoff_games, cols, predictor, N, model = None):
    predictions = playoff_games[['Date', 'TeamID','OpponentTeamID']]
    col_sum = []
    for x in cols:
        col_sum.append(x + "Sum")
    for col in cols:
        predictions[col] = playoff_games.apply(lambda x: get_rand_sample(playoff_data,col,x[col],predictor,N), axis = 1)
    for col in range(len(cols)):
        predictions[col_sum[col]] = predictions[cols[col]].apply(lambda x: sum(x)/N)
    X = predictions[col_sum]
    
    coefs = pd.Series(model.coef_, index = col_sum).abs()
    # predictions[predictor + 'Combined'] = predictions[cols].apply(lambda x: (np.array(x.values.tolist()) * coefs.values[:, None]).sum(axis=0)/coefs.sum(), axis = 1)
    predictions[predictor + 'Combined'] = predictions[cols].apply(lambda x: model.predict(np.array(x.values.tolist()).transpose()), axis = 1)
    predictions[predictor + 'LinearRegression'] = model.predict(X)
    ICD.display(predictions)
    return predictions[[predictor + 'Combined', predictor + 'LinearRegression']]

In [56]:
pts_df = get_predictions(all_games, offense_cols, 'AdjustedPTS', 10000, pts_model)
opts_df = get_predictions(all_games, defense_cols, 'AdjustedOPTS', 10000, opts_model)
final = pd.concat([all_games, pts_df, opts_df], axis = 1)
final['WinProba'] = final.apply(lambda x: np.count_nonzero(x['AdjustedPTSCombined'] > x['AdjustedOPTSCombined'])/1000, axis = 1)
final['WinProbaScaled'] = final.apply(lambda x: x['WinProba']* 1/final[((final['SeriesGP'] == x['SeriesGP'])) & (((final['TeamID'] == x['TeamID']) & (final['OpponentTeamID'] == x['OpponentTeamID'])) | ((final['TeamID'] == x['OpponentTeamID']) & (final['OpponentTeamID'] == x['TeamID'])))]['WinProba'].sum(), axis = 1)
final['WinProba2x'] = final.apply(lambda x: max(0,min(1,2*x['WinProbaScaled']-0.5)), axis = 1)
ICD.display(final)
final = final[['Date','TeamID','OpponentTeamID','AdjustedPTSLinearRegression','AdjustedOPTSLinearRegression', 'WinProba2x']]

Unnamed: 0,Date,TeamID,OpponentTeamID,PlayoffMins,Age,SeriesGP,MissingMins,PointsPG,OppPointsPGOpponent,PrevPTS,PlayoffMinsSum,AgeSum,SeriesGPSum,MissingMinsSum,PointsPGSum,OppPointsPGOpponentSum,PrevPTSSum,AdjustedPTSCombined,AdjustedPTSLinearRegression
0,2020-08-17,1610612743,1610612762,"[98.86624442, 111.9658503, 92.80052454, 94.613...","[108.9397462, 106.1886618, 111.9658503, 101.06...","[110.0506706, 118.97063030000001, 92.32343892,...","[113.2842384, 116.99568149999999, 111.9089128,...","[117.5909064, 106.92234350000001, 112.9262665,...","[89.4079619, 103.0135213, 94.38992158, 105.045...","[119.95385870000001, 101.27252, 109.0592232, 1...",104.615547,104.62185,103.314147,104.344398,105.018064,104.739294,105.494902,"[175.45041462862832, 187.17752472752773, 110.7...",105.014157
1,2020-08-17,1610612762,1610612743,"[89.4079619, 134.2906953, 105.09343319999999, ...","[103.11053829999999, 96.83532996, 114.01646059...","[100.92595779999999, 87.4924227, 112.9262665, ...","[122.5529488, 109.1648115, 107.10509809999999,...","[126.3373375, 119.9651455, 113.02501310000001,...","[126.3373375, 101.87883670000001, 121.06509650...","[117.987402, 101.87883670000001, 104.9571727, ...",104.078629,104.715407,103.397688,104.607076,104.811512,105.066063,106.278209,"[224.62521069391755, 157.83416214117324, 165.8...",106.983888
2,2020-08-17,1610612761,1610612751,"[104.9049408, 82.29014111, 99.86143402, 82.713...","[93.8092259, 92.80052454, 107.93104479999998, ...","[112.9262665, 99.89610113, 109.816301, 101.127...","[116.3738085, 111.9658503, 88.23882601, 87.492...","[111.9658503, 102.0416957, 106.07524140000001,...","[88.49055148, 106.1886618, 95.17895839, 81.704...","[119.9651455, 93.71696086, 111.9089128, 98.153...",103.750824,104.602386,103.233438,104.776699,105.179091,105.544041,106.319048,"[183.19467744316262, 55.401586210358346, 47.62...",107.838911
3,2020-08-17,1610612751,1610612761,"[137.3427566, 103.11053829999999, 96.35637828,...","[100.92595779999999, 108.1551185, 99.70066773,...","[98.85273266, 108.9397462, 82.29014111, 110.78...","[117.987402, 99.86143402, 93.71696086, 113.025...","[104.9571727, 116.0209453, 109.87420520000002,...","[107.10509809999999, 114.960974, 129.2039265, ...","[83.42300769, 93.71696086, 108.1551185, 100.87...",105.335562,104.858696,103.353117,104.338945,105.012166,104.680193,106.303055,"[167.14244765383955, 96.93922116736553, 48.596...",108.100662
4,2020-08-17,1610612738,1610612755,"[94.38992158, 133.2733416, 87.4924227, 104.015...","[131.23863400000002, 105.0453847, 129.2039265,...","[86.25593104, 92.32343892, 106.07524140000001,...","[113.0712602, 128.8029138, 97.33960663, 113.98...","[94.81792725, 110.19466829999999, 84.44825044,...","[116.6190807, 112.9262665, 101.1276433, 126.33...","[93.71696086, 117.5909064, 99.70066773, 87.757...",104.366279,104.889857,103.408237,104.337964,105.554584,104.67954,102.960978,"[90.99660688092376, 280.2097637731058, 24.8919...",100.385808
5,2020-08-17,1610612755,1610612738,"[103.2389767, 115.0079081, 105.8047902, 109.16...","[106.07524140000001, 106.1886618, 103.98534699...","[97.84403131, 118.97063030000001, 105.8047902,...","[107.93104479999998, 111.9089128, 90.22172097,...","[90.22172097, 109.1383468, 100.0980443, 104.22...","[103.2389767, 94.74681757, 97.83638771, 101.73...","[127.3091631, 103.0135213, 116.0209453, 89.597...",105.181309,104.812059,103.436256,104.447878,104.693605,104.584007,106.550865,"[132.72362680088713, 169.75441849746767, 60.27...",108.331633
6,2020-08-17,1610612746,1610612742,"[113.94362029999999, 98.86624442, 86.25593104,...","[95.17895839, 104.9571727, 111.9658503, 112.92...","[109.1648115, 116.990803, 106.07524140000001, ...","[92.68710414, 137.3427566, 114.960974, 111.965...","[105.8047902, 100.0980443, 103.89623940000001,...","[129.2039265, 119.95385870000001, 108.1551185,...","[100.92595779999999, 95.63125272, 106.1886618,...",103.762134,104.534335,103.402204,104.202073,105.869761,105.340873,105.357918,"[93.19487102742391, 239.84408926083825, 128.77...",104.484055
7,2020-08-17,1610612742,1610612746,"[104.9049408, 100.92595779999999, 118.97063030...","[89.4079619, 115.9993555, 99.30606333, 109.164...","[114.31409509999999, 82.29014111, 102.2557484,...","[116.99568149999999, 101.06987, 96.83532996, 1...","[89.4079619, 91.44023653, 100.87013540000001, ...","[92.57919146, 116.0209453, 97.18256729, 103.11...","[109.0592232, 106.07524140000001, 99.86143402,...",105.192974,104.758078,103.401015,104.37127,105.828191,104.990744,103.979074,"[135.74987207957906, 56.00833520879178, 69.309...",104.669252
8,2020-08-18,1610612749,1610612753,"[104.9571727, 109.1648115, 104.7874365, 100.71...","[101.06987, 131.8216592, 120.0354611, 131.2386...","[114.01646059999999, 108.0677757, 109.816301, ...","[116.990803, 100.2892917, 111.9658503, 91.4402...","[112.9262665, 105.8047902, 120.0354611, 95.631...","[97.18256729, 120.0354611, 105.8047902, 112.08...","[94.61389896, 98.15330084, 104.015528, 84.4482...",104.579754,104.495728,103.139184,104.423138,105.929476,104.983085,104.792491,"[151.6491807824384, 126.58822047326248, 176.69...",104.974354
9,2020-08-18,1610612753,1610612749,"[97.66596022, 100.92595779999999, 101.9558146,...","[94.74681757, 113.0712602, 109.0592232, 104.01...","[94.18751091, 110.19466829999999, 102.1190908,...","[100.0980443, 113.2842384, 97.84403131, 103.89...","[89.4079619, 111.7599524, 94.26709027, 92.3234...","[97.83638771, 108.9397462, 97.18256729, 102.25...","[129.2039265, 103.11053829999999, 131.23863400...",104.868546,104.702667,103.666585,104.170867,103.839533,104.787121,106.285832,"[73.63582859714393, 160.60506248300385, 105.18...",105.413628


Unnamed: 0,Date,TeamID,OpponentTeamID,PlayoffMins,Age,SeriesGP,MissingMins,OppPointsPG,PointsPGOpponent,PrevOPTS,PlayoffMinsSum,AgeSum,SeriesGPSum,MissingMinsSum,OppPointsPGSum,PointsPGOpponentSum,PrevOPTSSum,AdjustedOPTSCombined,AdjustedOPTSLinearRegression
0,2020-08-17,1610612743,1610612762,"[98.85273266, 92.57919146, 101.7353752, 105.04...","[103.2389767, 105.09343319999999, 131.23863400...","[111.224525, 87.4924227, 116.990803, 103.23897...","[87.4924227, 89.4079619, 105.0453847, 105.0453...","[89.4079619, 104.9571727, 95.17895839, 118.562...","[105.91364209999999, 84.44825044, 102.1190908,...","[102.1190908, 103.0135213, 82.59118139, 120.03...",104.80006,104.693407,103.472767,103.707953,104.674999,104.975452,106.461981,"[49.74165985896025, -4.735927442233788, 116.10...",105.532385
1,2020-08-17,1610612762,1610612743,"[101.1276433, 112.7317781, 92.20461594, 103.89...","[104.9571727, 94.26709027, 98.85273266, 103.98...","[87.75701777, 118.97063030000001, 111.224525, ...","[120.0354611, 109.87420520000002, 109.0592232,...","[109.816301, 107.10509809999999, 131.238634000...","[100.71802149999999, 104.9049408, 110.78812669...","[106.1886618, 115.0079081, 98.15330084, 103.23...",104.370975,104.761074,103.520335,104.216253,104.891841,104.643757,105.747519,"[114.30118873556057, 157.6400228377447, 113.99...",104.800599
2,2020-08-17,1610612761,1610612751,"[84.44825044, 98.32283498, 101.7353752, 99.861...","[98.15330084, 98.85273266, 101.27252, 102.2557...","[126.3373375, 82.29014111, 97.84403131, 119.95...","[105.0453847, 126.6723757, 100.92595779999999,...","[113.94362029999999, 89.4079619, 96.35637828, ...","[127.3091631, 122.5529488, 88.56767729, 122.55...","[110.0506706, 105.91364209999999, 87.75701777,...",103.788655,104.538634,103.586701,104.096557,104.572397,105.029456,105.404839,"[150.15788879918296, 121.87153615893817, 17.14...",103.084437
3,2020-08-17,1610612751,1610612761,"[100.87013540000001, 102.1190908, 112.08803190...","[109.816301, 100.0980443, 105.91364209999999, ...","[93.2952646, 118.97063030000001, 102.2557484, ...","[113.94362029999999, 94.38992158, 105.0453847,...","[111.9658503, 111.224525, 104.10198570000001, ...","[93.2952646, 100.92595779999999, 116.990803, 1...","[104.10198570000001, 131.23863400000002, 101.8...",105.655843,104.783115,103.564648,104.110153,105.371198,105.1998,105.98838,"[103.84821025027918, 143.45233160285397, 124.2...",107.780617
4,2020-08-17,1610612738,1610612755,"[92.57919146, 92.32343892, 101.87883670000001,...","[109.1383468, 105.8047902, 109.1648115, 106.92...","[87.75701777, 98.85273266, 75.28417767, 128.80...","[99.89610113, 97.84403131, 99.12621863, 105.04...","[111.7599524, 112.0335656, 116.990803, 106.922...","[106.92234350000001, 92.32343892, 103.89623940...","[92.20461594, 88.23882601, 116.0209453, 98.866...",104.436872,104.65196,103.536774,103.637272,104.592526,104.527118,103.358029,"[40.764556903297944, 22.123436526936302, 76.15...",98.271216
5,2020-08-17,1610612755,1610612738,"[109.1648115, 88.23882601, 117.5909064, 94.187...","[117.987402, 117.987402, 96.17040587, 100.7180...","[113.02501310000001, 81.70480965, 119.9651455,...","[87.4924227, 99.86143402, 99.70066773, 113.983...","[75.28417767, 91.56183771, 120.0354611, 100.71...","[105.8047902, 131.23863400000002, 102.2557484,...","[102.2557484, 105.8047902, 119.95385870000001,...",105.094093,104.627777,103.376377,103.932246,104.790403,105.394118,103.294054,"[82.06666256255608, 83.83374513731678, 157.489...",100.657887
6,2020-08-17,1610612746,1610612742,"[81.70480965, 112.9262665, 112.0335656, 111.75...","[75.35000873, 97.18256729, 114.01646059999999,...","[112.0335656, 91.79182319, 104.2222051, 104.01...","[105.0453847, 82.29014111, 127.3091631, 92.323...","[101.27252, 101.27252, 115.0079081, 115.647255...","[119.9651455, 108.0677757, 89.59753401, 108.13...","[105.8047902, 97.84403131, 105.09343319999999,...",103.815998,104.545883,103.316588,104.098592,104.691495,105.879414,104.117757,"[58.819014788901086, 14.402505327921745, 176.8...",101.329631
7,2020-08-17,1610612742,1610612746,"[101.27252, 129.1137733, 106.07524140000001, 1...","[113.02501310000001, 77.67000424, 109.87420520...","[97.84403131, 102.752729, 91.56183771, 96.2107...","[110.19466829999999, 110.19466829999999, 111.9...","[105.0453847, 128.8029138, 97.84403131, 114.96...","[89.59753401, 100.71802149999999, 113.983253, ...","[111.9658503, 106.1886618, 98.85273266, 97.844...",107.396734,104.467806,103.470571,104.011144,105.42205,105.667659,106.482182,"[113.95784847373193, 116.44799488833178, 109.3...",110.677196
8,2020-08-18,1610612749,1610612753,"[98.32283498, 94.38992158, 97.84403131, 97.844...","[118.97063030000001, 94.38992158, 94.74681757,...","[106.07524140000001, 97.84403131, 97.84403131,...","[104.9571727, 101.7353752, 96.35637828, 127.30...","[87.4924227, 134.2906953, 89.59753401, 134.290...","[118.97063030000001, 118.97063030000001, 98.15...","[89.59753401, 94.61389896, 95.17895839, 118.56...",103.484033,104.583879,103.506389,104.494068,105.02981,103.883414,106.445679,"[102.41745194816212, 73.81585313114056, 11.482...",104.529312
9,2020-08-18,1610612753,1610612749,"[106.1886618, 114.31409509999999, 82.29014111,...","[117.5909064, 108.85685149999999, 118.97063030...","[109.816301, 119.03038899999999, 101.1276433, ...","[121.06509650000001, 104.10198570000001, 104.2...","[117.5909064, 107.10509809999999, 104.015528, ...","[104.9049408, 114.31409509999999, 82.713511, 1...","[119.03038899999999, 131.23863400000002, 106.8...",106.735145,104.558665,103.658991,104.183998,104.59906,106.012945,106.612265,"[216.51610830474124, 213.03932599982568, 70.29...",110.87924


Unnamed: 0,Date,TeamID,OpponentTeamID,PointsPG,OppPointsPG,TeamIDOpponent,PointsPGOpponent,OppPointsPGOpponent,SeriesGP,PrevPTS,PrevOPTS,MissingMins,Age,PlayoffMins,AdjustedPTSCombined,AdjustedPTSLinearRegression,AdjustedOPTSCombined,AdjustedOPTSLinearRegression,WinProba,WinProbaScaled,WinProba2x
0,2020-08-17,1610612743,1610612762,111.3,109.2,1610612762,111.3,108.8,1,109,117,0.786909,391.0,6895.763333,"[175.45041462862832, 187.17752472752773, 110.7...",105.014157,"[49.74165985896025, -4.735927442233788, 116.10...",105.532385,4.956,0.493036,0.486072
1,2020-08-17,1610612762,1610612743,111.3,108.8,1610612743,111.3,109.2,1,118,112,0.87188,396.0,6328.35,"[224.62521069391755, 157.83416214117324, 165.8...",106.983888,"[114.30118873556057, 157.6400228377447, 113.99...",104.800599,5.096,0.506964,0.513928
2,2020-08-17,1610612761,1610612751,112.8,106.5,1610612751,111.8,112.3,1,117,109,0.801402,397.0,13204.62,"[183.19467744316262, 55.401586210358346, 47.62...",107.838911,"[150.15788879918296, 121.87153615893817, 17.14...",103.084437,5.215,0.510974,0.521948
3,2020-08-17,1610612751,1610612761,111.8,112.3,1610612761,112.8,106.5,1,133,134,0.773914,404.0,7254.545,"[167.14244765383955, 96.93922116736553, 48.596...",108.100662,"[103.84821025027918, 143.45233160285397, 124.2...",107.780617,4.991,0.489026,0.478052
4,2020-08-17,1610612738,1610612755,113.7,107.3,1610612755,110.7,108.4,1,90,96,0.803989,379.0,5772.186667,"[90.99660688092376, 280.2097637731058, 24.8919...",100.385808,"[40.764556903297944, 22.123436526936302, 76.15...",98.271216,5.065,0.484828,0.469656
5,2020-08-17,1610612755,1610612738,110.7,108.4,1610612738,113.7,107.3,1,134,96,0.774972,403.0,7951.311667,"[132.72362680088713, 169.75441849746767, 60.27...",108.331633,"[82.06666256255608, 83.83374513731678, 157.489...",100.657887,5.382,0.515172,0.530344
6,2020-08-17,1610612746,1610612742,116.3,109.9,1610612742,117.0,112.1,1,107,103,0.763245,412.0,13322.215,"[93.19487102742391, 239.84408926083825, 128.77...",104.484055,"[58.819014788901086, 14.402505327921745, 176.8...",101.329631,5.131,0.519964,0.539927
7,2020-08-17,1610612742,1610612746,117.0,112.1,1610612746,116.3,109.9,1,102,128,0.756717,415.0,3609.745,"[135.74987207957906, 56.00833520879178, 69.309...",104.669252,"[113.95784847373193, 116.44799488833178, 109.3...",110.677196,4.737,0.480036,0.460073
8,2020-08-18,1610612749,1610612753,118.7,108.6,1610612753,107.3,108.3,1,106,119,0.726385,446.0,16320.056667,"[151.6491807824384, 126.58822047326248, 176.69...",104.974354,"[102.41745194816212, 73.81585313114056, 11.482...",104.529312,4.996,0.516703,0.533406
9,2020-08-18,1610612753,1610612749,107.3,108.3,1610612749,118.7,108.6,1,133,127,0.756219,393.0,4178.705,"[73.63582859714393, 160.60506248300385, 105.18...",105.413628,"[216.51610830474124, 213.03932599982568, 70.29...",110.87924,4.673,0.483297,0.466594


In [57]:
for x in range(1, len(final),2):
    temp = final['AdjustedPTSLinearRegression'].iloc[x]
    final['AdjustedPTSLinearRegression'].iloc[x] = final['AdjustedOPTSLinearRegression'].iloc[x]
    final['AdjustedOPTSLinearRegression'].iloc[x] = temp
final =final.groupby(np.arange(len(final))//2).agg({'Date': 'first',
                                            'TeamID':'first',
                                             'OpponentTeamID':'first',
                                             'AdjustedPTSLinearRegression':'mean',
                                             'AdjustedOPTSLinearRegression':'mean',
                                             'WinProba2x':'first'})

In [58]:
import scipy.stats
norm_dist = scipy.stats.norm(0, 10)
final['ProbaNorm'] = final.apply(lambda x: norm_dist.cdf(x['AdjustedPTSLinearRegression'] - x['AdjustedOPTSLinearRegression']), axis = 1)
final

Unnamed: 0,Date,TeamID,OpponentTeamID,AdjustedPTSLinearRegression,AdjustedOPTSLinearRegression,WinProba2x,ProbaNorm
0,2020-08-17,1610612743,1610612762,104.907378,106.258136,0.486072,0.446276
1,2020-08-17,1610612761,1610612751,107.809764,105.59255,0.521948,0.587735
2,2020-08-17,1610612738,1610612755,100.521847,103.301425,0.469656,0.390522
3,2020-08-17,1610612746,1610612742,107.580625,102.999441,0.539927,0.676566
4,2020-08-18,1610612749,1610612753,107.926797,104.97147,0.533406,0.616207
5,2020-08-18,1610612754,1610612748,105.55931,100.580296,0.556782,0.690723
6,2020-08-18,1610612745,1610612760,104.115003,104.283525,0.494876,0.493277
7,2020-08-18,1610612747,1610612757,108.093849,106.033605,0.521537,0.581614


In [59]:
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual
0,2020-08-17,,1610612743,1610612762,Nuggets,Jazz,,,,,,
1,2020-08-17,,1610612761,1610612751,Raptors,Nets,,,,,,
2,2020-08-17,,1610612738,1610612755,Celtics,76ers,,,,,,
3,2020-08-17,,1610612746,1610612742,Clippers,Mavericks,,,,,,
4,2020-08-18,,1610612749,1610612753,Bucks,Magic,,,,,,
5,2020-08-18,,1610612754,1610612748,Pacers,Heat,,,,,,
6,2020-08-18,,1610612745,1610612760,Rockets,Thunder,,,,,,
7,2020-08-18,,1610612747,1610612757,Lakers,Trail Blazers,,,,,,


In [60]:
def get_game_info_calculated(row, final_col, games_col):
    if(not pd.isna(row[games_col])):
        return row[games_col]
    temp = final[final['TeamID'] == row['TeamID']]
    temp = final[final['OpponentTeamID'] == row['OpponentTeamID']]
    return temp[final_col].iloc[0]

games['TeamWinProba'] = games.apply(lambda x: get_game_info_calculated(x, 'ProbaNorm', 'TeamWinProba'), axis = 1)
games['OpponentTeamWinProba'] = games.apply(lambda x: 1 - x['TeamWinProba'], axis = 1)
games['TeamPoints'] = games.apply(lambda x: get_game_info_calculated(x, 'AdjustedPTSLinearRegression', 'TeamPoints'), axis = 1)
games['OpponentTeamPoints'] = games.apply(lambda x: get_game_info_calculated(x, 'AdjustedOPTSLinearRegression', 'OpponentTeamPoints'), axis = 1)
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual
0,2020-08-17,,1610612743,1610612762,Nuggets,Jazz,0.446276,0.553724,104.907378,106.258136,,
1,2020-08-17,,1610612761,1610612751,Raptors,Nets,0.587735,0.412265,107.809764,105.59255,,
2,2020-08-17,,1610612738,1610612755,Celtics,76ers,0.390522,0.609478,100.521847,103.301425,,
3,2020-08-17,,1610612746,1610612742,Clippers,Mavericks,0.676566,0.323434,107.580625,102.999441,,
4,2020-08-18,,1610612749,1610612753,Bucks,Magic,0.616207,0.383793,107.926797,104.97147,,
5,2020-08-18,,1610612754,1610612748,Pacers,Heat,0.690723,0.309277,105.55931,100.580296,,
6,2020-08-18,,1610612745,1610612760,Rockets,Thunder,0.493277,0.506723,104.115003,104.283525,,
7,2020-08-18,,1610612747,1610612757,Lakers,Trail Blazers,0.581614,0.418386,108.093849,106.033605,,


In [65]:
from datetime import date

today = date.today()

pd.read_csv('predictions/games.csv').to_csv('predictions/games_' + today.strftime("%d-%m-%Y") +'_before.csv', index = False)
games.to_csv('predictions/games.csv', index = False)
games.to_csv('predictions/games_' + today.strftime("%d-%m-%Y") +'_after.csv', index = False)