In [21]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashplayerstats, leaguedashteamclutch, leaguegamelog, boxscoreplayertrackv2, playercareerstats, teamplayerdashboard, leaguestandings
from nba_api.stats.static import players, teams
import time
pd.set_option('display.max_columns', None)
from IPython import display as ICD
import warnings
from datetime import date
import pickle
warnings.filterwarnings('ignore')

In [22]:
abbrv_mapping = {'DEN':"1610612743", 
           'SAS':"1610612759", 
           'IND':"1610612754", 
           'MEM':"1610612763", 
           'NOP':"1610612740", 
           'CHI':"1610612741", 
           'MIN':"1610612750", 
           'MIA':"1610612748", 
           'ATL':"1610612737",
           'ORL':"1610612753", 
           'SAC':"1610612758", 
           'UTA':"1610612762", 
           'CHA':"1610612766", 
           'WAS':"1610612764", 
           'HOU':"1610612745", 
           'PHX':"1610612756", 
           'OKC':"1610612760", 
           'POR':"1610612757",
           'LAC':"1610612746", 
           'GSW':"1610612744", 
           'PHI':"1610612755", 
           'LAL':"1610612747", 
           'BKN':"1610612751", 
           'TOR':"1610612761", 
           'BOS':"1610612738", 
           'MIL':"1610612749", 
           'DET':"1610612765",
           'CLE':"1610612739", 
           'NYK':"1610612752",
           'DAL':"1610612742"}

name_mapping = {'Nuggets':"1610612743", 
           'Spurs':"1610612759", 
           'Pacers':"1610612754", 
           'Memphis':"1610612763", 
           'Pelicans':"1610612740", 
           'Bulls':"1610612741", 
           'Timberwolves':"1610612750", 
           'Heat':"1610612748", 
           'Hawks':"1610612737",
           'Magic':"1610612753", 
           'Kings':"1610612758", 
           'Jazz':"1610612762", 
           'Hornets':"1610612766", 
           'Wizards':"1610612764", 
           'Rockets':"1610612745", 
           'Suns':"1610612756", 
           'Thunder':"1610612760", 
           'Trail Blazers':"1610612757",
           'Clippers':"1610612746", 
           'Warriors':"1610612744", 
           '76ers':"1610612755", 
           'Lakers':"1610612747", 
           'Nets':"1610612751", 
           'Raptors':"1610612761", 
           'Celtics':"1610612738", 
           'Bucks':"1610612749", 
           'Pistons':"1610612765",
           'Cavaliers':"1610612739", 
           'Knicks':"1610612752",
           'Mavericks':"1610612742"}

In [23]:
def clean_all_playoff_games(season_name):
    '''
    clean_all_playoff_games - add a few columns and remove irrelevant ones
    '''
    all_games = leaguegamelog.LeagueGameLog(season_type_all_star='Playoffs', season=season_name).get_data_frames()[0]
    all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE']) 
    all_games = all_games.sort_values('GAME_DATE',ascending = False)
    all_games['OpponentTeamID'] = all_games['MATCHUP'].apply(lambda x: int(abbrv_mapping[x[-3:]]))
    all_games['OPTS'] = all_games['PTS'] - all_games['PLUS_MINUS']
    all_games = all_games[['GAME_ID','GAME_DATE','TEAM_ID','OpponentTeamID','MATCHUP','PTS','OPTS']]
    return all_games.rename(columns = {'GAME_ID':'GameID','GAME_DATE':'GameDate', 'TEAM_ID':'TeamID'})

def clean_all_games(season_name):
    '''
    clean_all_playoff_games - add a few columns and remove irrelevant ones
    '''
    all_games = leaguegamelog.LeagueGameLog(season=season_name).get_data_frames()[0]
    all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE']) 
    all_games = all_games.sort_values('GAME_DATE',ascending = False)
    all_games['OpponentTeamID'] = all_games['MATCHUP'].apply(lambda x: int(abbrv_mapping[x[-3:]]))
    all_games['OPTS'] = all_games['PTS'] - all_games['PLUS_MINUS']
    all_games = all_games[['GAME_ID','GAME_DATE','TEAM_ID','OpponentTeamID','MATCHUP','PTS','OPTS']]
    return all_games.rename(columns = {'GAME_ID':'GameID','GAME_DATE':'GameDate', 'TEAM_ID':'TeamID'})

all_playoff_games = clean_all_playoff_games('2019-20')
all_regseason_playoff_games = clean_all_games('2019-20')
all_reg_season_playoff_games = pd.concat([all_playoff_games, all_regseason_playoff_games]).sort_values('GameDate',ascending = False)
all_reg_season_playoff_games.head(20)

Unnamed: 0,GameID,GameDate,TeamID,OpponentTeamID,MATCHUP,PTS,OPTS
63,41900144,2020-08-24,1610612747,1610612757,LAL @ POR,135,115
58,41900174,2020-08-24,1610612745,1610612760,HOU @ OKC,114,117
56,41900104,2020-08-24,1610612749,1610612753,MIL @ ORL,121,106
57,41900104,2020-08-24,1610612753,1610612749,ORL vs. MIL,106,121
62,41900144,2020-08-24,1610612757,1610612747,POR vs. LAL,115,135
59,41900174,2020-08-24,1610612760,1610612745,OKC vs. HOU,117,114
61,41900134,2020-08-24,1610612748,1610612754,MIA vs. IND,99,87
60,41900134,2020-08-24,1610612754,1610612748,IND @ MIA,87,99
52,41900114,2020-08-23,1610612761,1610612751,TOR @ BKN,150,122
48,41900124,2020-08-23,1610612738,1610612755,BOS @ PHI,110,106


In [24]:
player_rotation = pd.read_csv('players_rotation.csv')
players_df = pd.read_csv('players.csv')
first_game_missing_mins = player_rotation.groupby('TEAM_ID').sum()['MIN'] / players_df.groupby('TEAM_ID').sum()['MIN']

In [25]:
def missing_mins(curr_game_id, team_id, season_name):
    '''
    missing_mins: gets the fraction of the season's minutes played by injured/inactive/traded players
    '''
    time.sleep(0.5)
    # get the season stats of the team
    season_stats = teamplayerdashboard.TeamPlayerDashboard(team_id, season=season_name).get_data_frames()[1]
    
    # get the game currently being assessed
    curr_game = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=curr_game_id).get_data_frames()[0]
    
    player_game_stats = curr_game[curr_game['TEAM_ID'] == int(team_id)]
    combined = pd.merge(season_stats, player_game_stats, how='left', on='PLAYER_ID')
    
    total_mins = combined['MIN_x'].sum()
    missing_mins = combined[(combined['MIN_y'] == "0:00") | (combined['MIN_y'].isna())]['MIN_x'].sum()
    return (total_mins - missing_mins) / total_mins

In [26]:
games = pd.read_csv('predictions/games.csv')
games['Date'] = games['Date'].astype('datetime64')
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual,GameNum,Unnamed: 13,Unnamed: 14
0,2020-08-17,41900161.0,1610613000.0,1610613000.0,Nuggets,Jazz,0.446276,0.553724,104.907378,106.258136,135.0,125.0,1.0,,
1,2020-08-17,41900111.0,1610613000.0,1610613000.0,Raptors,Nets,0.587735,0.412265,107.809764,105.59255,134.0,110.0,1.0,,
2,2020-08-17,41900121.0,1610613000.0,1610613000.0,Celtics,76ers,0.390522,0.609478,100.521847,103.301425,109.0,101.0,1.0,,
3,2020-08-17,41900151.0,1610613000.0,1610613000.0,Clippers,Mavericks,0.676566,0.323434,107.580625,102.999441,118.0,110.0,1.0,,
4,2020-08-18,41900101.0,1610613000.0,1610613000.0,Bucks,Magic,0.616207,0.383793,107.926797,104.97147,110.0,122.0,1.0,,
5,2020-08-18,41900131.0,1610613000.0,1610613000.0,Pacers,Heat,0.690723,0.309277,105.55931,100.580296,101.0,113.0,1.0,,
6,2020-08-18,41900171.0,1610613000.0,1610613000.0,Rockets,Thunder,0.493277,0.506723,104.115003,104.283525,123.0,108.0,1.0,,
7,2020-08-18,41900141.0,1610613000.0,1610613000.0,Lakers,Trail Blazers,0.581614,0.418386,108.093849,106.033605,93.0,100.0,1.0,,
8,2020-08-19,41900162.0,1610613000.0,1610613000.0,Nuggets,Jazz,0.683257,0.316743,111.777117,107.008869,105.0,124.0,2.0,,
9,2020-08-19,41900112.0,1610613000.0,1610613000.0,Raptors,Nets,0.927164,0.072836,118.184497,103.634586,104.0,99.0,2.0,,


In [27]:
def get_gameid(row):
    if(not pd.isna(row['GameID'])):
        return row['GameID']
    temp = all_playoff_games[all_playoff_games['TeamID'] == row['TeamID']]
    temp = all_playoff_games[all_playoff_games['OpponentTeamID'] == row['OpponentTeamID']]
    temp = temp[temp['GameDate'].apply(lambda x: x.date() == row['Date'].date())]
    if(len(temp) != 0):
        return temp['GameID'].iloc[0]
    return row['GameID']
def get_game_info(row, attribute, alt_attribute):
    if(pd.isna(row['GameID'])):
        return row[alt_attribute]
    try:
        return all_playoff_games[(all_playoff_games['GameID'] == row['GameID']) & (all_playoff_games['TeamID'] == row['TeamID'])][attribute].iloc[0]
    except:
        return row[alt_attribute]
def get_game_num(row):
    if(not pd.isna(row['GameNum'])):
        return row['GameNum']
    temp = all_playoff_games[all_playoff_games['TeamID'] == row['TeamID']]
    temp = all_playoff_games[all_playoff_games['OpponentTeamID'] == row['OpponentTeamID']]
    return len(temp) + 1

games['TeamID'] = games['TeamName'].apply(lambda x: int(name_mapping[x]))
games['OpponentTeamID'] = games['OpponentTeamName'].apply(lambda x: int(name_mapping[x]))
games['GameID'] = games.apply(lambda x: get_gameid(x), axis = 1)
games['Date'] = games.apply(lambda x: get_game_info(x, 'GameDate', 'Date'), axis = 1)
games['TeamPointsActual'] = games.apply(lambda x: get_game_info(x, 'PTS', 'TeamPointsActual'), axis = 1)
games['OpponentTeamPointsActual'] = games.apply(lambda x: get_game_info(x, 'OPTS', 'OpponentTeamPointsActual'), axis = 1)
games['GameNum'] = games.apply(lambda x: get_game_num(x), axis = 1)
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual,GameNum,Unnamed: 13,Unnamed: 14
0,2020-08-17,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.446276,0.553724,104.907378,106.258136,135.0,125.0,1.0,,
1,2020-08-17,41900100.0,1610612761,1610612751,Raptors,Nets,0.587735,0.412265,107.809764,105.59255,134.0,110.0,1.0,,
2,2020-08-17,41900100.0,1610612738,1610612755,Celtics,76ers,0.390522,0.609478,100.521847,103.301425,109.0,101.0,1.0,,
3,2020-08-17,41900200.0,1610612746,1610612742,Clippers,Mavericks,0.676566,0.323434,107.580625,102.999441,118.0,110.0,1.0,,
4,2020-08-18,41900100.0,1610612749,1610612753,Bucks,Magic,0.616207,0.383793,107.926797,104.97147,110.0,122.0,1.0,,
5,2020-08-18,41900100.0,1610612754,1610612748,Pacers,Heat,0.690723,0.309277,105.55931,100.580296,101.0,113.0,1.0,,
6,2020-08-18,41900200.0,1610612745,1610612760,Rockets,Thunder,0.493277,0.506723,104.115003,104.283525,123.0,108.0,1.0,,
7,2020-08-18,41900100.0,1610612747,1610612757,Lakers,Trail Blazers,0.581614,0.418386,108.093849,106.033605,93.0,100.0,1.0,,
8,2020-08-19,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.683257,0.316743,111.777117,107.008869,105.0,124.0,2.0,,
9,2020-08-19,41900100.0,1610612761,1610612751,Raptors,Nets,0.927164,0.072836,118.184497,103.634586,104.0,99.0,2.0,,


In [28]:
def get_series_lead(games, curr_game):
    '''
    gets_series_length - get the game number of the series
    '''
    series = games[(games['TeamID'] == curr_game['TeamID']) & (games['OpponentTeamID'] == curr_game['OpponentTeamID'])]
    len_series = len(series)
    games_won = len(series[series['PTS'] > series['OPTS']])
    return 2 * games_won - len_series

def get_series_win_percent(games, curr_game):
    '''
    gets_series_length - get the game number of the series
    '''
    series = games[(games['TeamID'] == curr_game['TeamID']) & (games['OpponentTeamID'] == curr_game['OpponentTeamID'])]
    len_series = len(series) + 1
    games_won = len(series[series['PTS'] > series['OPTS']])
    return games_won/len_series

In [29]:
# creating the games dataframe
games_copy = games[pd.isna(games['TeamWinProba'])][['Date','TeamID', 'OpponentTeamID']]
games_copy_reversed = games_copy.copy()
games_copy_reversed['TeamID'] = games_copy['OpponentTeamID']
games_copy_reversed['OpponentTeamID'] = games_copy['TeamID']

all_games = pd.concat([games_copy, games_copy_reversed]).sort_index().reset_index(drop=True)

reg_season_ratings = leaguestandings.LeagueStandings(season='2019-20', season_type = 'Regular Season').get_data_frames()[0]
reg_season_ratings = reg_season_ratings[['TeamID','PointsPG','OppPointsPG']]

#ratings
all_games = pd.merge(all_games, reg_season_ratings, how='left', on='TeamID')
all_games = pd.merge(all_games, reg_season_ratings, how='left', left_on='OpponentTeamID', right_on='TeamID', suffixes=['','Opponent'])

all_games['SeriesGP'] = all_games.apply(lambda x: 1 + len(all_playoff_games[(all_playoff_games['TeamID'] == x['OpponentTeamID']) & (all_playoff_games['OpponentTeamID'] == x['TeamID'])]), axis = 1) 
all_games['SeriesLead'] = all_games.apply(lambda x: get_series_lead(all_playoff_games, x), axis = 1) 
all_games['SeriesWinPercent'] = all_games.apply(lambda x: get_series_win_percent(all_playoff_games, x), axis = 1) 





all_games['PrevPTS'] = all_games.apply(lambda x: all_playoff_games[all_playoff_games['TeamID'] == x['TeamID']]['PTS'].iloc[0], axis = 1) 
all_games['PrevOPTS'] = all_games.apply(lambda x: all_playoff_games[all_playoff_games['TeamID'] == x['TeamID']]['OPTS'].iloc[0], axis = 1) 
all_games['PrevMargin'] = all_games['PrevPTS'] - all_games['PrevOPTS']


all_games['MissingMins'] = all_games.apply(lambda x: missing_mins(all_playoff_games[all_playoff_games['TeamID'] == x['TeamID']]['GameID'].iloc[0], x['TeamID'], '2019-20'), axis = 1)
# first_game_missing_mins_copy = first_game_missing_mins.to_frame().reset_index()
# first_game_missing_mins_copy.columns = ['TeamID', 'MissingMins']
# all_games = pd.merge(all_games, first_game_missing_mins_copy, how='left', on='TeamID')


playoff_experience = pd.read_csv('playoff_experience.csv')
all_games = pd.merge(all_games, playoff_experience, how = 'left', on = 'TeamID')
all_games


Unnamed: 0,Date,TeamID,OpponentTeamID,PointsPG,OppPointsPG,TeamIDOpponent,PointsPGOpponent,OppPointsPGOpponent,SeriesGP,SeriesLead,SeriesWinPercent,PrevPTS,PrevOPTS,PrevMargin,MissingMins,Age,PlayoffMins
0,2020-08-25,1610612743,1610612762,111.3,109.2,1610612762,111.3,108.8,5,-2,0.2,127,129,-2,0.682784,391.0,6895.763333
1,2020-08-25,1610612762,1610612743,111.3,108.8,1610612743,111.3,109.2,5,2,0.6,129,127,2,0.749396,396.0,6328.35
2,2020-08-25,1610612746,1610612742,116.3,109.9,1610612742,117.0,112.1,5,0,0.4,133,135,-2,0.704119,412.0,13322.215
3,2020-08-25,1610612742,1610612746,117.0,112.1,1610612746,116.3,109.9,5,0,0.4,135,133,2,0.657621,415.0,3609.745
4,2020-08-26,1610612749,1610612753,118.7,108.6,1610612753,107.3,108.3,5,2,0.6,121,106,15,0.862176,446.0,16320.056667
5,2020-08-26,1610612753,1610612749,107.3,108.3,1610612749,118.7,108.6,5,-2,0.2,106,121,-15,0.688603,393.0,4178.705
6,2020-08-26,1610612745,1610612760,117.8,114.8,1610612760,110.4,108.4,5,0,0.4,114,117,-3,0.703378,434.0,16812.746667
7,2020-08-26,1610612760,1610612745,110.4,108.4,1610612745,117.8,114.8,5,0,0.4,117,114,3,0.830964,381.0,7951.911667
8,2020-08-26,1610612747,1610612757,113.4,107.6,1610612757,115.0,116.1,5,2,0.6,135,115,20,0.839585,454.0,29173.07
9,2020-08-26,1610612757,1610612747,115.0,116.1,1610612747,113.4,107.6,5,-2,0.2,115,135,-20,0.735767,387.0,11450.935


In [30]:
# bias boosted monte carlo simulation

In [31]:
import random
import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import LinearRegression
def get_weight(x, col_value, range_val):
    return 1/(00.1+abs((x-col_value)/range_val))
def get_rand_sample(df, col_name, col_value, sample_col_name, N, range_val):
    weights = df[col_name].apply(lambda x: get_weight(x,col_value, range_val))
    to_return = random.choices(population=list(df[sample_col_name]), 
                               weights=weights,
                               k=N)
    return to_return

In [32]:
pts_model = pickle.load( open( "pts_model", "rb" ) )
opts_model = pickle.load( open( "opts_model", "rb" ) )
range_vals = pickle.load( open( "range_vals", "rb" ) )
playoff_data = pd.read_csv('all_playoff_games.csv')
offense_cols = ['PlayoffMins','Age','SeriesGP','SeriesLead', 'SeriesWinPercent', 'MissingMins','PrevMargin','PointsPG','OppPointsPGOpponent','PrevPTS']
defense_cols = ['PlayoffMins','Age','SeriesGP','SeriesLead', 'SeriesWinPercent', 'MissingMins','PrevMargin','OppPointsPG','PointsPGOpponent','PrevOPTS']

In [33]:
def get_predictions(playoff_games, cols, predictor, N, model = None):
    predictions = playoff_games[['Date', 'TeamID','OpponentTeamID']]
    col_sum = []
    for x in cols:
        col_sum.append(x + "Sum")
    for col in cols:
        predictions[col] = playoff_games.apply(lambda x: get_rand_sample(playoff_data,col,x[col],predictor,N,range_vals[col]), axis = 1)
    for col in range(len(cols)):
        predictions[col_sum[col]] = predictions[cols[col]].apply(lambda x: sum(x)/N)
    X = predictions[col_sum]
    
    coefs = pd.Series(model.coef_, index = col_sum).abs()
    # predictions[predictor + 'Combined'] = predictions[cols].apply(lambda x: (np.array(x.values.tolist()) * coefs.values[:, None]).sum(axis=0)/coefs.sum(), axis = 1)
    predictions[predictor + 'Combined'] = predictions[cols].apply(lambda x: model.predict(np.array(x.values.tolist()).transpose()), axis = 1)
    predictions[predictor + 'LinearRegression'] = model.predict(X)
    temp = predictions[['TeamID'] + col_sum + [predictor+'LinearRegression']]
    temp['Name'] = temp['TeamID'].apply(lambda x: list(name_mapping.keys())[list(pd.Series(list(name_mapping.values())).astype(int)).index(x)])
    ICD.display(temp)
    temp.to_csv('predictions_intermed/' + predictor+date.today().strftime('%d-%m-%Y') + '.csv', index = False)
    return predictions[[predictor + 'Combined', predictor + 'LinearRegression']]

In [34]:
pts_df = get_predictions(all_games, offense_cols, 'AdjustedPTS', 10000, pts_model)
opts_df = get_predictions(all_games, defense_cols, 'AdjustedOPTS', 10000, opts_model)
final = pd.concat([all_games, pts_df, opts_df], axis = 1)
final['WinProba'] = final.apply(lambda x: np.count_nonzero(x['AdjustedPTSCombined'] > x['AdjustedOPTSCombined'])/1000, axis = 1)
final['WinProbaScaled'] = final.apply(lambda x: x['WinProba']* 1/final[((final['SeriesGP'] == x['SeriesGP'])) & (((final['TeamID'] == x['TeamID']) & (final['OpponentTeamID'] == x['OpponentTeamID'])) | ((final['TeamID'] == x['OpponentTeamID']) & (final['OpponentTeamID'] == x['TeamID'])))]['WinProba'].sum(), axis = 1)
final['WinProba2x'] = final.apply(lambda x: max(0,min(1,2*x['WinProbaScaled']-0.5)), axis = 1)
ICD.display(final)
final = final[['Date','TeamID','OpponentTeamID','AdjustedPTSLinearRegression','AdjustedOPTSLinearRegression', 'WinProba2x']]

Unnamed: 0,TeamID,PlayoffMinsSum,AgeSum,SeriesGPSum,SeriesLeadSum,SeriesWinPercentSum,MissingMinsSum,PrevMarginSum,PointsPGSum,OppPointsPGOpponentSum,PrevPTSSum,AdjustedPTSLinearRegression,Name
0,1610612743,105.011317,106.039277,105.786187,105.322353,105.54657,105.132686,105.872417,106.331738,106.09631,107.307715,104.164128,Nuggets
1,1610612762,105.209824,106.227884,105.727292,107.121638,106.759287,105.648645,106.067705,106.175462,106.079402,107.25533,106.173661,Jazz
2,1610612746,105.620334,106.083055,105.640941,105.368204,105.088813,105.692268,105.951209,108.398913,107.162885,107.285627,109.993346,Clippers
3,1610612742,105.345744,106.127344,105.625322,105.732065,105.228016,105.57628,106.322613,108.515635,106.151168,107.248067,108.325594,Mavericks
4,1610612749,106.132441,106.139426,105.437031,106.953831,106.824518,106.477284,106.99536,108.340725,105.877453,107.741829,112.459469,Bucks
5,1610612753,105.432251,105.765553,105.561727,105.385891,105.768322,105.411842,105.659248,104.457356,105.901056,106.282489,99.872345,Magic
6,1610612745,106.168141,106.128714,105.541255,105.518984,105.147122,105.489985,105.903604,108.767511,105.830687,107.438203,108.296902,Rockets
7,1610612760,105.073651,105.952232,105.540569,105.296318,105.090222,105.972789,106.130012,106.324573,107.448456,107.551842,107.761163,Thunder
8,1610612747,107.845573,106.026645,105.672908,106.987424,106.774807,106.388088,107.05255,107.234159,106.985203,107.179183,114.528151,Lakers
9,1610612757,104.870149,105.927467,105.391106,105.271349,105.235899,105.605239,105.558901,108.155188,105.800738,107.498785,105.373259,Trail Blazers


Unnamed: 0,TeamID,PlayoffMinsSum,AgeSum,SeriesGPSum,SeriesLeadSum,SeriesWinPercentSum,MissingMinsSum,PrevMarginSum,OppPointsPGSum,PointsPGOpponentSum,PrevOPTSSum,AdjustedOPTSLinearRegression,Name
0,1610612743,106.126353,106.158068,105.639081,106.855304,106.833165,105.956367,105.982656,106.083173,106.258006,107.620613,108.690601,Nuggets
1,1610612762,106.263326,106.212401,105.520462,105.703347,105.312687,105.593,105.741479,106.049904,106.337428,107.620057,106.302318,Jazz
2,1610612746,105.548777,106.230311,105.787005,105.570487,105.119223,105.841597,106.273761,106.121959,108.794007,107.633595,110.824344,Clippers
3,1610612742,106.937163,106.061799,105.539653,105.790215,105.137954,106.388289,105.89196,107.470308,108.491949,107.429012,115.855987,Mavericks
4,1610612749,105.176532,106.221234,105.358953,105.581809,105.371261,106.147607,105.657852,105.91888,104.32652,105.768997,99.505404,Bucks
5,1610612753,106.657798,105.954974,105.733982,107.02166,106.644286,105.983089,106.939203,105.728041,108.797701,107.750704,113.857908,Magic
6,1610612745,105.201961,106.209351,105.631042,105.654377,104.814558,105.83409,106.134796,107.334578,106.264111,107.441808,107.82743,Rockets
7,1610612760,106.298622,105.986922,105.428633,105.501674,105.056951,105.503775,105.657034,106.051272,108.902839,107.084742,109.338376,Thunder
8,1610612747,105.825795,106.158297,105.639943,105.475614,105.385628,105.644376,105.406918,105.899385,108.027776,107.152818,107.710259,Lakers
9,1610612757,105.84875,106.011928,105.532977,106.843613,106.692547,106.012217,106.94745,106.861423,107.293112,107.350539,111.646657,Trail Blazers


Unnamed: 0,Date,TeamID,OpponentTeamID,PointsPG,OppPointsPG,TeamIDOpponent,PointsPGOpponent,OppPointsPGOpponent,SeriesGP,SeriesLead,SeriesWinPercent,PrevPTS,PrevOPTS,PrevMargin,MissingMins,Age,PlayoffMins,AdjustedPTSCombined,AdjustedPTSLinearRegression,AdjustedOPTSCombined,AdjustedOPTSLinearRegression,WinProba,WinProbaScaled,WinProba2x
0,2020-08-25,1610612743,1610612762,111.3,109.2,1610612762,111.3,108.8,5,-2,0.2,127,129,-2,0.682784,391.0,6895.763333,"[127.48736540401933, 96.66756465473213, 161.50...",104.164128,"[134.63823553970656, 60.72357522157745, 61.029...",108.690601,4.746,0.488825,0.47765
1,2020-08-25,1610612762,1610612743,111.3,108.8,1610612743,111.3,109.2,5,2,0.6,129,127,2,0.749396,396.0,6328.35,"[182.67070964658274, 119.36468191253289, 52.60...",106.173661,"[122.16573826202398, 154.1757264477285, 168.49...",106.302318,4.963,0.511175,0.52235
2,2020-08-25,1610612746,1610612742,116.3,109.9,1610612742,117.0,112.1,5,0,0.4,133,135,-2,0.704119,412.0,13322.215,"[139.93805031036345, 180.70674943542735, 111.0...",109.993346,"[66.57060097142994, 16.446161814548077, 134.32...",110.824344,4.989,0.51662,0.53324
3,2020-08-25,1610612742,1610612746,117.0,112.1,1610612746,116.3,109.9,5,0,0.4,135,133,2,0.657621,415.0,3609.745,"[153.83990106009992, 147.75663068772838, 83.62...",108.325594,"[82.98203323078451, 158.63904435204427, 112.62...",115.855987,4.668,0.48338,0.46676
4,2020-08-26,1610612749,1610612753,118.7,108.6,1610612753,107.3,108.3,5,2,0.6,121,106,15,0.862176,446.0,16320.056667,"[67.7717620316082, 193.65595771690164, 60.7674...",112.459469,"[14.961989426643868, 93.05680367268042, 180.49...",99.505404,5.676,0.572466,0.644932
5,2020-08-26,1610612753,1610612749,107.3,108.3,1610612749,118.7,108.6,5,-2,0.2,106,121,-15,0.688603,393.0,4178.705,"[92.95021999777282, 80.13762786243205, 74.4251...",99.872345,"[171.62005311889584, 183.81484397448935, 44.25...",113.857908,4.239,0.427534,0.355068
6,2020-08-26,1610612745,1610612760,117.8,114.8,1610612760,110.4,108.4,5,0,0.4,114,117,-3,0.703378,434.0,16812.746667,"[42.199926889263224, 142.7131021594928, 74.898...",108.296902,"[147.2377813693156, 141.86486216049798, 58.684...",107.82743,5.069,0.50914,0.51828
7,2020-08-26,1610612760,1610612745,110.4,108.4,1610612745,117.8,114.8,5,0,0.4,117,114,3,0.830964,381.0,7951.911667,"[31.363032811443418, 91.27193634837136, 240.45...",107.761163,"[97.24330035474668, 75.84954519693224, 225.246...",109.338376,4.887,0.49086,0.48172
8,2020-08-26,1610612747,1610612757,113.4,107.6,1610612757,115.0,116.1,5,2,0.6,135,115,20,0.839585,454.0,29173.07,"[139.607952561807, 145.12018431903925, 137.313...",114.528151,"[214.5789669093574, 119.1358503803151, 97.2396...",107.710259,5.391,0.535034,0.570067
9,2020-08-26,1610612757,1610612747,115.0,116.1,1610612747,113.4,107.6,5,-2,0.2,115,135,-20,0.735767,387.0,11450.935,"[141.69009984713762, 106.49796940172973, 135.6...",105.373259,"[142.4681523387144, 5.747685386583953, 171.182...",111.646657,4.685,0.464966,0.429933


In [35]:
for x in range(1, len(final),2):
    temp = final['AdjustedPTSLinearRegression'].iloc[x]
    final['AdjustedPTSLinearRegression'].iloc[x] = final['AdjustedOPTSLinearRegression'].iloc[x]
    final['AdjustedOPTSLinearRegression'].iloc[x] = temp
final =final.groupby(np.arange(len(final))//2).agg({'Date': 'first',
                                            'TeamID':'first',
                                             'OpponentTeamID':'first',
                                             'AdjustedPTSLinearRegression':'mean',
                                             'AdjustedOPTSLinearRegression':'mean',
                                             'WinProba2x':'first'})

In [36]:
import scipy.stats
norm_dist = scipy.stats.norm(0, 10)
final['ProbaNorm'] = final.apply(lambda x: norm_dist.cdf(x['AdjustedPTSLinearRegression'] - x['AdjustedOPTSLinearRegression']), axis = 1)
final

Unnamed: 0,Date,TeamID,OpponentTeamID,AdjustedPTSLinearRegression,AdjustedOPTSLinearRegression,WinProba2x,ProbaNorm
0,2020-08-25,1610612743,1610612762,105.233223,107.432131,0.47765,0.412978
1,2020-08-25,1610612746,1610612742,112.924666,109.574969,0.53324,0.631176
2,2020-08-26,1610612749,1610612753,113.158689,99.688874,0.644932,0.911007
3,2020-08-26,1610612745,1610612760,108.817639,107.794297,0.51828,0.540754
4,2020-08-26,1610612747,1610612757,113.087404,106.541759,0.570067,0.743626


In [37]:
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual,GameNum,Unnamed: 13,Unnamed: 14
0,2020-08-17,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.446276,0.553724,104.907378,106.258136,135.0,125.0,1.0,,
1,2020-08-17,41900100.0,1610612761,1610612751,Raptors,Nets,0.587735,0.412265,107.809764,105.59255,134.0,110.0,1.0,,
2,2020-08-17,41900100.0,1610612738,1610612755,Celtics,76ers,0.390522,0.609478,100.521847,103.301425,109.0,101.0,1.0,,
3,2020-08-17,41900200.0,1610612746,1610612742,Clippers,Mavericks,0.676566,0.323434,107.580625,102.999441,118.0,110.0,1.0,,
4,2020-08-18,41900100.0,1610612749,1610612753,Bucks,Magic,0.616207,0.383793,107.926797,104.97147,110.0,122.0,1.0,,
5,2020-08-18,41900100.0,1610612754,1610612748,Pacers,Heat,0.690723,0.309277,105.55931,100.580296,101.0,113.0,1.0,,
6,2020-08-18,41900200.0,1610612745,1610612760,Rockets,Thunder,0.493277,0.506723,104.115003,104.283525,123.0,108.0,1.0,,
7,2020-08-18,41900100.0,1610612747,1610612757,Lakers,Trail Blazers,0.581614,0.418386,108.093849,106.033605,93.0,100.0,1.0,,
8,2020-08-19,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.683257,0.316743,111.777117,107.008869,105.0,124.0,2.0,,
9,2020-08-19,41900100.0,1610612761,1610612751,Raptors,Nets,0.927164,0.072836,118.184497,103.634586,104.0,99.0,2.0,,


In [38]:
def get_game_info_calculated(row, final_col, games_col):
    if(not pd.isna(row[games_col])):
        return row[games_col]
    temp = final[final['TeamID'] == row['TeamID']]
    temp = final[final['OpponentTeamID'] == row['OpponentTeamID']]
    return temp[final_col].iloc[0]

games['TeamWinProba'] = games.apply(lambda x: get_game_info_calculated(x, 'ProbaNorm', 'TeamWinProba'), axis = 1)
games['OpponentTeamWinProba'] = games.apply(lambda x: 1 - x['TeamWinProba'], axis = 1)
games['TeamPoints'] = games.apply(lambda x: get_game_info_calculated(x, 'AdjustedPTSLinearRegression', 'TeamPoints'), axis = 1)
games['OpponentTeamPoints'] = games.apply(lambda x: get_game_info_calculated(x, 'AdjustedOPTSLinearRegression', 'OpponentTeamPoints'), axis = 1)
games

Unnamed: 0,Date,GameID,TeamID,OpponentTeamID,TeamName,OpponentTeamName,TeamWinProba,OpponentTeamWinProba,TeamPoints,OpponentTeamPoints,TeamPointsActual,OpponentTeamPointsActual,GameNum,Unnamed: 13,Unnamed: 14
0,2020-08-17,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.446276,0.553724,104.907378,106.258136,135.0,125.0,1.0,,
1,2020-08-17,41900100.0,1610612761,1610612751,Raptors,Nets,0.587735,0.412265,107.809764,105.59255,134.0,110.0,1.0,,
2,2020-08-17,41900100.0,1610612738,1610612755,Celtics,76ers,0.390522,0.609478,100.521847,103.301425,109.0,101.0,1.0,,
3,2020-08-17,41900200.0,1610612746,1610612742,Clippers,Mavericks,0.676566,0.323434,107.580625,102.999441,118.0,110.0,1.0,,
4,2020-08-18,41900100.0,1610612749,1610612753,Bucks,Magic,0.616207,0.383793,107.926797,104.97147,110.0,122.0,1.0,,
5,2020-08-18,41900100.0,1610612754,1610612748,Pacers,Heat,0.690723,0.309277,105.55931,100.580296,101.0,113.0,1.0,,
6,2020-08-18,41900200.0,1610612745,1610612760,Rockets,Thunder,0.493277,0.506723,104.115003,104.283525,123.0,108.0,1.0,,
7,2020-08-18,41900100.0,1610612747,1610612757,Lakers,Trail Blazers,0.581614,0.418386,108.093849,106.033605,93.0,100.0,1.0,,
8,2020-08-19,41900200.0,1610612743,1610612762,Nuggets,Jazz,0.683257,0.316743,111.777117,107.008869,105.0,124.0,2.0,,
9,2020-08-19,41900100.0,1610612761,1610612751,Raptors,Nets,0.927164,0.072836,118.184497,103.634586,104.0,99.0,2.0,,


In [40]:
today = date.today()

pd.read_csv('predictions/games.csv').to_csv('predictions/games_' + today.strftime("%d-%m-%Y") +'_before.csv', index = False)
games.to_csv('predictions/games.csv', index = False)
games.to_csv('predictions/games_' + today.strftime("%d-%m-%Y") +'_after.csv', index = False)