In [550]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.library.parameters import SeasonAll

In [551]:
all_games = pd.read_csv('all_games.csv')

In [552]:
all_games['TS%'] = all_games['PTS'] / (2 * (all_games['FGA'] + (0.44 * all_games['FTA'])))

In [553]:
block_size = 30 # amount of games of context we are using for average stats
recent_size = 5 # amount of games of recent context we are using

def context_player(df):
    df['PTS_LB'] = df['PTS'].rolling(window=5, min_periods=1).mean().shift(-block_size)
    df['AST_LB'] = df['AST'].rolling(window=5, min_periods=1).mean().shift(-block_size)
    df['REB_LB'] = df['REB'].rolling(window=5, min_periods=1).mean().shift(-block_size)
    df['TS%_LR'] = df['TS%'].rolling(window=5, min_periods=1).mean().shift(-recent_size)
    df['MIN_LR'] = df['MIN'].rolling(window=5, min_periods=1).mean().shift(-recent_size)
    return df

all_games_plus = all_games.groupby('Player_ID').apply(context_player)
all_games_plus = all_games_plus.dropna()
all_games_plus.head()

  all_games_plus = all_games.groupby('Player_ID').apply(context_player)


Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2544,73591,22023,2544,22301195,"APR 14, 2024",LAL @ NOP,W,38,11,20,0.55,...,0,28,19,1,0.618375,25.6,9.4,8.8,0.679944,35.6
2544,73592,22023,2544,22301177,"APR 12, 2024",LAL @ MEM,W,41,13,20,0.65,...,1,37,-3,1,0.744767,26.4,9.4,8.8,0.734657,34.8
2544,73593,22023,2544,22301155,"APR 09, 2024",LAL vs. GSW,L,36,14,22,0.636,...,0,33,-6,1,0.681818,27.2,8.8,9.2,0.714572,34.6
2544,73594,22023,2544,22301127,"APR 06, 2024",LAL vs. CLE,W,36,10,18,0.556,...,1,24,10,1,0.594059,27.6,8.4,9.6,0.717841,34.4
2544,73595,22023,2544,22301103,"APR 03, 2024",LAL @ WAS,W,36,9,18,0.5,...,2,25,9,1,0.569217,25.4,7.2,7.0,0.71945,34.8


In [554]:
by_game = all_games_plus.sort_values(by=['Game_ID'])
by_game.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1627752,124958,22023,1627752,22301230,"DEC 07, 2023",LAL vs. NOP,W,23,5,9,0.556,...,3,15,23,1,0.833333,12.6,2.0,2.6,0.678032,28.0
1627742,70555,22023,1627742,22301230,"DEC 07, 2023",NOP @ LAL,L,31,4,13,0.308,...,3,9,-33,1,0.324207,29.8,8.2,6.6,0.603765,36.0
203469,161307,22023,203469,22301230,"DEC 07, 2023",NOP @ LAL,L,12,0,1,0.0,...,2,2,-5,1,0.531915,5.2,0.6,3.8,0.560812,10.4
1629627,158319,22023,1629627,22301230,"DEC 07, 2023",NOP @ LAL,L,26,6,8,0.75,...,3,13,-33,1,0.610902,30.0,4.2,8.0,0.695781,30.2
1626156,131852,22023,1626156,22301230,"DEC 07, 2023",LAL vs. NOP,W,22,6,10,0.6,...,1,14,23,1,0.7,15.8,6.0,2.0,0.570587,28.0


In [555]:
by_game = by_game[by_game['MIN'] >= 20]
by_game['Player_Count'] = by_game.groupby('Game_ID')['Game_ID'].transform('count') # don't need this column
by_game.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,PTS,PLUS_MINUS,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
202685,148034,22023,202685,22301230,"DEC 07, 2023",NOP @ LAL,L,23,3,9,0.333,...,9,-25,1,0.455466,14.4,2.6,12.4,0.721348,31.6,14
1627752,124958,22023,1627752,22301230,"DEC 07, 2023",LAL vs. NOP,W,23,5,9,0.556,...,15,23,1,0.833333,12.6,2.0,2.6,0.678032,28.0,14
1627742,70555,22023,1627742,22301230,"DEC 07, 2023",NOP @ LAL,L,31,4,13,0.308,...,9,-33,1,0.324207,29.8,8.2,6.6,0.603765,36.0,14
1629627,158319,22023,1629627,22301230,"DEC 07, 2023",NOP @ LAL,L,26,6,8,0.75,...,13,-33,1,0.610902,30.0,4.2,8.0,0.695781,30.2,14
1626156,131852,22023,1626156,22301230,"DEC 07, 2023",LAL vs. NOP,W,22,6,10,0.6,...,14,23,1,0.7,15.8,6.0,2.0,0.570587,28.0,14


In [556]:
by_game_filtered = by_game[by_game['Player_Count'] >= 10]
by_game_filtered.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 59306 entries, (203493, 22531) to (1626156, 131852)
Data columns (total 34 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SEASON_ID        59306 non-null  int64  
 1   Player_ID        59306 non-null  int64  
 2   Game_ID          59306 non-null  int64  
 3   GAME_DATE        59306 non-null  object 
 4   MATCHUP          59306 non-null  object 
 5   WL               59306 non-null  object 
 6   MIN              59306 non-null  int64  
 7   FGM              59306 non-null  int64  
 8   FGA              59306 non-null  int64  
 9   FG_PCT           59306 non-null  float64
 10  FG3M             59306 non-null  int64  
 11  FG3A             59306 non-null  int64  
 12  FG3_PCT          59306 non-null  float64
 13  FTM              59306 non-null  int64  
 14  FTA              59306 non-null  int64  
 15  FT_PCT           59306 non-null  float64
 16  OREB             59306 non-null 

In [557]:
by_game_filtered['HOME'] = by_game_filtered['MATCHUP'].apply(lambda x: 0 if '@' in x else 1)
by_game_filtered.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  by_game_filtered['HOME'] = by_game_filtered['MATCHUP'].apply(lambda x: 0 if '@' in x else 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,PLUS_MINUS,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count,HOME
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
203493,22531,22015,203493,21500879,"FEB 28, 2016",DET vs. TOR,W,26,3,6,0.5,...,5,1,0.666667,0.4,0.2,0.2,0.449153,10.6,10,1
203484,24956,22015,203484,21500879,"FEB 28, 2016",DET vs. TOR,W,36,7,10,0.7,...,30,1,0.766284,18.6,2.2,3.4,0.538383,33.6,10,1
203083,39433,22015,203083,21500879,"FEB 28, 2016",DET vs. TOR,W,30,6,10,0.6,...,19,1,0.554734,18.8,0.8,15.8,0.511153,33.0,10,1
202704,72802,22015,202704,21500879,"FEB 28, 2016",DET vs. TOR,W,28,7,12,0.583,...,19,1,0.690407,18.0,6.0,4.2,0.637372,30.6,10,1
202687,14496,22015,202687,21500879,"FEB 28, 2016",TOR @ DET,L,22,1,2,0.5,...,1,1,0.5,6.0,0.6,13.8,0.593357,20.0,10,0


In [558]:
by_game_sorted = by_game_filtered.sort_values(by=['Game_ID', 'HOME'])
by_game_sorted.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,PLUS_MINUS,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count,HOME
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
202687,14496,22015,202687,21500879,"FEB 28, 2016",TOR @ DET,L,22,1,2,0.5,...,1,1,0.5,6.0,0.6,13.8,0.593357,20.0,10,0
201942,36361,22015,201942,21500879,"FEB 28, 2016",TOR @ DET,L,35,9,21,0.429,...,-29,1,0.448029,25.4,5.0,5.2,0.483794,35.6,10,0
202709,80528,22015,202709,21500879,"FEB 28, 2016",TOR @ DET,L,20,4,10,0.4,...,-13,1,0.478927,4.6,2.8,2.4,0.617517,25.0,10,0
202685,148567,22015,202685,21500879,"FEB 28, 2016",TOR @ DET,L,21,5,6,0.833,...,-27,1,0.854037,7.2,0.6,6.4,0.684573,27.2,10,0
203493,22531,22015,203493,21500879,"FEB 28, 2016",DET vs. TOR,W,26,3,6,0.5,...,5,1,0.666667,0.4,0.2,0.2,0.449153,10.6,10,1
203484,24956,22015,203484,21500879,"FEB 28, 2016",DET vs. TOR,W,36,7,10,0.7,...,30,1,0.766284,18.6,2.2,3.4,0.538383,33.6,10,1
203083,39433,22015,203083,21500879,"FEB 28, 2016",DET vs. TOR,W,30,6,10,0.6,...,19,1,0.554734,18.8,0.8,15.8,0.511153,33.0,10,1
202704,72802,22015,202704,21500879,"FEB 28, 2016",DET vs. TOR,W,28,7,12,0.583,...,19,1,0.690407,18.0,6.0,4.2,0.637372,30.6,10,1
202699,60718,22015,202699,21500879,"FEB 28, 2016",DET vs. TOR,W,25,5,9,0.556,...,21,1,0.708502,16.2,1.6,6.8,0.602394,33.0,10,1
202694,108572,22015,202694,21500879,"FEB 28, 2016",DET vs. TOR,W,32,5,10,0.5,...,22,1,0.595238,14.2,2.0,4.6,0.402973,36.6,10,1


In [559]:
by_game_sorted['Team_Count'] = by_game_sorted.groupby(['Game_ID', 'HOME']).transform('size')
by_game_sorted.head(22)

Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,VIDEO_AVAILABLE,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count,HOME,Team_Count
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
202687,14496,22015,202687,21500879,"FEB 28, 2016",TOR @ DET,L,22,1,2,0.5,...,1,0.5,6.0,0.6,13.8,0.593357,20.0,10,0,4
201942,36361,22015,201942,21500879,"FEB 28, 2016",TOR @ DET,L,35,9,21,0.429,...,1,0.448029,25.4,5.0,5.2,0.483794,35.6,10,0,4
202709,80528,22015,202709,21500879,"FEB 28, 2016",TOR @ DET,L,20,4,10,0.4,...,1,0.478927,4.6,2.8,2.4,0.617517,25.0,10,0,4
202685,148567,22015,202685,21500879,"FEB 28, 2016",TOR @ DET,L,21,5,6,0.833,...,1,0.854037,7.2,0.6,6.4,0.684573,27.2,10,0,4
203493,22531,22015,203493,21500879,"FEB 28, 2016",DET vs. TOR,W,26,3,6,0.5,...,1,0.666667,0.4,0.2,0.2,0.449153,10.6,10,1,6
203484,24956,22015,203484,21500879,"FEB 28, 2016",DET vs. TOR,W,36,7,10,0.7,...,1,0.766284,18.6,2.2,3.4,0.538383,33.6,10,1,6
203083,39433,22015,203083,21500879,"FEB 28, 2016",DET vs. TOR,W,30,6,10,0.6,...,1,0.554734,18.8,0.8,15.8,0.511153,33.0,10,1,6
202704,72802,22015,202704,21500879,"FEB 28, 2016",DET vs. TOR,W,28,7,12,0.583,...,1,0.690407,18.0,6.0,4.2,0.637372,30.6,10,1,6
202699,60718,22015,202699,21500879,"FEB 28, 2016",DET vs. TOR,W,25,5,9,0.556,...,1,0.708502,16.2,1.6,6.8,0.602394,33.0,10,1,6
202694,108572,22015,202694,21500879,"FEB 28, 2016",DET vs. TOR,W,32,5,10,0.5,...,1,0.595238,14.2,2.0,4.6,0.402973,36.6,10,1,6


In [560]:
by_game_sorted['Opp_Count'] = by_game_sorted['Player_Count'] - by_game_sorted['Team_Count']
by_game_sorted = by_game_sorted[by_game_sorted['Team_Count'] > 4]
by_game_sorted = by_game_sorted[by_game_sorted['Opp_Count'] > 4]
by_game_sorted.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 49973 entries, (202696, 150635) to (1626156, 131852)
Data columns (total 37 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SEASON_ID        49973 non-null  int64  
 1   Player_ID        49973 non-null  int64  
 2   Game_ID          49973 non-null  int64  
 3   GAME_DATE        49973 non-null  object 
 4   MATCHUP          49973 non-null  object 
 5   WL               49973 non-null  object 
 6   MIN              49973 non-null  int64  
 7   FGM              49973 non-null  int64  
 8   FGA              49973 non-null  int64  
 9   FG_PCT           49973 non-null  float64
 10  FG3M             49973 non-null  int64  
 11  FG3A             49973 non-null  int64  
 12  FG3_PCT          49973 non-null  float64
 13  FTM              49973 non-null  int64  
 14  FTA              49973 non-null  int64  
 15  FT_PCT           49973 non-null  float64
 16  OREB             49973 non-null

In [561]:
by_game_sorted.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count,HOME,Team_Count,Opp_Count
Player_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
202696,150635,22016,202696,21600300,"DEC 04, 2016",ORL @ DET,W,28,8,14,0.571,...,0.571429,20.6,2.0,7.2,0.526287,29.4,11,0,5,6
203932,50670,22016,203932,21600300,"DEC 04, 2016",ORL @ DET,W,22,4,8,0.5,...,0.512295,12.2,2.2,6.8,0.450893,24.4,11,0,5,6
203095,44722,22016,203095,21600300,"DEC 04, 2016",ORL @ DET,W,32,1,7,0.143,...,0.317259,17.6,2.0,1.4,0.624145,33.8,11,0,5,6
202687,14452,22016,202687,21600300,"DEC 04, 2016",ORL @ DET,W,30,2,3,0.667,...,0.64433,5.0,0.0,5.0,0.414706,32.6,11,0,5,6
201145,55200,22016,201145,21600300,"DEC 04, 2016",ORL @ DET,W,30,6,11,0.545,...,0.636364,11.2,0.4,3.2,0.424553,24.6,11,0,5,6
203083,39391,22016,203083,21600300,"DEC 04, 2016",DET vs. ORL,L,35,5,11,0.455,...,0.39185,18.4,0.4,15.4,0.535754,27.6,11,1,6,5
202397,138344,22016,202397,21600300,"DEC 04, 2016",DET vs. ORL,L,25,5,10,0.5,...,0.505515,11.8,6.6,3.4,0.615155,31.0,11,1,6,5
203484,24914,22016,203484,21600300,"DEC 04, 2016",DET vs. ORL,L,34,4,12,0.333,...,0.375,13.2,2.4,4.6,0.575956,33.2,11,1,6,5
202704,72781,22016,202704,21600300,"DEC 04, 2016",DET vs. ORL,L,23,7,12,0.583,...,0.698758,18.6,4.8,1.8,0.540076,32.4,11,1,6,5
202699,60675,22016,202699,21600300,"DEC 04, 2016",DET vs. ORL,L,35,5,12,0.417,...,0.442122,16.0,3.4,7.2,0.663966,34.2,11,1,6,5


In [562]:
by_game_sorted = by_game_sorted.sort_values(by=['Game_ID', 'HOME', 'MIN'])

def filter_t5(group):
    return group.tail(5)

by_game_sorted_filtered = by_game_sorted.groupby(['Game_ID', 'HOME']).apply(filter_t5).reset_index(drop=True)
by_game_sorted_filtered.head(20)

  by_game_sorted_filtered = by_game_sorted.groupby(['Game_ID', 'HOME']).apply(filter_t5).reset_index(drop=True)


Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,TS%,PTS_LB,AST_LB,REB_LB,TS%_LR,MIN_LR,Player_Count,HOME,Team_Count,Opp_Count
0,22016,203932,21600300,"DEC 04, 2016",ORL @ DET,W,22,4,8,0.5,...,0.512295,12.2,2.2,6.8,0.450893,24.4,11,0,5,6
1,22016,202696,21600300,"DEC 04, 2016",ORL @ DET,W,28,8,14,0.571,...,0.571429,20.6,2.0,7.2,0.526287,29.4,11,0,5,6
2,22016,202687,21600300,"DEC 04, 2016",ORL @ DET,W,30,2,3,0.667,...,0.64433,5.0,0.0,5.0,0.414706,32.6,11,0,5,6
3,22016,201145,21600300,"DEC 04, 2016",ORL @ DET,W,30,6,11,0.545,...,0.636364,11.2,0.4,3.2,0.424553,24.6,11,0,5,6
4,22016,203095,21600300,"DEC 04, 2016",ORL @ DET,W,32,1,7,0.143,...,0.317259,17.6,2.0,1.4,0.624145,33.8,11,0,5,6
5,22016,202397,21600300,"DEC 04, 2016",DET vs. ORL,L,25,5,10,0.5,...,0.505515,11.8,6.6,3.4,0.615155,31.0,11,1,6,5
6,22016,203484,21600300,"DEC 04, 2016",DET vs. ORL,L,34,4,12,0.333,...,0.375,13.2,2.4,4.6,0.575956,33.2,11,1,6,5
7,22016,203083,21600300,"DEC 04, 2016",DET vs. ORL,L,35,5,11,0.455,...,0.39185,18.4,0.4,15.4,0.535754,27.6,11,1,6,5
8,22016,202699,21600300,"DEC 04, 2016",DET vs. ORL,L,35,5,12,0.417,...,0.442122,16.0,3.4,7.2,0.663966,34.2,11,1,6,5
9,22016,202694,21600300,"DEC 04, 2016",DET vs. ORL,L,35,7,12,0.583,...,0.815217,20.8,2.6,7.0,0.537834,32.0,11,1,6,5


In [563]:
by_game_sorted_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40970 entries, 0 to 40969
Data columns (total 37 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SEASON_ID        40970 non-null  int64  
 1   Player_ID        40970 non-null  int64  
 2   Game_ID          40970 non-null  int64  
 3   GAME_DATE        40970 non-null  object 
 4   MATCHUP          40970 non-null  object 
 5   WL               40970 non-null  object 
 6   MIN              40970 non-null  int64  
 7   FGM              40970 non-null  int64  
 8   FGA              40970 non-null  int64  
 9   FG_PCT           40970 non-null  float64
 10  FG3M             40970 non-null  int64  
 11  FG3A             40970 non-null  int64  
 12  FG3_PCT          40970 non-null  float64
 13  FTM              40970 non-null  int64  
 14  FTA              40970 non-null  int64  
 15  FT_PCT           40970 non-null  float64
 16  OREB             40970 non-null  int64  
 17  DREB        

In [564]:
from nba_api.stats.endpoints import boxscoreadvancedv2
from nba_api.stats.endpoints import boxscoretraditionalv2
from nba_api.stats.endpoints import boxscoresummaryv2

In [190]:
bsa = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0021800470')
bsa = bsa.get_data_frames()[0]
bsa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 32 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   GAME_ID            22 non-null     object 
 1   TEAM_ID            22 non-null     int64  
 2   TEAM_ABBREVIATION  22 non-null     object 
 3   TEAM_CITY          22 non-null     object 
 4   PLAYER_ID          22 non-null     int64  
 5   PLAYER_NAME        22 non-null     object 
 6   NICKNAME           22 non-null     object 
 7   START_POSITION     22 non-null     object 
 8   COMMENT            22 non-null     object 
 9   MIN                21 non-null     object 
 10  E_OFF_RATING       21 non-null     float64
 11  OFF_RATING         22 non-null     float64
 12  E_DEF_RATING       21 non-null     float64
 13  DEF_RATING         22 non-null     float64
 14  E_NET_RATING       21 non-null     float64
 15  NET_RATING         22 non-null     float64
 16  AST_PCT            21 non-nu

In [565]:
headers  = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

In [566]:
game_ids = all_games['Game_ID'].unique().tolist()
for i in range(len(game_ids)):
    game_ids[i] = f"{game_ids[i]:010}"
len(game_ids)

19055

In [193]:
all_games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162339 entries, 0 to 162338
Data columns (total 28 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SEASON_ID        162339 non-null  int64  
 1   Player_ID        162339 non-null  int64  
 2   Game_ID          162339 non-null  int64  
 3   GAME_DATE        162339 non-null  object 
 4   MATCHUP          162339 non-null  object 
 5   WL               162339 non-null  object 
 6   MIN              162339 non-null  int64  
 7   FGM              162339 non-null  int64  
 8   FGA              162339 non-null  int64  
 9   FG_PCT           162339 non-null  float64
 10  FG3M             162339 non-null  int64  
 11  FG3A             162339 non-null  int64  
 12  FG3_PCT          162339 non-null  float64
 13  FTM              162339 non-null  int64  
 14  FTA              162339 non-null  int64  
 15  FT_PCT           162339 non-null  float64
 16  OREB             162339 non-null  int6

In [339]:
def get_box_scores(game_id):
    game_data_adv = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id, headers=headers, timeout=100)
    df_adv = game_data_adv.player_stats.get_data_frame()
    game_data_trad = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id, headers=headers, timeout=100)
    df_trad = game_data_trad.player_stats.get_data_frame()
    game_data_sum = boxscoresummaryv2.BoxScoreSummaryV2(game_id=game_id, headers=headers, timeout=100)
    df_sum = game_data_sum.game_summary.get_data_frame()
    df_sum_plus = pd.concat([df_sum] * df_trad.shape[0], ignore_index=True)
    df_info = pd.concat([df_adv, df_trad, df_sum_plus], axis=1)
    df_info = df_info.T.drop_duplicates().T
    df_info = df_info[df_info['START_POSITION'].isin(['G', 'F', 'C'])]
    return df_info

def filter_box_scores(df):
    to_remove = [
        'WH_STATUS',
        'LIVE_PERIOD_TIME_BCAST',
        'NATL_TV_BROADCASTER_ABBREVIATION',
        'NICKNAME',
        'COMMENT',
        'E_OFF_RATING',
        'OFF_RATING',
        'E_DEF_RATING',
        'E_NET_RATING',
        'NET_RATING',
        'AST_PCT',
        'AST_TOV',
        'AST_RATIO',
        'OREB_PCT',
        'DREB_PCT',
        'REB_PCT',
        'TM_TOV_PCT',
        'EFG_PCT',
        'PIE',
        'FGM',
        'FGA',
        'FG_PCT',
        'FG3M',
        'FG3A',
        'FG3_PCT',
        'FTM',
        'FTA',
        'FT_PCT',
        'OREB',
        'DREB',
        'BLK',
        'STL',
        'TO',
        'PF',
        'PLUS_MINUS',
        'GAMECODE',
        'LIVE_PC_TIME',
        'LIVE_PERIOD',
        'TEAM_CITY',
        'GAME_STATUS_ID',
        'GAME_SEQUENCE',
        'GAME_STATUS_TEXT',
        'PACE_PER40',
        'E_USG_PCT',
        'E_PACE',
        'SEASON'
    ]

    for c in to_remove:
        if c in df:
            df = df.drop(c, axis=1)

    return df

def fix_time(df):
    df['YEAR'] = df['GAME_DATE_EST'].str[:4].astype(int)
    df['MONTH'] = df['GAME_DATE_EST'].str[5:7].astype(int)
    df['DAY'] = df['GAME_DATE_EST'].str[8:10].astype(int)
    # df = df.drop('GAME_DATE_EST', axis=1)
    return df

In [340]:
game_ids[:10]

['0022301190',
 '0022301175',
 '0022301167',
 '0022301139',
 '0022301119',
 '0022301112',
 '0022301096',
 '0022301082',
 '0022301070',
 '0022301053']

In [379]:
game_info = pd.DataFrame()
for i in range(639, 900):
    game_data = get_box_scores(game_ids[i])
    # game_data = filter_box_scores(game_data)
    # game_data = fix_time(game_data)
    game_info = pd.concat([game_info, game_data])
    if i % 10 == 0:
        print(i)
game_info.head(10)

640
650
660
670


InvalidIndexError: Reindexing only valid with uniquely valued Index objects

In [377]:
game_2 = pd.read_csv('better_games.csv')
game_3 = pd.concat([game_2, game_info])
game_3.to_csv('better_games.csv')
# game_info.info()

In [358]:
game_info = filter_box_scores(game_info)
game_info = fix_time(game_info)
game_info.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2000 entries, 0 to 19
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   GAME_ID            2000 non-null   object
 1   TEAM_ID            2000 non-null   object
 2   TEAM_ABBREVIATION  2000 non-null   object
 3   PLAYER_ID          2000 non-null   object
 4   PLAYER_NAME        2000 non-null   object
 5   START_POSITION     2000 non-null   object
 6   MIN                2000 non-null   object
 7   DEF_RATING         2000 non-null   object
 8   TS_PCT             2000 non-null   object
 9   USG_PCT            2000 non-null   object
 10  PACE               2000 non-null   object
 11  POSS               2000 non-null   object
 12  REB                2000 non-null   object
 13  AST                2000 non-null   object
 14  PTS                2000 non-null   object
 15  GAME_DATE_EST      2000 non-null   object
 16  HOME_TEAM_ID       2000 non-null   object
 17  VI

In [359]:
game_info = game_info.sort_values(by=['PLAYER_ID', 'YEAR', 'MONTH', 'DAY'])
game_info.head(20)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_ID,PLAYER_NAME,START_POSITION,MIN,DEF_RATING,TS_PCT,USG_PCT,...,POSS,REB,AST,PTS,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,YEAR,MONTH,DAY
14,22101028,1610612747,LAL,2544,LeBron James,C,40.000000:20,114.8,0.679,0.277,...,86,9.0,3.0,30.0,2022-03-14T00:00:00,1610612747,1610612761,2022,3,14
0,22101051,1610612747,LAL,2544,LeBron James,F,44.000000:41,120.7,0.619,0.327,...,86,9.0,7.0,36.0,2022-03-18T00:00:00,1610612761,1610612747,2022,3,18
1,22300701,1610612747,LAL,2544,LeBron James,F,40.000000:18,105.2,0.604,0.256,...,77,5.0,5.0,24.0,2024-02-03T00:00:00,1610612752,1610612747,2024,2,3
2,22101051,1610612747,LAL,2730,Dwight Howard,C,23.000000:53,143.2,0.689,0.13,...,45,6.0,0.0,7.0,2022-03-18T00:00:00,1610612761,1610612747,2022,3,18
4,22100613,1610612756,PHX,101108,Chris Paul,G,33.000000:15,92.4,0.631,0.189,...,65,2.0,12.0,15.0,2022-01-11T00:00:00,1610612761,1610612756,2022,1,11
18,22200762,1610612756,PHX,101108,Chris Paul,G,34.000000:40,110.0,0.72,0.2,...,69,4.0,9.0,19.0,2023-01-30T00:00:00,1610612756,1610612761,2023,1,30
2,22100928,1610612751,BKN,200746,LaMarcus Aldridge,C,29.000000:30,116.7,0.601,0.206,...,59,9.0,3.0,16.0,2022-03-01T00:00:00,1610612761,1610612751,2022,3,1
4,22101172,1610612748,MIA,200768,Kyle Lowry,G,34.000000:33,113.0,0.722,0.169,...,69,6.0,10.0,16.0,2022-04-03T00:00:00,1610612761,1610612748,2022,4,3
19,22200031,1610612748,MIA,200768,Kyle Lowry,G,37.000000:15,116.0,0.666,0.188,...,73,2.0,6.0,17.0,2022-10-22T00:00:00,1610612748,1610612761,2022,10,22
19,22200045,1610612748,MIA,200768,Kyle Lowry,G,36.000000:06,93.2,0.359,0.12,...,73,3.0,2.0,7.0,2022-10-24T00:00:00,1610612748,1610612761,2022,10,24


In [412]:
def get_adv(game_id):
    game_data_adv = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id, headers=headers, timeout=100)
    df_adv = game_data_adv.player_stats.get_data_frame()
    return df_adv
    
def get_trad(game_id):
    game_data_trad = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id, headers=headers, timeout=100)
    df_trad = game_data_trad.player_stats.get_data_frame()
    return df_trad

def get_sum(game_id):
    game_data_sum = boxscoresummaryv2.BoxScoreSummaryV2(game_id=game_id, headers=headers, timeout=100)
    df_sum = game_data_sum.game_summary.get_data_frame()
    return df_sum

In [438]:
sum_data = pd.DataFrame()
for i in range(6751, len(game_ids)):
    game_sum = get_sum(game_ids[i])
    sum_data = pd.concat([sum_data, game_sum])
    if i % 50 == 0:
        print(i)
# trad_data.info()

6800
6850
6900
6950
7000
7050
7100
7150
7200
7250
7300
7350
7400
7450
7500
7550
7600
7650
7700
7750
7800
7850
7900
7950
8000
8050
8100
8150
8200
8250
8300
8350
8400
8450
8500
8550
8600
8650
8700
8750
8800
8850
8900
8950
9000
9050
9100
9150
9200
9250
9300
9350
9400
9450
9500
9550
9600
9650
9700
9750
9800
9850
9900
9950
10000
10050
10100
10150
10200
10250
10300
10350
10400
10450
10500
10550
10600
10650
10700
10750
10800
10850
10900
10950
11000
11050
11100
11150
11200
11250
11300
11350
11400
11450
11500
11550
11600
11650
11700
11750
11800
11850
11900
11950
12000
12050
12100
12150
12200
12250
12300
12350
12400
12450
12500
12550
12600
12650
12700
12750
12800
12850
12900
12950
13000
13050
13100
13150
13200
13250
13300
13350
13400
13450
13500
13550
13600
13650
13700
13750
13800
13850
13900
13950
14000
14050
14100
14150
14200
14250
14300
14350
14400
14450
14500
14550
14600
14650
14700
14750
14800
14850
14900
14950
15000
15050
15100
15150
15200
15250
15300
15350
15400
15450
15500
15550
15600
15

In [441]:
# new_data = pd.read_csv('sum_data.csv')
# new_data = pd.concat([new_data, sum_data])
# new_data.to_csv('sum_data.csv')
sum_data.head()

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
0,2016-02-24T00:00:00,1,21500845,3,Final,20160224/CHACLE,1610612739,1610612766,2015,4,,,Q4 -,1
0,2016-02-21T00:00:00,5,21500827,3,Final,20160221/CHABKN,1610612751,1610612766,2015,4,,,Q4 -,1
0,2016-02-10T00:00:00,1,21500789,3,Final,20160210/CHAIND,1610612754,1610612766,2015,4,,,Q4 -,1
0,2016-02-08T00:00:00,1,21500774,3,Final,20160208/CHICHA,1610612766,1610612741,2015,4,,,Q4 -,1
0,2016-02-06T00:00:00,2,21500761,3,Final,20160206/WASCHA,1610612766,1610612764,2015,4,,,Q4 -,1


In [567]:
def get_refs(game_id):
    game_data_sum = boxscoresummaryv2.BoxScoreSummaryV2(game_id=game_id, headers=headers, timeout=100)
    df_refs = game_data_sum.officials.get_data_frame()
    lst_refs = df_refs.values.tolist()
    for i in range(len(lst_refs)):
        lst_refs[i].append(game_id)
    return lst_refs

In [568]:
ref_data = []
for i in range(8801, len(game_ids)):
    game_refs = get_refs(game_ids[i])
    ref_data.extend(game_refs)

    if i % 200 == 0:
        print(i)

9000
9200
9400
9600
9800
10000
10200
10400
10600
10800
11000
11200
11400
11600
11800
12000
12200
12400
12600
12800
13000
13200
13400
13600
13800
14000
14200
14400
14600
14800
15000
15200
15400
15600
15800
16000
16200
16400
16600
16800
17000
17200
17400
17600
17800
18000
18200
18400
18600
18800
19000


In [596]:
old_ref = pd.read_csv('ref_data.csv')
old_ref = old_ref.drop(columns=['Unnamed: 0'])
new_ref = pd.DataFrame(ref_data)
new_ref.columns = ['0', '1', '2', '3', '4']
final_ref = pd.concat([old_ref, new_ref], axis=0, ignore_index=True)
final_ref.to_csv('ref_data.csv')
final_ref.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57184 entries, 0 to 57183
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       57184 non-null  int64 
 1   1       57184 non-null  object
 2   2       57184 non-null  object
 3   3       57184 non-null  object
 4   4       57184 non-null  object
dtypes: int64(1), object(4)
memory usage: 2.2+ MB


In [2]:
import requests
import os

In [682]:
apiKey = 

sport = 'basketball_nba'
date = '2023-11-07T00:40:00Z'

In [683]:
event_response = requests.get(
    f'https://api.the-odds-api.com//v4/historical/sports/{sport}/events?apiKey={apiKey}&date={date}',
)
if event_response.status_code == 200:
    odds_json = event_response.json()
else:
    print(f'Failed to get odds: status_code {event_response.status_code}, response body {event_response.text}')

In [684]:
odds_json

{'timestamp': '2023-11-07T00:35:42Z',
 'previous_timestamp': '2023-11-07T00:30:43Z',
 'next_timestamp': '2023-11-07T00:40:42Z',
 'data': [{'id': '88b6c95aa5c16857922d78700d0a11fa',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-11-07T00:10:00Z',
   'home_team': 'Orlando Magic',
   'away_team': 'Dallas Mavericks'},
  {'id': '3dc423d708b971aac9e77ac7b206ad73',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-11-07T00:10:32Z',
   'home_team': 'Indiana Pacers',
   'away_team': 'San Antonio Spurs'},
  {'id': '69644724ea2b68cb009079f406db54e3',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-11-07T00:10:39Z',
   'home_team': 'Detroit Pistons',
   'away_team': 'Golden State Warriors'},
  {'id': '4ef2d407cd6d9b3352624148f8336c68',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-11-07T00:10:55Z',
   'home_team': 'Philadelphia 76ers',
   'away_team':

In [443]:
events = []
for event in odds_json['data']:
    event_id = event['id']
    commence_time = event['commence_time']
    home_team = event['home_team']
    away_team = event['away_team']

    events.append([event_id, commence_time, home_team, away_team])

events_df = pd.DataFrame(events)

In [444]:
all_events = pd.concat([all_events, events_df])

In [445]:
date = all_events[1].iloc[-1]
date

'2024-06-18T00:40:00Z'

In [439]:
all_events.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1809 entries, 0 to 0
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1809 non-null   object
 1   1       1809 non-null   object
 2   2       1809 non-null   object
 3   3       1809 non-null   object
dtypes: object(4)
memory usage: 70.7+ KB


In [403]:
for i in range(5):
    event_response = requests.get(
        f'https://api.the-odds-api.com//v4/historical/sports/{sport}/events?apiKey={apiKey}&date={date}',
    )
    if event_response.status_code == 200:
        odds_json = event_response.json()
    else:
        print(f'Failed to get odds: status_code {event_response.status_code}, response body {event_response.text}')
    
    events = []
    for event in odds_json['data']:
        event_id = event['id']
        commence_time = event['commence_time']
        home_team = event['home_team']
        away_team = event['away_team']

        events.append([event_id, commence_time, home_team, away_team])

    events_df = pd.DataFrame(events)
    all_events = pd.concat([all_events, events_df])
    date = all_events[1].iloc[-1]
    print(i)

all_events.info()

0
1
2
3
4
<class 'pandas.core.frame.DataFrame'>
Index: 1804 entries, 0 to 0
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1804 non-null   object
 1   1       1804 non-null   object
 2   2       1804 non-null   object
 3   3       1804 non-null   object
dtypes: object(4)
memory usage: 70.5+ KB


In [450]:
all_events.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1810 entries, 0 to 0
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1810 non-null   object
 1   1       1810 non-null   object
 2   2       1810 non-null   object
 3   3       1810 non-null   object
dtypes: object(4)
memory usage: 70.7+ KB


In [451]:
all_events.to_csv('events.csv')

In [452]:
events_info = pd.read_csv('events.csv')
events_info = events_info.drop(columns=['Unnamed: 0'])
events_info = events_info.drop_duplicates()

In [615]:
events_info.iloc[40:50]

Unnamed: 0,0,1,2,3
47,df65d43286f345b284c706cffd7ba17e,2023-05-30T00:30:00Z,Boston Celtics,Miami Heat
48,7c21e4f48eeb70e19f7932c0760b2f1b,2023-06-02T00:30:00Z,Denver Nuggets,Miami Heat
49,047095505c9c25015b083f6e1c68ea23,2023-06-05T00:00:00Z,Denver Nuggets,Miami Heat
50,69623f443d3a43b18a90221f1ced9ea2,2023-06-08T00:30:00Z,Miami Heat,Denver Nuggets
51,1b1a88e54d833b43de000f9e6263a60e,2023-06-10T00:30:00Z,Miami Heat,Denver Nuggets
52,f60db88622dc591abd922d97da63f34a,2023-06-13T00:30:00Z,Denver Nuggets,Miami Heat
53,fd55db2fa9ee5be1f108be5151e2ecb0,2023-10-24T23:00:00Z,Denver Nuggets,Los Angeles Lakers
54,a44e83dd9ce3f2317ec644774daa859b,2023-10-24T23:00:00Z,Golden State Warriors,Phoenix Suns
55,184da460295c1cfd98b0ff31e8fc2b13,2023-12-25T16:00:00Z,Miami Heat,Philadelphia 76ers
56,5a4b28938902c4fba16da19c6e8c1296,2023-12-25T16:00:00Z,New York Knicks,Milwaukee Bucks


In [882]:
apiKey = '0472e2e243597df61ff8139f6a2841ab'

sport = 'basketball_nba'
date = '2023-12-24T00:40:00Z'

In [883]:
event_response = requests.get(
    f'https://api.the-odds-api.com//v4/historical/sports/{sport}/events?apiKey={apiKey}&date={date}',
)
if event_response.status_code == 200:
    odds_json = event_response.json()
else:
    print(f'Failed to get odds: status_code {event_response.status_code}, response body {event_response.text}')

In [884]:
odds_json

{'timestamp': '2023-12-24T00:35:38Z',
 'previous_timestamp': '2023-12-24T00:30:38Z',
 'next_timestamp': '2023-12-24T00:40:38Z',
 'data': [{'id': 'd5c718e775d1e9ee0e3b2bbc24fbc5fb',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-12-24T00:10:00Z',
   'home_team': 'New Orleans Pelicans',
   'away_team': 'Houston Rockets'},
  {'id': '7c896c95526cd2232172b564a27ac4f9',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-12-24T00:10:00Z',
   'home_team': 'Indiana Pacers',
   'away_team': 'Orlando Magic'},
  {'id': '47cc3ceaf582e5c4275d3e4aa3f57c46',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-12-24T00:10:54Z',
   'home_team': 'Charlotte Hornets',
   'away_team': 'Denver Nuggets'},
  {'id': 'a6f1107ca15f65b4bd7887ca11ef80ee',
   'sport_key': 'basketball_nba',
   'sport_title': 'NBA',
   'commence_time': '2023-12-24T00:40:00Z',
   'home_team': 'Atlanta Hawks',
   'away_team': 'Memphi

In [885]:
sport = 'basketball_nba'
regions = 'us'
markets = 'player_points'
date_format = 'iso'
odds_format = 'decimal'
books = 'draftkings'
date = '2023-12-24T00:10:00Z'
event_id = 'd5c718e775d1e9ee0e3b2bbc24fbc5fb'

In [886]:
# https://api.the-odds-api.com/v4/historical/sports/basketball_nba/events/48dbd6bbfeb72fae383de550504df9cc/odds?apiKey=0472e2e243597df61ff8139f6a2841ab&regions=us&markets=player_points&dateFormat=iso&oddsFormat=decimal&bookmakers=draftkings%2C%20fanduel%2C%20bovada%2C%20pinnacle%2C%20betmgm%2C%20betonlineag&date=2024-06-18T00%3A30%3A00Z

odds_response = requests.get(
    f'https://api.the-odds-api.com/v4/historical/sports/{sport}/events/{event_id}/odds?apiKey={apiKey}&regions={regions}&markets={markets}&dateFormat={date_format}&oddsFormat={odds_format}&bookmakers={books}&date={date}',
)
if odds_response.status_code == 200:
    odds_json = odds_response.json()
else:
    print(f'Failed to get odds: status_code {event_response.status_code}, response body {event_response.text}')

In [887]:
odds_json['data']

{'id': 'd5c718e775d1e9ee0e3b2bbc24fbc5fb',
 'sport_key': 'basketball_nba',
 'sport_title': 'NBA',
 'commence_time': '2023-12-24T00:10:00Z',
 'home_team': 'New Orleans Pelicans',
 'away_team': 'Houston Rockets',
 'bookmakers': [{'key': 'draftkings',
   'title': 'DraftKings',
   'last_update': '2023-12-24T00:05:34Z',
   'markets': [{'key': 'player_points',
     'last_update': '2023-12-24T00:05:17Z',
     'outcomes': [{'name': 'Over',
       'description': 'Alperen Sengun',
       'price': 1.83,
       'point': 17.5},
      {'name': 'Under',
       'description': 'Alperen Sengun',
       'price': 1.91,
       'point': 17.5},
      {'name': 'Over',
       'description': 'Brandon Ingram',
       'price': 1.87,
       'point': 22.5},
      {'name': 'Under',
       'description': 'Brandon Ingram',
       'price': 1.87,
       'point': 22.5},
      {'name': 'Over',
       'description': 'CJ McCollum',
       'price': 1.8,
       'point': 16.5},
      {'name': 'Under',
       'description': 'CJ

In [877]:
event_id_m = []
dates = []
names = []
descriptions = []
prices = []
points = []

for line in odds_json['data']['bookmakers'][0]['markets'][0]['outcomes']:
    event_id_m.append(event_id)
    dates.append(date)
    names.append(line['name'])
    descriptions.append(line['description'])
    prices.append(line['price'])
    points.append(line['point'])

odds_data = pd.DataFrame({
    'event_id': event_id_m,
    'date': dates,
    'o/u': names,
    'player': descriptions,
    'price': prices,
    'line': points
})

In [878]:
old = pd.read_csv('more_point_odds_data.csv')
old = old.drop(columns=['Unnamed: 0'])
up = pd.concat([old, odds_data], ignore_index=True)
up.to_csv('more_point_odds_data.csv')

In [609]:
'3d4c502f3ca5f026ab17bcaf740a0737' in events_info['0']

False

In [544]:
# for i in range(len(events_info)):
#     event_id = events_info['0'].iloc[i]
#     date = events_info['1'].iloc[i]
#     odds_response = requests.get(
#         f'https://api.the-odds-api.com/v4/historical/sports/{sport}/events/{event_id}/odds?apiKey={apiKey}&regions={regions}&markets={markets}&dateFormat={date_format}&oddsFormat={odds_format}&bookmakers={books}&date={date}',
#     )
#     if odds_response.status_code == 200:
#         odds_json = odds_response.json()
#     else:
#         print(f'Failed to get odds: status_code {odds_response.status_code}, response body {odds_response.text}')
    
#     for line in odds_json['data']['bookmakers'][0]['markets'][0]['outcomes']:
#         event_id_m.append(event_id)
#         dates.append(date)
#         names.append(line['name'])
#         descriptions.append(line['description'])
#         prices.append(line['price'])
#         points.append(line['point'])
        
#     if i % 50 == 0:
#         print(i)

Failed to get odds: status_code 404, response body {"message":"Event not found. The event may have expired or the event id is invalid.","error_code":"EVENT_NOT_FOUND","details_url":"https://the-odds-api.com/liveapi/guides/v4/api-error-codes.html#event-not-found"}

0
Failed to get odds: status_code 404, response body {"message":"Event not found. The event may have expired or the event id is invalid.","error_code":"EVENT_NOT_FOUND","details_url":"https://the-odds-api.com/liveapi/guides/v4/api-error-codes.html#event-not-found"}

50
Failed to get odds: status_code 404, response body {"message":"Event not found. The event may have expired or the event id is invalid.","error_code":"EVENT_NOT_FOUND","details_url":"https://the-odds-api.com/liveapi/guides/v4/api-error-codes.html#event-not-found"}

100
150
200
250
Failed to get odds: status_code 404, response body {"message":"Event not found. The event may have expired or the event id is invalid.","error_code":"EVENT_NOT_FOUND","details_url":"ht

In [545]:
odds_data = pd.DataFrame({
    'event_id': event_id_m,
    'date': dates,
    'o/u': names,
    'player': descriptions,
    'price': prices,
    'line': points
})

In [546]:
odds_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32023 entries, 0 to 32022
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   event_id  32023 non-null  object 
 1   date      32023 non-null  object 
 2   o/u       32023 non-null  object 
 3   player    32023 non-null  object 
 4   price     32023 non-null  float64
 5   line      32023 non-null  float64
dtypes: float64(2), object(4)
memory usage: 1.5+ MB


In [548]:
odds_data.to_csv('point_odds_data.csv')