In [2]:
pip install nba_api

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import time

In [5]:
#Endpoints for NBA teams
from nba_api.stats.endpoints import playercareerstats
from nba_api.live.nba.endpoints import scoreboard
from nba_api.stats.endpoints import boxscorematchups
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playergamelogs
from nba_api.stats.endpoints import boxscoretraditionalv2

# Building Dataset

In [10]:
#List of target teams for games on April 2, 2023 for MVP
# These are the Team IDs for the target teams

#DF1 - target_teams = [1610612742, 1610612763, 1610612757, 1610612766, 1610612762, 1610612764]

target_teams = [1610612742, 1610612763, 1610612757, 1610612766, 1610612762, 1610612764,1610612737,1610612741,1610612750,1610612761,1610612751,1610612752]

#DF2 - target_teams = [1610612764, 1610612737, 1610612741, 1610612750, 1610612761, 1610612751, 1610612752]



gamefinder = leaguegamefinder.LeagueGameFinder()
games = gamefinder.get_data_frames()[0]
    
# filter games by season and target teams
games_by_season = games[(games.SEASON_ID == "22022")]   
target_teams_games_by_season = games_by_season[games_by_season.TEAM_ID.isin(target_teams)]
#target_teams_games_by_season = target_teams_games_by_season[target_teams_games_by_season.GAME_DATE >= '2022-10-01']
games_by_target_teams = target_teams_games_by_season.GAME_ID.unique()

#To pull historial and latest game information and needing to work with NBA API that can timeout depending on number of requests
#Process below aims to check if a game has one of the teams specified, if so, pull down the boxscore and creates a file
#Then the game files are merged together
#We had to breakup the teams into two lists due to issues pulling down large volumes of teams as once

dfs = []

for game_id in games_by_target_teams:
    try:
        df = pd.read_csv(f'{game_id}.csv')
    except FileNotFoundError:
        box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(
            end_period=10, end_range=0, game_id=game_id, range_type=0, start_period=1, start_range=0
        )
        df = box_score.get_data_frames()[0]
        df.to_csv(f'{game_id}.csv', index=False)
        time.sleep(1)

    dfs.append(df)

result_df2 = pd.concat(dfs)

print(result_df2)

       GAME_ID     TEAM_ID TEAM_ABBREVIATION     TEAM_CITY  PLAYER_ID  \
0     22201223  1610612765               DET       Detroit    1630181   
1     22201223  1610612765               DET       Detroit    1630164   
2     22201223  1610612765               DET       Detroit    1631105   
3     22201223  1610612765               DET       Detroit    1631093   
4     22201223  1610612765               DET       Detroit    1630165   
..         ...         ...               ...           ...        ...   
27  1622200001  1610612755               PHI  Philadelphia    1630196   
28  1622200001  1610612755               PHI  Philadelphia    1629619   
29  1622200001  1610612755               PHI  Philadelphia    1630243   
30  1622200001  1610612755               PHI  Philadelphia    1629369   
31  1622200001  1610612755               PHI  Philadelphia    1630676   

       PLAYER_NAME  NICKNAME START_POSITION                 COMMENT    MIN  \
0     R.J. Hampton      R.J.              F  

In [11]:
games_by_season['GAME_ID'] = games_by_season['GAME_ID'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [12]:
result_df2

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,22201223,1610612765,DET,Detroit,1630181,R.J. Hampton,R.J.,F,,3:20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-9.0
1,22201223,1610612765,DET,Detroit,1630164,James Wiseman,James,F,,24:50,...,6.0,6.0,12.0,0.0,0.0,1.0,4.0,2.0,11.0,-31.0
2,22201223,1610612765,DET,Detroit,1631105,Jalen Duren,Jalen,C,,34:40,...,3.0,15.0,18.0,0.0,0.0,1.0,6.0,2.0,4.0,3.0
3,22201223,1610612765,DET,Detroit,1631093,Jaden Ivey,Jaden,G,,36:31,...,1.0,2.0,3.0,3.0,0.0,0.0,5.0,6.0,16.0,-30.0
4,22201223,1610612765,DET,Detroit,1630165,Killian Hayes,Killian,G,,39:16,...,2.0,6.0,8.0,7.0,0.0,1.0,5.0,0.0,26.0,-4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27,1622200001,1610612755,PHI,Philadelphia,1630196,Filip Petrusev,Filip,,DNP - Coach's Decision,,...,,,,,,,,,,
28,1622200001,1610612755,PHI,Philadelphia,1629619,Myles Powell,Myles,,DNP - Coach's Decision,,...,,,,,,,,,,
29,1622200001,1610612755,PHI,Philadelphia,1630243,Trevelin Queen,Trevelin,,DNP - Coach's Decision,,...,,,,,,,,,,
30,1622200001,1610612755,PHI,Philadelphia,1629369,Fred Sims Jr.,Fred,,DNP - Coach's Decision,,...,,,,,,,,,,


In [28]:
final_db2 = pd.merge(result_df2, games_by_season[['GAME_ID', 'GAME_DATE']], on='GAME_ID', how='inner')
final_db2 = final_db2[['GAME_ID','GAME_DATE','TEAM_ID','TEAM_ABBREVIATION','TEAM_CITY','PLAYER_ID','PLAYER_NAME','PTS','REB','STL','AST','TO','BLK','FGA','FG_PCT','FG3A','FTA','START_POSITION']]


In [29]:
final_db2

Unnamed: 0,GAME_ID,GAME_DATE,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,PTS,REB,STL,AST,TO,BLK,FGA,FG_PCT,FG3A,FTA,START_POSITION
0,22201191,2023-04-05,1610612764,WAS,Washington,1630557,Corey Kispert,12.0,2.0,1.0,1.0,1.0,0.0,12.0,0.417,8.0,2.0,F
1,22201191,2023-04-05,1610612764,WAS,Washington,1630557,Corey Kispert,12.0,2.0,1.0,1.0,1.0,0.0,12.0,0.417,8.0,2.0,F
2,22201191,2023-04-05,1610612764,WAS,Washington,1630264,Anthony Gill,4.0,5.0,0.0,0.0,0.0,0.0,4.0,0.500,1.0,2.0,F
3,22201191,2023-04-05,1610612764,WAS,Washington,1630264,Anthony Gill,4.0,5.0,0.0,0.0,0.0,0.0,4.0,0.500,1.0,2.0,F
4,22201191,2023-04-05,1610612764,WAS,Washington,1629655,Daniel Gafford,25.0,10.0,1.0,1.0,3.0,2.0,10.0,0.800,0.0,9.0,C
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24263,1622200002,2022-07-05,1610612762,UTA,Utah,26935,Darryl Morsell,,,,,,,,,,,
24264,1622200002,2022-07-05,1610612762,UTA,Utah,26935,Darryl Morsell,,,,,,,,,,,
24265,1622200002,2022-07-05,1610612762,UTA,Utah,26936,Borisa Simanic,,,,,,,,,,,
24266,1622200002,2022-07-05,1610612762,UTA,Utah,26936,Borisa Simanic,,,,,,,,,,,


In [31]:
df_unique = final_db2.drop_duplicates(subset=['GAME_ID', 'PLAYER_ID'])
df_unique

Unnamed: 0,GAME_ID,GAME_DATE,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,PTS,REB,STL,AST,TO,BLK,FGA,FG_PCT,FG3A,FTA,START_POSITION
0,22201191,2023-04-05,1610612764,WAS,Washington,1630557,Corey Kispert,12.0,2.0,1.0,1.0,1.0,0.0,12.0,0.417,8.0,2.0,F
2,22201191,2023-04-05,1610612764,WAS,Washington,1630264,Anthony Gill,4.0,5.0,0.0,0.0,0.0,0.0,4.0,0.500,1.0,2.0,F
4,22201191,2023-04-05,1610612764,WAS,Washington,1629655,Daniel Gafford,25.0,10.0,1.0,1.0,3.0,2.0,10.0,0.800,0.0,9.0,C
6,22201191,2023-04-05,1610612764,WAS,Washington,1631098,Johnny Davis,20.0,7.0,2.0,3.0,4.0,2.0,18.0,0.444,4.0,5.0,G
8,22201191,2023-04-05,1610612764,WAS,Washington,1630692,Jordan Goodwin,10.0,2.0,4.0,6.0,1.0,1.0,10.0,0.400,3.0,2.0,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24253,1622200002,2022-07-05,1610612762,UTA,Utah,26953,D.J. Funderburk,,,,,,,,,,,
24256,1622200002,2022-07-05,1610612762,UTA,Utah,1630548,Johnny Juzang,,,,,,,,,,,
24259,1622200002,2022-07-05,1610612762,UTA,Utah,1630627,Isaiah Miller,,,,,,,,,,,
24262,1622200002,2022-07-05,1610612762,UTA,Utah,26935,Darryl Morsell,,,,,,,,,,,


In [20]:
df_unique.to_csv('result_part1.csv', index=False)

In [21]:
# Import result_part1.csv into a dataframe
df1 = pd.read_csv('result_part1.csv')

# Import result_part2.csv into a dataframe
df2 = pd.read_csv('result_part2.csv')

In [22]:
merged_df = pd.concat([df1, df2], axis=0)

In [23]:
merged_df

Unnamed: 0,GAME_ID,GAME_DATE,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,PTS,REB,STL,AST,TO,BLK,FGA,FG_PCT,FG3A,FTA,START_POSITION
0,22201191,2023-04-05,1610612764,WAS,Washington,1630557,Corey Kispert,12.0,2.0,1.0,1.0,1.0,0.0,12.0,0.417,8.0,2.0,F
1,22201191,2023-04-05,1610612764,WAS,Washington,1630264,Anthony Gill,4.0,5.0,0.0,0.0,0.0,0.0,4.0,0.500,1.0,2.0,F
2,22201191,2023-04-05,1610612764,WAS,Washington,1629655,Daniel Gafford,25.0,10.0,1.0,1.0,3.0,2.0,10.0,0.800,0.0,9.0,C
3,22201191,2023-04-05,1610612764,WAS,Washington,1631098,Johnny Davis,20.0,7.0,2.0,3.0,4.0,2.0,18.0,0.444,4.0,5.0,G
4,22201191,2023-04-05,1610612764,WAS,Washington,1630692,Jordan Goodwin,10.0,2.0,4.0,6.0,1.0,1.0,10.0,0.400,3.0,2.0,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13704,1522200006,2022-07-08,1610612749,MIL,Milwaukee,26954,Sylvain Francisco,,,,,,,,,,,
13705,1522200006,2022-07-08,1610612749,MIL,Milwaukee,26955,Evans Ganapamo,,,,,,,,,,,
13706,1522200006,2022-07-08,1610612749,MIL,Milwaukee,1629787,Marcus Graves,,,,,,,,,,,
13707,1522200006,2022-07-08,1610612749,MIL,Milwaukee,1631129,Iverson Molinar,,,,,,,,,,,


In [24]:
merged_df = merged_df.drop_duplicates(subset=['GAME_ID','GAME_DATE', 'TEAM_ID','PLAYER_ID'])
merged_df


Unnamed: 0,GAME_ID,GAME_DATE,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,PTS,REB,STL,AST,TO,BLK,FGA,FG_PCT,FG3A,FTA,START_POSITION
0,22201191,2023-04-05,1610612764,WAS,Washington,1630557,Corey Kispert,12.0,2.0,1.0,1.0,1.0,0.0,12.0,0.417,8.0,2.0,F
1,22201191,2023-04-05,1610612764,WAS,Washington,1630264,Anthony Gill,4.0,5.0,0.0,0.0,0.0,0.0,4.0,0.500,1.0,2.0,F
2,22201191,2023-04-05,1610612764,WAS,Washington,1629655,Daniel Gafford,25.0,10.0,1.0,1.0,3.0,2.0,10.0,0.800,0.0,9.0,C
3,22201191,2023-04-05,1610612764,WAS,Washington,1631098,Johnny Davis,20.0,7.0,2.0,3.0,4.0,2.0,18.0,0.444,4.0,5.0,G
4,22201191,2023-04-05,1610612764,WAS,Washington,1630692,Jordan Goodwin,10.0,2.0,4.0,6.0,1.0,1.0,10.0,0.400,3.0,2.0,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13704,1522200006,2022-07-08,1610612749,MIL,Milwaukee,26954,Sylvain Francisco,,,,,,,,,,,
13705,1522200006,2022-07-08,1610612749,MIL,Milwaukee,26955,Evans Ganapamo,,,,,,,,,,,
13706,1522200006,2022-07-08,1610612749,MIL,Milwaukee,1629787,Marcus Graves,,,,,,,,,,,
13707,1522200006,2022-07-08,1610612749,MIL,Milwaukee,1631129,Iverson Molinar,,,,,,,,,,,


In [26]:
#This creates a final clean version of the box score data for target teams

merged_df.to_csv('merged_df.csv', index=False)