In [1]:
import numpy
import pandas as pd
import requests
import nba_api
import pathlib as plib
import re
from nba_api.stats.endpoints import commonteamroster    
from nba_api.stats.library.data import teams
from nba_api.stats.library.data import team_index_id, team_index_abbreviation, team_index_nickname, team_index_full_name
from nba_api.stats.library.data import team_index_city, team_index_state, team_index_year_founded
from nba_api.stats.library.data import team_index_championship_year

In [2]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder, playbyplay
import pandas as pd
# Expand the data to remove elipses and make more readable
pd.set_option('display.max_colwidth',250)
pd.set_option('display.max_rows',250)
import numpy as np


In [4]:
##########################
######### TEAMS ##########
##########################
# Get list of all teams
nba_teams = teams.get_teams()

# User input teams + date of desired game
team1_nickname = input("Team 1 nickname:")
team2_nickname = input("Team 2 nickname:")
game_date = input("Date of game:")

# Get team_id for given teams
team1 = [team for team in nba_teams if team['nickname'] == team1_nickname][0]
team1_id = team1['id']
team2 = [team for team in nba_teams if team['nickname'] == team2_nickname][0]
team2_abbreviation = team2['abbreviation']

In [5]:
##########################
######### GAMES ##########
##########################
# Get list of team1's games
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team1_id)
games = gamefinder.get_data_frames()[0]
#print(games)

# Get game_id where team1 played team2 on the given date
targetGame = games[(games.MATCHUP.str.contains(team2_abbreviation)) & (games.GAME_DATE.str.contains(game_date))]
print(targetGame)
game_id = targetGame.GAME_ID
print(game_id)
# Split MATCHUP value to determine home_team and visitor_team based on `vs.` or `@`
matchup = targetGame.iloc[0]['MATCHUP']
split = matchup.split(' ')
if split[1] == 'vs.':
    home_team = split[0]
    visitor_team = split[2]
else:
    home_team = split[2]
    visitor_team = split[0]


  SEASON_ID     TEAM_ID TEAM_ABBREVIATION               TEAM_NAME     GAME_ID  \
0     22022  1610612757               POR  Portland Trail Blazers  0022200843   

    GAME_DATE      MATCHUP WL  MIN  PTS  ...  FT_PCT  OREB  DREB  REB  AST  \
0  2023-02-10  POR vs. OKC  L  241  129  ...   0.935     9    23   32   29   

   STL  BLK  TOV  PF  PLUS_MINUS  
0    3    5   18  27        -9.0  

[1 rows x 28 columns]
0    0022200843
Name: GAME_ID, dtype: object


In [6]:
##########################
#### PBP DATA CLEANUP ####
##########################
# Get playbyplay data of given game_id
df = playbyplay.PlayByPlay(game_id).get_data_frames()[0]

# Filter for EVENTMSGTYPE 1,3,13 only (FG, FT, periodEnd)
df = df.loc[df['EVENTMSGTYPE'].isin([1,3,13])]

# Remove events where there was no score update
df = df.dropna(subset=['SCORE'])

# Remove unwanted columns
df = df.drop(['GAME_ID','EVENTNUM','EVENTMSGTYPE','EVENTMSGACTIONTYPE','WCTIMESTRING','SCOREMARGIN'], axis=1)



# Split the PCTIMESTRING (gametime) to minutes/seconds columns and convert to numeric
df[['MINUTES', 'SECONDS']] = df['PCTIMESTRING'].str.split(':', expand=True)
df['MINUTES'] = pd.to_numeric(df['MINUTES'], errors='coerce')
df['SECONDS'] = pd.to_numeric(df['SECONDS'], errors='coerce')

# Drop PCTIMESTRING column (replaced by minutes/seconds)
df = df.drop('PCTIMESTRING', axis=1)



# Determine scoring team based on populated description
df['SCORINGTEAM'] = np.where(df['HOMEDESCRIPTION'].notna(), home_team,
                        np.where(df['NEUTRALDESCRIPTION'].notna(), 'Period End',
                                 np.where(df['VISITORDESCRIPTION'].notna(), visitor_team, None)))
                                 

# Drop PCTIMESTRING column (replaced by minutes/seconds)
df = df.drop(['HOMEDESCRIPTION','NEUTRALDESCRIPTION','VISITORDESCRIPTION'], axis=1)



# Create method to calculate how many points were scored based on current score vs. previous score
def get_score_difference(row):
    score1, score2 = row['SCORE'].split('-')
    if pd.notnull(row['SCORE']):
        score1 = int(score1)
        score2 = int(score2)
    else:
        return None
    if score1 > row['SCORE1']:
        return score1 - row['SCORE1']
    else:
        return score2 - row['SCORE2']

# Save calculated score into new pointsscored column
df['SCORE1'] = df['SCORE'].str.split('-').str[0].astype(int).shift(1).fillna(0)
df['SCORE2'] = df['SCORE'].str.split('-').str[1].astype(int).shift(1).fillna(0)
df['POINTSSCORED'] = df.apply(lambda x: get_score_difference(x), axis=1)


# Drop SCORE1&2 columns
df = df.drop(['SCORE1', 'SCORE2'], axis=1)



ValueError: not enough values to unpack (expected 2, got 1)

In [None]:

##########################
######### EXPORT #########
##########################
# Rearrange columns
df = df[['PERIOD', 'MINUTES', 'SECONDS', 'SCORINGTEAM', 'POINTSSCORED', 'SCORE']]

# Save name + location
file_name =  game_date.replace("-", "") + "_" + team1_nickname.lower() + "_" + team2_nickname.lower() + ".csv"
#file_path = plib.Path(r'C:\Users\Danny\git_projects\outputs\sports_analysis_outputs\+ file_name') 
file_path = "/Users/Danny/git_projects/outputs/sports_analysis_outputs/" + file_name
# Export the dataframe to a CSV file
df.to_csv(file_path, index=False)

print("EXPORT COMPLETE!")

EXPORT COMPLETE!
