## Getting box scores using nba_api

## Goal:
Collect the box score of every NBA regular-season game in the 2022-23 season.

Steps:
1. Define functions to properly call nba_api for boxscore data
2. Load boxscores into dataframes
3. Add/clean data
4. Archive data as local CSV

In [None]:
from nba_api.live.nba.endpoints import boxscore
import pandas as pd
import time

## Functions

### form_gameIDs

In [None]:
# function returns list of regular game IDs for given season
def form_gameIDs(season, num_games=1230):
    """Build the list of regular-season game IDs for one season.

    Each ID is the prefix '002', followed by the last two characters of
    ``str(season)``, followed by a five-digit zero-padded game number that
    counts up from 1. For example, ``form_gameIDs(2022)[0]`` is
    ``'0022200001'``.

    Args:
        season: Season identifier; only the last two characters of its
            string form are used (the notebook passes 2022 for 2022-23).
        num_games: How many sequential game numbers to generate. Defaults to
            1230, the value that used to be hard-coded (presumably 30 teams
            x 82 games / 2 -- confirm for shortened seasons).

    Returns:
        list[str]: ``num_games`` game ID strings in ascending order; an empty
        list when ``num_games`` is 0 or negative.
    """
    regular_season_prefix = '002'
    gameID_base = regular_season_prefix + str(season)[-2:]
    return [gameID_base + str(game_number).zfill(5)
            for game_number in range(1, num_games + 1)]

### get_boxscore_dates

In [None]:
# collects the 'gameEt' value reported for every game ID in a list
def get_boxscore_dates(gameIDs_list):
    """Return the ``gameEt`` field of each game's box score.

    Args:
        gameIDs_list: Iterable of game IDs; each one is passed to
            ``boxscore.BoxScore``.

    Returns:
        list: The ``['game']['gameEt']`` value of every box score, in the
        same order as ``gameIDs_list``.
    """
    return [
        boxscore.BoxScore(game_id).get_dict()['game']['gameEt']
        for game_id in gameIDs_list
    ]

### get_boxscores

In [None]:
def get_boxscores(gameIDs):
    """Fetch team statistics for each game and add them to the global ``box_df``.

    For every game ID one box score is requested and two rows are recorded:
    the away team's ``statistics`` first, then the home team's. Later cells
    rely on that away/home ordering. All rows are added to ``box_df`` in a
    single step after the loop.

    Args:
        gameIDs: Iterable of game IDs; each one is passed to
            ``boxscore.BoxScore``.

    Returns:
        None. Results are stored in the notebook-level ``box_df``; when
        ``gameIDs`` is empty, ``box_df`` is not touched.
    """
    # The notebook reads the results from the module-level frame, so rebind it
    # explicitly instead of creating a function-local variable.
    global box_df

    rows = []
    for game_id in gameIDs:
        # One request per game; both teams come from the same payload.
        game = boxscore.BoxScore(game_id).get_dict()['game']
        rows.append(dict(game['awayTeam']['statistics']))
        rows.append(dict(game['homeTeam']['statistics']))

        time.sleep(1)  # necessary to help mitigate timeouts

    if not rows:
        return

    fetched = pd.DataFrame(rows)
    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead.
    # An empty template frame is replaced outright so it cannot influence the
    # column dtypes (assumes the template columns match the fetched statistics
    # keys -- confirm if the API schema varies between games).
    if box_df.empty:
        box_df = fetched
    else:
        box_df = pd.concat([box_df, fetched], ignore_index=True)

### Building dataframe

In [None]:
# Fetch one box score to learn which statistics are reported.
# NOTE: game_ids_list is created in the "Calling functions" section further
# down, so that cell has to be run before this one.
test_box = boxscore.BoxScore(game_ids_list[0])

# Blank template keyed by statistic name. The comprehension gives every key its
# own list; dict.fromkeys(keys, []) would make all keys share one list object.
stats_key_dict = {
    stat_name: []
    for stat_name in test_box.get_dict()['game']['homeTeam']['statistics']
}

# Empty dataframe: one column per statistic, no rows yet
box_df = pd.DataFrame(columns=list(stats_key_dict))

print(box_df)

### Calling functions

In [None]:
# Build the list of game IDs by calling form_gameIDs with season 2022
game_ids_list = form_gameIDs(2022)
# Sanity check: show how many IDs were generated
print(len(game_ids_list))

In [None]:
# Collect the 'gameEt' value of every game; get_boxscore_dates requests one
# box score per ID in game_ids_list
boxscore_dates_list = get_boxscore_dates(game_ids_list)
# Sanity check: show how many dates were collected
print(len(boxscore_dates_list))

In [None]:
# Gather the box scores for every ID in game_ids_list.
# get_boxscores sleeps one second after each game, so a full list takes a
# while to finish. The call's return value is not captured here.
get_boxscores(game_ids_list)

### Building new lists to add to the dataframe as new columns
* Home/Away
* Game dates
* Game IDs

In [None]:
# inserting 'Home/Away' data
# get_boxscores records each game's away-team row before its home-team row, so
# even row positions (0, 2, ...) are 'Away' and odd positions are 'Home'.
# Labelling by position avoids a row-wise apply, does not depend on the index
# labels, and also works when box_df has no rows.
box_df['Home/Away'] = [
    'Away' if position % 2 == 0 else 'Home'
    for position in range(len(box_df))
]

In [None]:
# Every game contributes two rows to box_df (away team, then home team), so
# each date and each game ID is repeated twice to line up with the rows.
# The dates come from boxscore_dates_list, the notebook-level result of
# get_boxscore_dates (dates_list only exists inside that function).
temp_dates_list = [item for item in boxscore_dates_list for _ in range(2)]
temp_gameid_list = [item for item in game_ids_list for _ in range(2)]
# Put the two identifying columns at the front of the dataframe
box_df.insert(0, 'Date', temp_dates_list)
box_df.insert(1, 'Game ID', temp_gameid_list)

## Archiving the Data

In [None]:
# archive the data
# Write the collected box scores to a local CSV file (row index left out)
from pathlib import Path

# to_csv does not create missing folders; make sure the target directory
# exists so the collected data is not lost to an OSError at the final step
Path("Spreadsheets").mkdir(parents=True, exist_ok=True)
box_df.to_csv("Spreadsheets/box_score_all.csv", index=False)

In [None]:
# Import the nba_api and pandas modules
import time

import nba_api
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2

# Define the season
season = "2022-23"

# Get all the regular season games for the season
games = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable="Regular Season").get_data_frames()[0]

# Keep each game ID once, in first-seen order, so a game that appears on
# several rows of `games` is only requested once.
# NOTE(review): LeagueGameFinder appears to list a game once per team -- confirm.
game_ids = games["GAME_ID"].unique()

# Collect the per-game frames and concatenate once at the end; growing a
# dataframe with pd.concat inside the loop re-copies all earlier rows each time.
box_score_frames = []

# Loop through the games and get the box score for each game
for game_id in game_ids:
    # Get the box score (first frame returned by the endpoint)
    box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id).get_data_frames()[0]
    box_score_frames.append(box_score)
    print(game_id)
    # Pause between requests, as get_boxscores does, to help mitigate timeouts
    time.sleep(1)

# pd.concat raises on an empty list, so fall back to an empty dataframe
if box_score_frames:
    box_score_df = pd.concat(box_score_frames, ignore_index=True)
else:
    box_score_df = pd.DataFrame()