In [1]:
from nba_api.stats.endpoints import TeamGameLog
import pandas as pd
import time

## Collect NBA games played in Chicago from 2017 to 2024 through nba_api

In [2]:
# Purpose of this cell: download the Chicago Bulls game log for each season,
# keep only the home games (games played in Chicago), and save their dates
# and matchups to a CSV file.

Chicago_Bulls_id = "1610612741"

# Season labels in the "YYYY-YY" form, e.g. "2017-18".
# range(2017, 2025) yields start years 2017..2024, so the list runs from
# "2017-18" through "2024-25". The original note says the collected data
# spans 2017.10 to 2024.11, i.e. the last season was only partially
# available at collection time.
seasons = [f"{year}-{str(year+1)[-2:]}" for year in range(2017, 2025)]

# Pause inserted between consecutive requests so the season queries are not
# fired back-to-back (remote stats APIs commonly throttle request bursts).
REQUEST_PAUSE_SECONDS = 0.6

# Only these columns are needed downstream; trimming per season avoids
# carrying the full box-score columns into the combined frame.
KEPT_COLUMNS = ['GAME_DATE', 'MATCHUP']

chicago_game_data_list = []

for request_number, season in enumerate(seasons):
    # No pause before the very first request, only between requests.
    if request_number > 0:
        time.sleep(REQUEST_PAUSE_SECONDS)

    print(f"get data for season: {season}")
    # Fetch the Bulls' game log for this season via the TeamGameLog endpoint.
    game_log = TeamGameLog(team_id=Chicago_Bulls_id, season=season)
    game_df = game_log.get_data_frames()[0]

    # In MATCHUP, "CHI vs. DET" means the Bulls are the home team, while
    # "CHI @ BKN" means they are playing away. Keep home games only.
    is_home_game = game_df['MATCHUP'].str.startswith('CHI vs.')
    game_in_chicago_df = game_df.loc[is_home_game, KEPT_COLUMNS]
    chicago_game_data_list.append(game_in_chicago_df)

# Combine all seasons into a single DataFrame with a fresh 0..n-1 index.
chicago_game_data_df = pd.concat(chicago_game_data_list, ignore_index=True)
# Re-select the columns and copy so the assignments below act on an
# independent frame rather than a view.
chicago_game_data_df = chicago_game_data_df[KEPT_COLUMNS].copy()

# Convert GAME_DATE strings such as "APR 11, 2018" to datetime.
chicago_game_data_df['GAME_DATE'] = pd.to_datetime(chicago_game_data_df['GAME_DATE'], format='%b %d, %Y')

# Order chronologically and renumber the rows.
chicago_game_data_df = chicago_game_data_df.sort_values(by='GAME_DATE').reset_index(drop=True)

print(chicago_game_data_df.head())

# Save the home-game date dataset to CSV (no index column).
chicago_game_data_df.to_csv("NBA_in_Chicago_17_24.csv", index=False)

get data for season: 2017-18
get data for season: 2018-19
get data for season: 2019-20
get data for season: 2020-21
get data for season: 2021-22
get data for season: 2022-23
get data for season: 2023-24
get data for season: 2024-25
   GAME_DATE      MATCHUP
0 2017-10-21  CHI vs. SAS
1 2017-10-26  CHI vs. ATL
2 2017-10-28  CHI vs. OKC
3 2017-11-04  CHI vs. NOP
4 2017-11-10  CHI vs. IND
