In [1]:
from data.api.schedule import get_schedule
from data.api.plays import get_game_plays
import pandas as pd
import os

## Data Fetch: Schedule

Working from CSV limits the number of calls to the API and is generally faster. This block will attempt to open a schedule file. If the file does not exist, it will be created.

To analyze a different season, set the `SEASON` variable using the following format: `20222023` for the 2022-2023 season (for example).

In [4]:
# Season to analyze, written as the two years concatenated
# (20222023 for the 2022-2023 season).
SEASON = 20222023

# Build the cache location once so the existence check, the read, and the
# write always refer to the same file for the configured season.
SCHEDULE_DIR = 'data/csv/schedules'
SCHEDULE_PATH = '{}/{}.csv'.format(SCHEDULE_DIR, SEASON)

if os.path.exists(SCHEDULE_PATH):
    print('Schedule found, reading...')
    # The first CSV column holds the index that to_csv wrote out.
    schedule = pd.read_csv(SCHEDULE_PATH, index_col=0)
else:
    print('Schedule not found, creating...')
    # NOTE(review): "r" presumably selects the regular season -- confirm
    # against data.api.schedule.get_schedule.
    schedule = get_schedule(SEASON, "r")
    # to_csv does not create missing parent directories, so make sure the
    # cache folder exists before writing.
    os.makedirs(SCHEDULE_DIR, exist_ok=True)
    schedule.to_csv(SCHEDULE_PATH)
print('Schedule ready!')

Schedule found, reading...
Schedule ready!


In [6]:
# Make sure the per-season folder for play-by-play CSVs is present,
# reporting whether it had to be created.
if not os.path.exists('data/csv/plays/{}'.format(SEASON)):
    print("Directory for the configured season does not exist. Creating...")
    os.makedirs('data/csv/plays/{}'.format(SEASON))
    print("Directory created!")
else:
    print("Directory for the configured season already exists.")

# Fetch play-by-play data for every game in the schedule and store one CSV
# per game under the season's plays directory.
game_ids = schedule['game_id'].to_numpy()
print("Getting game data for games played. This may take some time...")
for game_id in game_ids:
    plays_path = 'data/csv/plays/{}/{}.csv'.format(SEASON, game_id)
    # Skip games already saved to disk so re-running the cell does not
    # repeat API calls for data we already have.
    if os.path.exists(plays_path):
        continue
    # NOTE(review): the schedule may include games that have not been played
    # yet; confirm how get_game_plays behaves for those.
    plays = get_game_plays(game_id)
    plays.to_csv(plays_path)

Directory for the configured season already exists.
Getting game data for games played. This may take some time...
