# NBA Data Collection

This notebook collects NBA game data from the NBA API and saves it to CSV files.

In [1]:
# Import required libraries
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder, playergamelog, teamgamelog
from nba_api.stats.static import teams, players
import time
from tqdm import tqdm
import os

In [2]:
# Create data directory if it doesn't exist
data_dir = '../data'
os.makedirs(data_dir, exist_ok=True)

In [3]:
# Get all NBA teams
teams_df = pd.DataFrame(teams.get_teams())
teams_df.to_csv(f'{data_dir}/teams.csv', index=False)
teams_df.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [6]:
# Define seasons to collect data for
seasons = ['2022-23', '2023-24']

In [7]:
# Function to get game logs for a season
def get_game_logs(season):
    try:
        gamefinder = leaguegamefinder.LeagueGameFinder(
            season_nullable=season,
            league_id_nullable='00'  # NBA
        )
        games = gamefinder.get_data_frames()[0]
        return games
    except Exception as e:
        print(f"Error getting game logs for season {season}: {str(e)}")
        return None

In [8]:
# Collect and save game data for each season
for season in tqdm(seasons, desc="Collecting season data"):
    games_df = get_game_logs(season)
    if games_df is not None:
        games_df.to_csv(f'{data_dir}/games_{season}.csv', index=False)
        print(f"Saved {len(games_df)} games for season {season}")
    time.sleep(1)  # Rate limiting

Collecting season data:   0%|                                                                                        | 0/2 [00:00<?, ?it/s]

Saved 2790 games for season 2022-23


Collecting season data:  50%|████████████████████████████████████████                                        | 1/2 [00:01<00:01,  1.36s/it]

Saved 2795 games for season 2023-24


Collecting season data: 100%|████████████████████████████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.27s/it]


In [9]:
# Function to get team game logs
def get_team_game_logs(team_id, season):
    try:
        team_games = teamgamelog.TeamGameLog(
            team_id=team_id,
            season=season
        )
        return team_games.get_data_frames()[0]
    except Exception as e:
        print(f"Error getting team game logs for team {team_id} season {season}: {str(e)}")
        return None

In [10]:
# Collect and save team game logs for each team and season
for season in tqdm(seasons, desc="Collecting team data"):
    for _, team in teams_df.iterrows():
        team_games = get_team_game_logs(team['id'], season)
        if team_games is not None:
            team_games.to_csv(f'{data_dir}/team_games_{team["abbreviation"]}_{season}.csv', index=False)
            print(f"Saved {len(team_games)} games for team {team['abbreviation']} season {season}")
        time.sleep(0.6)  # Rate limiting

Collecting team data:   0%|                                                                                          | 0/2 [00:00<?, ?it/s]

Saved 82 games for team ATL season 2022-23
Saved 82 games for team BOS season 2022-23
Saved 82 games for team CLE season 2022-23
Saved 82 games for team NOP season 2022-23
Saved 82 games for team CHI season 2022-23
Saved 82 games for team DAL season 2022-23
Saved 82 games for team DEN season 2022-23
Saved 82 games for team GSW season 2022-23
Saved 82 games for team HOU season 2022-23
Saved 82 games for team LAC season 2022-23
Saved 82 games for team LAL season 2022-23
Saved 82 games for team MIA season 2022-23
Saved 82 games for team MIL season 2022-23
Saved 82 games for team MIN season 2022-23
Saved 82 games for team BKN season 2022-23
Saved 82 games for team NYK season 2022-23
Saved 82 games for team ORL season 2022-23
Saved 82 games for team IND season 2022-23
Saved 82 games for team PHI season 2022-23
Saved 82 games for team PHX season 2022-23
Saved 82 games for team POR season 2022-23
Saved 82 games for team SAC season 2022-23
Saved 82 games for team SAS season 2022-23
Saved 82 ga

Collecting team data:  50%|█████████████████████████████████████████                                         | 1/2 [00:24<00:24, 24.52s/it]

Saved 82 games for team ATL season 2023-24
Saved 82 games for team BOS season 2023-24
Saved 82 games for team CLE season 2023-24
Saved 82 games for team NOP season 2023-24
Saved 82 games for team CHI season 2023-24
Saved 82 games for team DAL season 2023-24
Saved 82 games for team DEN season 2023-24
Saved 82 games for team GSW season 2023-24
Saved 82 games for team HOU season 2023-24
Saved 82 games for team LAC season 2023-24
Saved 82 games for team LAL season 2023-24
Saved 82 games for team MIA season 2023-24
Saved 82 games for team MIL season 2023-24
Saved 82 games for team MIN season 2023-24
Saved 82 games for team BKN season 2023-24
Saved 82 games for team NYK season 2023-24
Saved 82 games for team ORL season 2023-24
Saved 82 games for team IND season 2023-24
Saved 82 games for team PHI season 2023-24
Saved 82 games for team PHX season 2023-24
Saved 82 games for team POR season 2023-24
Saved 82 games for team SAC season 2023-24
Saved 82 games for team SAS season 2023-24
Saved 82 ga

Collecting team data: 100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:52<00:00, 26.29s/it]
