In [1]:
import pandas as pd
import time

from datetime import datetime
from nba_api.stats.endpoints import LeagueGameLog
from nba_api.stats.endpoints import BoxScoreTraditionalV3
from requests.exceptions import RequestException

season_types = [
    "Regular Season",
    "Pre Season",
    "Playoffs",
    "Preseason",
]


In [2]:
def get_league_game_log_from_season(season):
    print(f"Retrieving league game log from {season}...")
    dfs = []
    for season_type in season_types:
        while True:
            try:
                df = LeagueGameLog(
                    season=season,
                    season_type_all_star=season_type,
                    timeout=3,
                ).get_data_frames()[0]
                df.columns = df.columns.to_series().apply(lambda x: x.lower())
                df = pd.merge(
                    df,
                    df,
                    on=["season_id", "game_id", "game_date", "min"],
                    suffixes=["_home", "_away"],
                )
                df = df[
                    (df["matchup_home"].str.contains("vs."))
                    & (df["team_name_home"] != df["team_name_away"])
                ]
                df["season_type"] = season_type
                dfs.append(df)
                break
            except RequestException:
                continue
            except ValueError:
                return None
            except KeyError:
                break
    df = pd.concat(dfs, ignore_index=True)
    return df

In [3]:
this_year = datetime.now().year
years = list(range(2021, this_year + 1))

In [4]:
dfs = []
for year in years:
    dfs.append(get_league_game_log_from_season(year))
    time.sleep(3)
dfs = [df for df in dfs if df is not None]
df = pd.concat(dfs, ignore_index=True).reset_index(drop=True)

Retrieving league game log from 2021...
Retrieving league game log from 2022...
Retrieving league game log from 2023...


  df = pd.concat(dfs, ignore_index=True)


Retrieving league game log from 2024...


  df = pd.concat(dfs, ignore_index=True).reset_index(drop=True)


In [6]:
df.to_csv('games.csv', index=False)

In [7]:
def get_box_score_traditional_helper(game_id):
    dfs = {
        t: []
        for t in [
            "player_stats",
            "starter_bench_stats",
            "team_stats"
        ]
    }
    while True:
        try:
            res_dfs = BoxScoreTraditionalV3(
                game_id=game_id, timeout=3
            ).get_data_frames()
            for df in res_dfs:
                df.columns = df.columns.to_series().apply(lambda x: x.lower())
            homeid = res_dfs[2]['teamid'][0]
            awayid = res_dfs[2]['teamid'][1]
            df = res_dfs[0].copy()
            dfs["player_stats"] = df
            df = res_dfs[1].copy()
            df = pd.merge(
                df.loc[df['teamid'] == homeid],
                df.loc[df['teamid'] == awayid],
                on=["gameid", "startersbench"],
                suffixes=["_home", "_away"],
            )
            dfs["starter_bench_stats"] = df
            df = res_dfs[2].copy()
            df = pd.merge(
                df.loc[df['teamid'] == homeid],
                df.loc[df['teamid'] == awayid],
                on=["gameid"],
                suffixes=["_home", "_away"],
            )
            dfs["team_stats"] = df
            return dfs
        except RequestException:
            print('RequestException')
            continue
        except ValueError:
            return None

def get_box_score_traditional(game_ids):
    dfs = []
    for i, game_id in enumerate(game_ids):
        if i % 20 == 0:
            print(i)
        dfs.append(get_box_score_traditional_helper(game_id))
        time.sleep(3)
    dfs = [d for d in dfs if d is not None]
    return dfs

In [8]:
dfs = get_box_score_traditional(df['game_id'])

0
20
40
60
80
RequestException
100
120
140
160
180
200
220
RequestException
240
260
280
300
320
340
360
380
RequestException
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
1020
1040
1060
1080
RequestException
1100
1120
1140
RequestException
1160
RequestException
1180
1200
1220
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
1240
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
1260
RequestException
RequestException
RequestException
RequestException
RequestException
RequestException
1280
RequestException
RequestException
RequestException
RequestException
RequestException
1300
1320
1340
1360
1380
1400
1420
1440
1460
1480
1500
1520
1540
1560
RequestException
1580
1600
1620
1640
1660
1680
17

In [17]:
player_stats = pd.concat(
        [d["player_stats"] for d in dfs if d["player_stats"] is not None]
    ).reset_index(drop=True)
started_bench_stats = pd.concat(
        [d["starter_bench_stats"] for d in dfs if d["starter_bench_stats"] is not None]
    ).reset_index(drop=True)
team_stats = pd.concat(
        [d["team_stats"] for d in dfs if d["team_stats"] is not None]
    ).reset_index(drop=True)

In [18]:
player_stats.to_csv('players.csv', index=False)
started_bench_stats.to_csv('starter_bench.csv', index=False)
team_stats.to_csv('teams.csv', index=False)