In [1]:
from nba_api.stats.endpoints import leaguegamefinder, commonplayerinfo, teamdetails
from nba_api.stats.static import players, teams
import pandas as pd
import json
from time import sleep

In [2]:
from datetime import datetime

def is_valid_game(game):
    """Decide whether a game row should be kept.

    Args:
        game: Mapping-like row (a dict or a pandas row works) exposing
            'SEASON_ID' and 'GAME_DATE'. 'GAME_DATE' must be a string in
            ``YYYY-MM-DD`` form, e.g. '2020-07-15'.

    Returns:
        False when the season id (as text) begins with "1", or when the game
        was played in July, August or September of a (month, year) pair that
        is not explicitly exempted. True otherwise.

    Raises:
        ValueError: if 'GAME_DATE' does not match ``%Y-%m-%d``. The date is
            only parsed for rows that pass the season check.
    """
    # Season filter: ids starting with "1" are dropped before the date is
    # even looked at (presumably preseason ids -- TODO confirm).
    season_text = str(game['SEASON_ID'])
    if season_text[:1] == "1":
        return False

    played_on = datetime.strptime(game['GAME_DATE'], "%Y-%m-%d")

    # Months 7-9 are excluded unless the (month, year) pair is listed here
    # (bubble months & COVID special case). The (10, 2020) entry is kept for
    # parity, but October is never excluded, so it has no effect.
    exempt_month_years = {
        (7, 2020),
        (8, 2020),
        (9, 2020),
        (10, 2020),
        (7, 2021),
    }

    in_excluded_window = 7 <= played_on.month <= 9
    is_exempt = (played_on.month, played_on.year) in exempt_month_years
    return (not in_excluded_window) or is_exempt


In [None]:
# Build players_json: {player_id: {"profile": <dict>, "games": [<dict>, ...]}}
# for every player returned by players.get_players().
players_json = {}

all_players = players.get_players()

for player in all_players:
    player_name = player['full_name']
    player_id = player['id']

    # Hard-coded exclusion carried over as-is; the reason is not recorded here.
    if int(player_id) == 1626122:
        print(f"Skipping {player_name}, {player_id}")
        continue

    # --- Profile request: retried until the endpoint object is created. ---
    # `except Exception` (not a bare `except:`) so that interrupting the
    # kernel raises KeyboardInterrupt out of this otherwise unbounded loop.
    # NOTE(review): assumes the endpoint constructor is what performs the
    # HTTP request (nba_api default) -- confirm.
    player_info = None
    while player_info is None:
        try:
            player_info = commonplayerinfo.CommonPlayerInfo(player_id=player_id, timeout=5)
        except Exception:
            print(f"Skipped player info: {player_name}, {player_id}")
            sleep(2)

    # Extraction is kept OUTSIDE the retry loop. Previously a failure here
    # (e.g. an empty frame -> IndexError) happened after player_info was
    # already set, so the loop exited and the PREVIOUS player's profile was
    # stored under this player's id. Now the player is skipped instead.
    try:
        player_info_df = player_info.get_data_frames()[0]
        player_profile = player_info_df.to_dict(orient='records')[0]
    except Exception:
        print(f"Skipped player info: {player_name}, {player_id}")
        continue

    # --- Game-log request: same retry policy as above. ---
    game_finder = None
    while game_finder is None:
        try:
            game_finder = leaguegamefinder.LeagueGameFinder(player_id_nullable=player_id, timeout=5)
        except Exception:
            print(f"Skipped game logs: {player_name}, {player_id}")
            sleep(2)

    # Same stale-data guard for the game log frame.
    try:
        games_df = game_finder.get_data_frames()[0]
    except Exception:
        print(f"Skipped game logs: {player_name}, {player_id}")
        continue

    # Keep only the rows accepted by is_valid_game, as plain dicts.
    games = [game.to_dict() for _, game in games_df.iterrows() if is_valid_game(game)]

    players_json[player_id] = {
        "profile": player_profile,
        "games": games
    }

    print(f"Finished player: {player_name}, {player_id}")

In [4]:
# Disabled (commented-out) variant of the player-scraping loop. Compared with
# the live loop it also skips ids already present in players_json and sleeps
# 1s after each player -- presumably so an interrupted scrape can be resumed
# without refetching; confirm before re-enabling.
# for player in all_players:
#     player_info = None
#     game_finder = None
#     player_name = player['full_name']
#     player_id = player['id']
#     if player_id in players_json or int(player_id) == 1626122:
#         print(f"Skipping {player_name}, {player_id}")
#         continue
    
#     while player_info is None:
#         try:
#             player_info = commonplayerinfo.CommonPlayerInfo(player_id=player_id, timeout=5)
#             player_info_df = player_info.get_data_frames()[0]
#             player_profile = player_info_df.to_dict(orient='records')[0]
#         except:
#             print(f"Skipped player info: {player_name}, {player_id}")
#             sleep(2)
#     while game_finder is None:
#         try:
#             game_finder = leaguegamefinder.LeagueGameFinder(player_id_nullable=player_id, timeout=5)
#             games_df = game_finder.get_data_frames()[0]
#         except:
#             print(f"Skipped game logs: {player_name}, {player_id}")
#             sleep(2)

#     games = []

#     for _, game in games_df.iterrows():
#         if not is_valid_game(game):
#             continue
#         games.append(game.to_dict())

#     players_json[player_id] = {
#         "profile": player_profile,
#         "games": games
#     }

#     print(f"Finished player: {player_name}, {player_id}")
#     sleep(1)

In [5]:
# Build teams_json: {team_id: {"profile": <dict>, "games": [<dict>, ...]}}
# for every team returned by teams.get_teams(). A team whose profile or game
# log cannot be fetched is reported and left out (no retry).
teams_json = {}

all_teams = teams.get_teams()

for team in all_teams:
    team_name = team['full_name']
    team_id = team['id']

    # `except Exception` instead of a bare `except:` so that interrupting the
    # kernel (KeyboardInterrupt) stops the cell rather than just skipping
    # the current team.
    try:
        team_info = teamdetails.TeamDetails(team_id=team_id, timeout=3)
        team_info_df = team_info.get_data_frames()[0]
        team_profile = team_info_df.to_dict(orient='records')[0]
    except Exception:
        print(f"Skipped team info: {team_name}")
        continue

    try:
        game_finder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, timeout=3)
        games_df = game_finder.get_data_frames()[0]
    except Exception:
        print(f"Skipped game logs: {team_name}")
        continue

    # Keep only the rows accepted by is_valid_game, as plain dicts.
    games = [game.to_dict() for _, game in games_df.iterrows() if is_valid_game(game)]

    teams_json[team_id] = {
        "profile": team_profile,
        "games": games
    }

    print(f"Finished team: {team_name}")
    # Short pause between teams (only reached on the success path).
    sleep(0.5)


Finished team: Atlanta Hawks
Finished team: Boston Celtics
Finished team: Cleveland Cavaliers
Finished team: New Orleans Pelicans
Finished team: Chicago Bulls
Finished team: Dallas Mavericks
Finished team: Denver Nuggets
Finished team: Golden State Warriors
Finished team: Houston Rockets
Finished team: Los Angeles Clippers
Finished team: Los Angeles Lakers
Finished team: Miami Heat
Finished team: Milwaukee Bucks
Finished team: Minnesota Timberwolves
Finished team: Brooklyn Nets
Finished team: New York Knicks
Finished team: Orlando Magic
Finished team: Indiana Pacers
Finished team: Philadelphia 76ers
Finished team: Phoenix Suns
Finished team: Portland Trail Blazers
Finished team: Sacramento Kings
Finished team: San Antonio Spurs
Finished team: Oklahoma City Thunder
Finished team: Toronto Raptors
Finished team: Utah Jazz
Finished team: Memphis Grizzlies
Finished team: Washington Wizards
Finished team: Detroit Pistons
Finished team: Charlotte Hornets


In [6]:
# Persist the collected team data as pretty-printed JSON (4-space indent),
# overwriting any existing teams_data.json in the working directory.
with open("teams_data.json", "w") as teams_file:
    json.dump(teams_json, teams_file, indent=4)


In [7]:
# Number of player ids currently stored in players_json.
print(len(players_json))

5033


In [None]:
# Spot-check the stored record for one hard-coded player id; raises KeyError
# if 1642264 is not a key of players_json.
players_json[1642264]

In [9]:
# Persist the collected player data as pretty-printed JSON (4-space indent),
# overwriting any existing players_data.json in the working directory.
# json.dump writes the integer player ids as string keys.
with open("players_data.json", "w") as players_file:
    json.dump(players_json, players_file, indent=4)

In [10]:
# Check the Python type of the 'id' field on the first entry of all_players.
print(type(all_players[0]['id']))

<class 'int'>
