### Data for today's games

In [2]:
from nba_api.live.nba.endpoints import scoreboard
import json

games = scoreboard.ScoreBoard()

games_data = games.get_dict()
raw_games = games_data['scoreboard']['games']

# Save the raw payload for every game in a single write. Opening the file in
# 'w' mode inside the per-game loop truncated it on each iteration, so only
# the last game was ever left on disk.
with open('games.json', 'w') as f:
    json.dump(raw_games, f, indent=4)


def _team_summary(team):
    """Return the fields of one scoreboard team entry that this cell reports."""
    return {
        "team_name": team["teamName"],
        "team_city": team["teamCity"],
        "team_id": team["teamId"],
        "abbreviation": team["teamTricode"],
        "wins": team["wins"],
        "losses": team["losses"],
        "score": team["score"],
    }


# One summary dictionary per game, in the order the scoreboard lists them.
all_games = [
    {
        # The value comes from gameStatusText, so for a game in progress it
        # is the live status (e.g. "Q3 3:56" in the recorded output).
        "start_time": game['gameStatusText'],
        "home_team": _team_summary(game['homeTeam']),
        "away_team": _team_summary(game['awayTeam']),
    }
    for game in raw_games
]

# Convert to JSON format if needed
games_json = json.dumps(all_games, indent=4)

# Print JSON data (optional)
print(games_json)

[
    {
        "start_time": "Q3 3:56",
        "home_team": {
            "team_name": "Jazz",
            "team_city": "Utah",
            "team_id": 1610612762,
            "abbreviation": "UTA",
            "wins": 16,
            "losses": 63,
            "score": 72
        },
        "away_team": {
            "team_name": "Trail Blazers",
            "team_city": "Portland",
            "team_id": 1610612757,
            "abbreviation": "POR",
            "wins": 35,
            "losses": 44,
            "score": 73
        }
    },
    {
        "start_time": "Q2 10:52",
        "home_team": {
            "team_name": "Warriors",
            "team_city": "Golden State",
            "team_id": 1610612744,
            "abbreviation": "GSW",
            "wins": 47,
            "losses": 32,
            "score": 32
        },
        "away_team": {
            "team_name": "Spurs",
            "team_city": "San Antonio",
            "team_id": 1610612759,
            "abbreviatio

### Data for past games

In [12]:
from datetime import date
import nba_api.stats.endpoints as endpoints
from nba_api.live.nba.endpoints import boxscore

# Scoreboard for the chosen day; the first returned frame carries a GAME_ID column.
game_date = date(2025, 4, 13)
games = endpoints.scoreboardv2.ScoreboardV2(game_date=game_date)
games_data = games.get_data_frames()[0]

for game_id in games_data['GAME_ID']:
    # Look the game up through the live box score endpoint.
    # NOTE (original author): the box score lookup only works for games from
    # the 2020 season and beyond -- not verified in this notebook.
    box = boxscore.BoxScore(game_id)
    game_data = box.game.get_dict()

    # Pull the name and final score for each side
    home, away = game_data['homeTeam'], game_data['awayTeam']
    home_team, away_team = home['teamName'], away['teamName']
    home_score, away_score = home['score'], away['score']

    # One line per game: "Away (score) @ Home (score)"
    print(f"{away_team} ({away_score}) @ {home_team} ({home_score})")

Magic (105) @ Hawks (117)
Hornets (86) @ Celtics (93)
Knicks (113) @ Nets (105)
Pacers (126) @ Cavaliers (118)
Wizards (119) @ Heat (118)
Bulls (122) @ 76ers (102)
Pistons (133) @ Bucks (140)
Nuggets (126) @ Rockets (111)
Mavericks (97) @ Grizzlies (132)
Jazz (105) @ Timberwolves (116)
Thunder (115) @ Pelicans (100)
Raptors (118) @ Spurs (125)
Clippers (124) @ Warriors (119)
Lakers (81) @ Trail Blazers (109)
Suns (98) @ Kings (109)


### Boxscore For a Game

In [9]:
from datetime import date
import nba_api.stats.endpoints as endpoints
from nba_api.live.nba.endpoints import boxscore

game_date = date(2024, 12, 16)
games = endpoints.scoreboardv2.ScoreboardV2(game_date=game_date)
games_data = games.get_data_frames()[0]

# The game is hard-coded below, so the scoreboard fetched above is only
# needed by the commented-out alternative, which picks a game from that day.
# game_id = games_data['GAME_ID'][2]
game_id = '0021800021'

# Get all stats using BoxScoreTraditionalV2.
# The result is deliberately NOT named `boxscore`: that name is the module
# imported from nba_api.live.nba.endpoints, and rebinding it would break any
# later `boxscore.BoxScore(...)` call in the same kernel.
traditional_boxscore = endpoints.boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
all_data_frames = traditional_boxscore.get_data_frames()

# Write every returned frame to one text file, separated by a blank line.
# UTF-8 is requested explicitly because player names can contain non-ASCII
# characters, which a platform-default encoding may be unable to write.
with open('boxscore.txt', 'w', encoding='utf-8') as file:
    for df in all_data_frames:
        file.write(df.to_string())
        file.write('\n\n')

# Start Database Population Testing

### How to get the season from the game id

In [4]:
def get_season(game_id):
    """
    Map an NBA game_id to its season label.

    The two characters at slice [3:5] of the ID (its 4th and 5th characters)
    are a two-digit season code:
    - Codes 46 through 99 are read as start years 1946 through 1999.
    - Every other code is read as 2000 + code (00 -> 2000, 24 -> 2024).

    The label is the four-digit start year, a dash, and the last two digits
    of the following year, zero-padded (so 1999 becomes '1999-00').

    Parameters:
    game_id (str): The game identifier, e.g. '0022400001'.

    Returns:
    string: The season in the format 'YYYY-YY' (e.g., '2024-25').
    """

    season_code = int(game_id[3:5])

    # Only the codes 46-99 belong to the 1900s; everything else is 2000-based.
    century = 1900 if 46 <= season_code <= 99 else 2000
    start_year = century + season_code

    # Modulo 100 keeps just the last two digits of the following year.
    end_year_short = (start_year + 1) % 100
    return f"{start_year}-{end_year_short:02d}"


# Spot-check the boundary seasons; the expected output is noted beside each ID.
for game_id in (
    '0024600001',  # Output: 1946-47
    '0029900001',  # Output: 1999-00
    '0020000001',  # Output: 2000-01
    '0022400001',  # Output: 2024-25
):
    print(get_season(game_id))

1946-47
1999-00
2000-01
2024-25


### Determine the type of game based on the game id

#### XXXYYGGGGG
XXX: Game type identifier
- 001 = Preseason
- 002 = Regular Season
- 003 = All-Star Game (Also includes Rising Stars games)
- 004 = Playoffs
- 005 = Play-In Tournament (introduced in 2020)
- 006 = NBA Cup Final (introduced in 2023)

YY: Season year indicator
- 46 -> 99 = 1946-47 -> 1999-00
- 00 -> 24 = 2000-01 -> 2024-25

GGGGG: Unique game number

### Get the Home and Away Team Given Game ID

In [5]:
from nba_api.stats.endpoints import boxscoresummaryv2

def get_home_away_team(game_id):
    """
    Look up the home and visiting team IDs for a game.

    Queries BoxScoreSummaryV2 for the game and reads the first row of the
    first returned data frame.

    Parameters:
    game_id (str): The unique identifier for the game.

    Returns:
    tuple: (home_team_id, away_team_id), taken from the HOME_TEAM_ID and
    VISITOR_TEAM_ID columns respectively.
    """
    # The first frame returned by the endpoint holds the game summary.
    game_summary = boxscoresummaryv2.BoxScoreSummaryV2(game_id=game_id).get_data_frames()[0]

    return (
        game_summary['HOME_TEAM_ID'].iloc[0],
        game_summary['VISITOR_TEAM_ID'].iloc[0],
    )

# Example lookup for a single game
game_id = "0022401223"
home_team_id, away_team_id = get_home_away_team(game_id)

for _label, _value in (("Home Team ID:", home_team_id), ("Away Team ID:", away_team_id)):
    print(_label, _value)

Home Team ID: 1610612761
Away Team ID: 1610612741


### Extract Team Data

In [6]:
import pandas as pd

def fill_teams_df(game_id, team_stats, teams_df):
    """
    Add the teams that appear in a game's team stats to teams_df.

    One row is produced per distinct TEAM_ID in team_stats, in order of first
    appearance, however many teams are present (an empty team_stats adds
    nothing). Note that nothing here distinguishes the home team from the
    away team; the rows simply follow the order of team_stats.

    A team is added only if its (team_id, season_year) pair is not already
    in teams_df.

    Parameters:
    game_id (str): Game identifier, used to derive season_year via get_season.
    team_stats (DataFrame): Must provide the columns TEAM_ID, TEAM_CITY,
        TEAM_NAME and TEAM_ABBREVIATION.
    teams_df (DataFrame): Existing teams table with the columns team_id,
        season_year, team_location, team_name and team_abbreviation.

    Returns:
    DataFrame: teams_df itself when nothing new was found, otherwise a new
    frame with the additional rows appended and the index reset.
    """
    season_year = get_season(game_id)

    # Keep the first row seen for each team.
    unique_teams = team_stats.drop_duplicates(subset='TEAM_ID')

    # Set membership keeps the "already stored?" check cheap.
    existing_keys = set(zip(teams_df['team_id'], teams_df['season_year']))

    new_rows = [
        {'team_id': team_id, 'season_year': season_year,
         'team_location': location, 'team_name': name,
         'team_abbreviation': abbreviation}
        for team_id, location, name, abbreviation in zip(
            unique_teams['TEAM_ID'], unique_teams['TEAM_CITY'],
            unique_teams['TEAM_NAME'], unique_teams['TEAM_ABBREVIATION'])
        if (team_id, season_year) not in existing_keys
    ]

    # Concatenate only if new unique rows exist
    if new_rows:
        teams_df = pd.concat([teams_df, pd.DataFrame(new_rows)], ignore_index=True)

    return teams_df

### Extract Player Data

In [7]:
def fill_players_df(player_stats, players_df):
    """
    Add the players found in a game's player stats to players_df.

    One row is produced per distinct PLAYER_ID in player_stats, in order of
    first appearance. PLAYER_NAME is split at the first space: the part
    before it becomes the first name, everything after it the last name
    (an empty string when the name contains no space).

    A player is added only if their player_id is not already in players_df.

    Parameters:
    player_stats (DataFrame): Must provide the columns PLAYER_ID and
        PLAYER_NAME.
    players_df (DataFrame): Existing players table with the columns
        player_id, player_first_name and player_last_name.

    Returns:
    DataFrame: players_df itself when nothing new was found, otherwise a new
    frame with the additional rows appended and the index reset.
    """
    # Plain ids, not 1-tuples: the previous set(zip(...)) stored (id,) tuples
    # while the membership test used the bare id, so it never matched and
    # already-known players were appended again on every call.
    existing_ids = set(players_df['player_id'])

    # Keep the first row seen for each player.
    unique_players = player_stats.drop_duplicates(subset='PLAYER_ID')

    new_rows = []
    for player_id, full_name in zip(unique_players['PLAYER_ID'], unique_players['PLAYER_NAME']):
        if player_id in existing_ids:
            continue

        # Split at the first space only, so multi-word last names stay whole.
        player_first_name, _, player_last_name = full_name.partition(" ")
        new_rows.append(
            {'player_id': player_id,
             'player_first_name': player_first_name, 'player_last_name': player_last_name}
        )

    # Concatenate once, and only if new unique rows exist
    if new_rows:
        players_df = pd.concat([players_df, pd.DataFrame(new_rows)], ignore_index=True)

    return players_df

### Extract Game Metadata

In [8]:
def fill_games_df(game_id, game_date, games_df):
    """
    Add one game's metadata to games_df.

    The game is added only if its (game_id, season_year) pair is not already
    in games_df. That check runs first, so get_home_away_team (which queries
    the BoxScoreSummaryV2 endpoint) is not called for games already stored.

    Parameters:
    game_id (str): The unique identifier for the game.
    game_date: The date of the game, stored as given.
    games_df (DataFrame): Existing games table with the columns game_id,
        season_year, game_date, home_team_id and away_team_id.

    Returns:
    DataFrame: games_df itself when the game is already present, otherwise a
    new frame with the game appended and the index reset.
    """
    season_year = get_season(game_id)

    # Ensure uniqueness before doing any further work (set lookup is O(1))
    existing_keys = set(zip(games_df['game_id'], games_df['season_year']))
    if (game_id, season_year) in existing_keys:
        return games_df

    home_team_id, away_team_id = get_home_away_team(game_id)

    new_row = pd.DataFrame([
        {'game_id': game_id, 'season_year': season_year, 'game_date': game_date,
        'home_team_id': home_team_id, 'away_team_id': away_team_id}
    ])

    return pd.concat([games_df, new_row], ignore_index=True)

### Extract Player Stats

In [9]:
def get_player_game_stats(game_id, player_stats, player_game_stats_df):
    """
    Add one game's per-player stat lines to player_game_stats_df.

    One row is produced per distinct PLAYER_ID in player_stats, using the
    first row seen for that player. The stats are reduced to the columns in
    columns_to_keep (those that exist in player_stats) and stored as a
    one-element list containing a dictionary.

    A row is added only if its (game_id, player_id) pair is not already in
    player_game_stats_df.

    Parameters:
    game_id (str): The unique identifier for the game.
    player_stats (DataFrame): Must provide the columns PLAYER_ID and TEAM_ID,
        plus whichever stat columns should be kept.
    player_game_stats_df (DataFrame): Existing table with the columns
        game_id, player_id, team_id and player_game_stats.

    Returns:
    DataFrame: player_game_stats_df itself when nothing new was found,
    otherwise a new frame with the additional rows appended and the index
    reset.
    """
    columns_to_keep = [
        "MIN", "FGM", "FGA", "FG_PCT", "FG3M", "FG3A", "FG3_PCT",
        "FTM", "FTA", "FT_PCT", "OREB", "DREB", "REB", "AST", "STL",
        "BLK", "TO", "PF", "PTS", "PLUS_MINUS"
    ]

    # Built once: player ids are distinct within a call, so the rows added
    # here can never collide with each other, only with pre-existing rows.
    existing_keys = set(zip(player_game_stats_df['game_id'], player_game_stats_df['player_id']))

    # Keep the first row seen for each player.
    unique_players = player_stats.drop_duplicates(subset='PLAYER_ID')

    new_rows = []
    for position, player_id in enumerate(unique_players['PLAYER_ID']):
        if (game_id, player_id) in existing_keys:
            continue

        player_row = unique_players.iloc[position]

        # Get the player stats as a dictionary and filter it
        all_stats = player_row.to_dict()
        kept_stats = {key: all_stats[key] for key in columns_to_keep if key in all_stats}

        new_rows.append(
            {'game_id': game_id, 'player_id': player_id, 'team_id': player_row['TEAM_ID'],
             # Stored as a single-element list wrapping the stats dictionary.
             'player_game_stats': [kept_stats]}
        )

    # Concatenate once, and only if new unique rows exist, instead of growing
    # the frame with one pd.concat per player.
    if new_rows:
        player_game_stats_df = pd.concat([player_game_stats_df, pd.DataFrame(new_rows)], ignore_index=True)

    return player_game_stats_df

### Full Data Extraction

In [None]:
from datetime import date
import pandas as pd
import nba_api.stats.endpoints as endpoints
from nba_api.live.nba.endpoints import boxscore

# Start from empty tables so that re-running this cell gives the same result.
teams_df = pd.DataFrame(columns=['team_id', 'season_year', 'team_location', 'team_name', 'team_abbreviation'])
players_df = pd.DataFrame(columns=['player_id', 'player_first_name', 'player_last_name'])
games_df = pd.DataFrame(columns=['game_id', 'season_year', 'game_date', 'home_team_id', 'away_team_id'])
player_game_stats_df = pd.DataFrame(columns=['game_id', 'player_id', 'team_id', 'player_game_stats'])

game_date = date(2024, 12, 16)
games = endpoints.scoreboardv2.ScoreboardV2(game_date=game_date)
games_data = games.get_data_frames()[0]

# Use the third game listed on that day's scoreboard (position 2, zero-based)
game_id = games_data['GAME_ID'].iloc[2]

# Get all stats using BoxScoreTraditionalV2.
# The result is deliberately NOT named `boxscore`: that name is the module
# imported from nba_api.live.nba.endpoints, and rebinding it would break any
# later `boxscore.BoxScore(...)` call in the same kernel.
traditional_boxscore = endpoints.boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)

# Build the frames once and pick the two this cell needs by position.
boxscore_frames = traditional_boxscore.get_data_frames()
player_stats = boxscore_frames[0]
team_stats = boxscore_frames[1]

teams_df = fill_teams_df(game_id, team_stats, teams_df)
players_df = fill_players_df(player_stats, players_df)
games_df = fill_games_df(game_id, game_date, games_df)
player_game_stats_df = get_player_game_stats(game_id, player_stats, player_game_stats_df)

print(teams_df)
print('\n')
print(players_df)
print('\n')
print(games_df)
print('\n')
print(player_game_stats_df)

      team_id season_year team_location team_name team_abbreviation
0  1610612761     2024-25       Toronto   Raptors               TOR
1  1610612741     2024-25       Chicago     Bulls               CHI


   player_id player_first_name player_last_name
0    1630245               Ayo          Dosunmu
1    1630172           Patrick         Williams
2     202696            Nikola          Vučević
3    1629632              Coby            White
4    1630581              Josh           Giddey
5    1628366             Lonzo             Ball
6    1629659             Talen    Horton-Tucker
7    1641824             Matas          Buzelis
8    1630188             Jalen            Smith
9    1641763            Julian         Phillips
10   1628975             Jevon           Carter
11   1628470            Torrey            Craig
12   1630537             Chris           Duarte
13   1630534             Ochai           Agbaji
14   1642367          Jonathan            Mogbo
15   1627751             J

# End

### Team stats for each game in a specified season

In [None]:
from nba_api.stats.endpoints import teamgamelog
import pandas as pd

# Team to export: the Dallas Mavericks
team_id = 1610612742

# Request the 2024-25 game log and keep the first frame the endpoint returns
team_game_log = teamgamelog.TeamGameLog(team_id=team_id, season='2024-25')
team_game_log_data = team_game_log.get_data_frames()[0]

# Dump the whole table to a text file as a plain-text rendering
with open('mavericks_season_game_log.txt', 'w') as log_file:
    log_file.write(team_game_log_data.to_string())

### Player data

In [None]:
from nba_api.stats.endpoints import playercareerstats

# Career stats for player_id 2544 (LeBron James, going by the variable name below)
career = playercareerstats.PlayerCareerStats(player_id=2544)

# The first returned frame has one row per season/team, as the output shows
career_frames = career.get_data_frames()
lebron_stats = career_frames[0]

print(lebron_stats)

    PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0        2544   2003-04        00  1610612739               CLE        19.0   
1        2544   2004-05        00  1610612739               CLE        20.0   
2        2544   2005-06        00  1610612739               CLE        21.0   
3        2544   2006-07        00  1610612739               CLE        22.0   
4        2544   2007-08        00  1610612739               CLE        23.0   
5        2544   2008-09        00  1610612739               CLE        24.0   
6        2544   2009-10        00  1610612739               CLE        25.0   
7        2544   2010-11        00  1610612748               MIA        26.0   
8        2544   2011-12        00  1610612748               MIA        27.0   
9        2544   2012-13        00  1610612748               MIA        28.0   
10       2544   2013-14        00  1610612748               MIA        29.0   
11       2544   2014-15        00  1610612739       