In [2]:
import pandas as pd
import numpy as np
from nba_api.stats.endpoints import leaguegamefinder, leaguegamelog, playergamelogs, teamgamelogs, boxscoretraditionalv2, boxscoreadvancedv2, boxscorescoringv2, boxscoredefensivev2, leaguedashteamstats
from nba_api.stats.endpoints import boxscorehustlev2, boxscoreplayertrackv2, boxscoremiscv2, boxscoreusagev2, boxscorematchupsv3, commonplayerinfo
from nba_api.stats.static import teams, players
import time
from multiprocessing import Pool, Manager
from functools import partial

from sbrscrape import Scoreboard

import random

import backoff
import sqlite3


In [4]:
boxscoreusagev2.BoxScoreUsageV2(game_id='0022400049').get_data_frames()[1]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,USG_PCT,PCT_FGM,PCT_FGA,PCT_FG3M,...,PCT_DREB,PCT_REB,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS
0,22400049,1610612758,Kings,SAC,Sacramento,240.000000:00,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,22400049,1610612757,Trail Blazers,POR,Portland,240.000000:00,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [2]:
# Shared rate limiter
class RateLimiter:
    def __init__(self, calls, period):
        self.calls = calls
        self.period = period
        self.last_reset = time.time()
        self.call_count = 0

    def __call__(self):
        current_time = time.time()
        if current_time - self.last_reset > self.period:
            self.last_reset = current_time
            self.call_count = 0
        if self.call_count >= self.calls:
            sleep_time = self.period - (current_time - self.last_reset)
            if sleep_time > 0:
                time.sleep(sleep_time)
            self.last_reset = time.time()
            self.call_count = 0
        self.call_count += 1

# Step 1: Fetch game data (unchanged)
def fetch_games(seasons):
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=seasons)
    games = gamefinder.get_data_frames()[0]
    return games

# Step 2: Fetch player stats for each game
def fetch_player_stats(game_id, rate_limiter):
    rate_limiter()
    boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    stats = boxscore.get_data_frames()[0]
    return game_id, stats

In [86]:
def add_team_basic_boxscores_to_db(conn, seasons):
    """This function pulls basic team boxscores from the NBA_API package 
    and appends (or creates a new table if not exists) it to the table team_basic_boxscores in the sqlite db"""
    
    for season in seasons:
        season_boxscores = []
        if season >= '2019-20':
            season_types = ['Regular Season', 'PlayIn', 'Playoffs']
        else:
            season_types = ['Regular Season', 'Playoffs']
        for season_type in season_types:
            team_boxscores = leaguegamelog.LeagueGameLog(season=season, season_type_all_star=season_type).get_data_frames()[0]
            season_boxscores.append(team_boxscores)
            sleep(2)
        season_df = pd.concat(season_boxscores)
        season_df['SEASON_YEAR'] = season
        season_df.drop(columns = ['VIDEO_AVAILABLE'], inplace=True)
        
        table_name = 'team_basic_stats'
        season_df.to_sql(table_name, conn, if_exists='append', index=False)
        
        sleep(3)
        
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_ID)')
    conn.commit()
    
    return None


def add_player_basic_boxscores_to_db(conn, seasons):
    """This function pulls basic team boxscores from the NBA_API package 
    and appends (or creates a new table if not exists) it to the table team_basic_boxscores in the sqlite db"""
    
    for season in seasons:
        season_boxscores = []
        if season >= '2019-20':
            season_types = ['Regular Season', 'PlayIn', 'Playoffs']
        else:
            season_types = ['Regular Season', 'Playoffs']
        for season_type in season_types:
            player_boxscores = playergamelogs.PlayerGameLogs(season_nullable=season, season_type_nullable=season_type).get_data_frames()[0]
            season_boxscores.append(player_boxscores)
            sleep(2)
        season_df = pd.concat(season_boxscores)
        season_df['SEASON_YEAR'] = season
        
        table_name = 'player_basic_stats'
        season_df.to_sql(table_name, conn, if_exists='append', index=False)
        
        sleep(3)
            
    cur = conn.cursor()
    cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY PLAYER_ID, GAME_ID)')
    conn.commit()
        
    return None



In [3]:
# Initialize SQLite database
conn = sqlite3.connect('nba_stats.db')
cursor = conn.cursor()

# Initialize rate limiter (adjust calls and period as needed)
rate_limiter = RateLimiter(calls=30, period=60)  # 2


def fetch_game_ids(season):
    rate_limiter()
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season, league_id_nullable='00')
    games = gamefinder.get_data_frames()[0]
    
    nba_games = games.loc[((games['SEASON_ID'].str.startswith('22')) | (games['SEASON_ID'].str.startswith('42')) | (games['SEASON_ID'].str.startswith('52')))]    
    return nba_games['GAME_ID'].unique()

def insert_boxscore_data(data_dict):
    for table_name, df in data_dict.items():
        df = df.drop_duplicates()
        df.to_sql(table_name, conn, if_exists='append', index=False)
        
        
def fetch_boxscore(game_id):
    data_dict = {}
    error_game_ids = []

    try:
        # Advanced
        rate_limiter()
        advanced = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
        data_dict['player_advanced_stats'] = advanced.get_data_frames()[0]
        data_dict['team_advanced_stats'] = advanced.get_data_frames()[1]

        # Scoring
        rate_limiter()
        scoring = boxscorescoringv2.BoxScoreScoringV2(game_id=game_id)
        data_dict['player_scoring_stats'] = scoring.get_data_frames()[0]
        data_dict['team_scoring_stats'] = scoring.get_data_frames()[1]

        # Hustle
        rate_limiter()
        hustle = boxscorehustlev2.BoxScoreHustleV2(game_id=game_id)
        data_dict['player_hustle_stats'] = hustle.get_data_frames()[0]
        data_dict['team_hustle_stats'] = hustle.get_data_frames()[1]

        # Tracking
        rate_limiter()
        player_track = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id=game_id)
        data_dict['player_track_stats'] = player_track.get_data_frames()[0]
        data_dict['team_track_stats'] = player_track.get_data_frames()[1]

        # Miscellaneous
        rate_limiter()
        misc = boxscoremiscv2.BoxScoreMiscV2(game_id=game_id)
        data_dict['player_miscellaneous_stats'] = misc.get_data_frames()[0]
        data_dict['team_miscellaneous_stats'] = misc.get_data_frames()[1]

        # Usage
        rate_limiter()
        usage = boxscoreusagev2.BoxScoreUsageV2(game_id=game_id)
        data_dict['player_usage_stats'] = usage.get_data_frames()[0]
        data_dict['team_usage_stats'] = usage.get_data_frames()[1]

    except AttributeError as e:
        print(f"AttributeError occurred for GAME_ID: {game_id}")
        print(f"Error message: {str(e)}")
        error_game_ids.append(game_id)
    except Exception as e:
        print(f"An unexpected error occurred for GAME_ID: {game_id}")
        print(f"Error message: {str(e)}")
        error_game_ids.append(game_id)

    return data_dict, error_game_ids
    
    
def build_database(seasons, batch_size=10):
    for season in seasons:
        print(f"Processing season: {season}")
        game_ids = fetch_game_ids(season)
        
        batch_data = {table: [] for table in ['player_advanced_stats', 'team_advanced_stats', 
                                              'player_scoring_stats', 'team_scoring_stats',
                                               'player_hustle_stats', 'team_hustle_stats',
                                               'player_track_stats', 'team_track_stats',
                                               'player_miscellaneous_stats', 'team_miscellaneous_stats',
                                               'player_usage_stats', 'team_usage_stats'
                                             ]}
        
        for i, game_id in enumerate(game_ids):
            print(f"Processing game {i+1}/{len(game_ids)} in season {season}")
            game_data, error_game_ids = fetch_boxscore(game_id)
            
            for table, df in game_data.items():
                batch_data[table].append(df)
            
            if (i + 1) % batch_size == 0 or i == len(game_ids) - 1:
                # Concatenate and insert batch data
                concatenated_data = {table: pd.concat(dfs, ignore_index=True) 
                                     for table, dfs in batch_data.items() if dfs}
                insert_boxscore_data(concatenated_data)
                
                # Clear the batch data
                batch_data = {table: [] for table in batch_data.keys()}

        print(f"Season: {season}: ", "Error_game_ids:", error_game_ids)
# # Usage
# seasons_to_fetch = ['2013-14', '2014-15', '2015-16']  # Add more seasons as needed
# build_database(seasons_to_fetch, batch_size=10)
# # Close the database connection
# conn.close()

In [45]:
def check_and_add_missing_games(seasons, db_path):
    with sqlite3.connect(db_path) as conn:
        for season in seasons:
            print(f"Checking season: {season}")
            
            # Fetch all game IDs for the season
            all_game_ids = set(fetch_game_ids(season))
            
            table_pairs = [
                ('team_advanced_stats', 'player_advanced_stats'),
                ('team_scoring_stats', 'player_scoring_stats'),
                ('team_track_stats', 'player_track_stats'),
                ('team_miscellaneous_stats', 'player_miscellaneous_stats'),
                ('team_usage_stats', 'player_usage_stats')
            ]
            
            if season > '2015-16':
                table_pairs.append(('team_hustle_stats', 'player_hustle_stats'))
            
            for team_table, player_table in table_pairs:
                print(f"Checking tables: {team_table} and {player_table}")
                
                # Get game IDs already in the database for this table pair
                id_column = 'gameId' if 'hustle' in team_table else 'GAME_ID'
                existing_game_ids = set(pd.read_sql(f"SELECT DISTINCT {id_column} FROM {player_table}", conn)[id_column])
                
                # Find missing game IDs
                missing_game_ids = all_game_ids - existing_game_ids
                
                if missing_game_ids:
                    print(f"Found {len(missing_game_ids)} missing games for {team_table} and {player_table}")
                    
                    for game_id in missing_game_ids:
                        print(f"Fetching data for game ID: {game_id}")
                        try:
                            game_data = get_game_data(team_table, game_id)
                            if game_data:
                                player_df, team_df = game_data
                                player_df.to_sql(player_table, conn, if_exists='append', index=False)
                                team_df.to_sql(team_table, conn, if_exists='append', index=False)
                                print(f"Added data for game ID {game_id} to {player_table} and {team_table}")
                            else:
                                print(f"No data available for game ID {game_id} in {team_table} and {player_table}")
                                
                            time.sleep(random.random()*2)
                        except Exception as e:
                            print(f"Error processing game ID {game_id} for {team_table} and {player_table}: {str(e)}")
                else:
                    print(f"No missing games found for {team_table} and {player_table}")

import backoff
from requests.exceptions import RequestException, Timeout

def custom_backoff():
    yield 60
    yield 120
    yield 180

@backoff.on_exception(custom_backoff, (RequestException, Timeout), max_tries=4)
def get_game_data(table, game_id):
    endpoint_map = {
        'team_advanced_stats': boxscoreadvancedv2.BoxScoreAdvancedV2,
        'team_scoring_stats': boxscorescoringv2.BoxScoreScoringV2,
        'team_hustle_stats': boxscorehustlev2.BoxScoreHustleV2,
        'team_track_stats': boxscoreplayertrackv2.BoxScorePlayerTrackV2,
        'team_miscellaneous_stats': boxscoremiscv2.BoxScoreMiscV2,
        'team_usage_stats': boxscoreusagev2.BoxScoreUsageV2
    }
    
    endpoint = endpoint_map.get(table)
    if endpoint:
        return endpoint(game_id=game_id).get_data_frames()
    return None

# Usage
seasons_to_check = ['2014-15', '2015-16']
db_path = 'nba_stats.db'

check_and_add_missing_games(seasons_to_check, db_path)

Checking season: 2014-15
Checking tables: team_advanced_stats and player_advanced_stats
No missing games found for team_advanced_stats and player_advanced_stats
Checking tables: team_scoring_stats and player_scoring_stats
No missing games found for team_scoring_stats and player_scoring_stats
Checking tables: team_track_stats and player_track_stats
Found 1 missing games for team_track_stats and player_track_stats
Fetching data for game ID: 0021400501
Added data for game ID 0021400501 to player_track_stats and team_track_stats
Checking tables: team_miscellaneous_stats and player_miscellaneous_stats
No missing games found for team_miscellaneous_stats and player_miscellaneous_stats
Checking tables: team_usage_stats and player_usage_stats
No missing games found for team_usage_stats and player_usage_stats
Checking season: 2015-16
Checking tables: team_advanced_stats and player_advanced_stats
No missing games found for team_advanced_stats and player_advanced_stats
Checking tables: team_scorin

In [44]:
boxscorehustlev2.BoxScoreHustleV2(game_id='0021600972')

<nba_api.stats.endpoints.boxscorehustlev2.BoxScoreHustleV2 at 0x1f7c5b404d0>

In [119]:
conn = sqlite3.connect('nba_stats.db')

for table_name in team_tables:
    cur = conn.cursor()
    if 'hustle' in table_name:
        cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY teamId, gameId)')
    else:
        cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY TEAM_ID, GAME_ID)')
    conn.commit()
    
conn.close()

In [120]:
conn = sqlite3.connect('nba_stats.db')

for table_name in player_tables:
    cur = conn.cursor()
    if 'hustle' in table_name:
        cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY personId, gameId)')
    else:
        cur.execute(f'DELETE FROM {table_name} WHERE rowid NOT IN (SELECT max(rowid) FROM {table_name} GROUP BY PLAYER_ID, GAME_ID)')
    conn.commit()
    
conn.close()

In [121]:
conn = sqlite3.connect('nba_stats.db')

df_hustle_player = pd.read_sql("""select * from player_hustle_stats""", conn)

df_hustle_player.loc[df_hustle_player['gameId'] == '0052000101']

Unnamed: 0,gameId,teamId,teamCity,teamName,teamTricode,teamSlug,personId,firstName,familyName,nameI,playerSlug,position,comment,jerseyNum,minutes,points,contestedShots,contestedShots2pt,contestedShots3pt,deflections,chargesDrawn,screenAssists,screenAssistPoints,looseBallsRecoveredOffensive,looseBallsRecoveredDefensive,looseBallsRecoveredTotal,offensiveBoxOuts,defensiveBoxOuts,boxOutPlayerTeamRebounds,boxOutPlayerRebounds,boxOuts
225488,52000101,1610612764,Washington,Wizards,WAS,wizards,203078,Bradley,Beal,B. Beal,bradley-beal,F,,3,35:39,22,8,3,5,2,0,0,0,0,1,1,0,0,0,0,0
225489,52000101,1610612764,Washington,Wizards,WAS,wizards,1629060,Rui,Hachimura,R. Hachimura,rui-hachimura,F,,8,16:37,8,3,1,2,0,0,0,0,1,0,1,0,0,0,0,0
225490,52000101,1610612764,Washington,Wizards,WAS,wizards,203458,Alex,Len,A. Len,alex-len,C,,27,11:39,5,7,6,1,0,0,2,4,0,0,0,0,0,0,0,0
225491,52000101,1610612764,Washington,Wizards,WAS,wizards,203526,Raul,Neto,R. Neto,raul-neto,G,,19,17:22,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0
225492,52000101,1610612764,Washington,Wizards,WAS,wizards,201566,Russell,Westbrook,R. Westbrook,russell-westbrook,G,,4,36:45,20,5,3,2,4,0,0,0,0,0,0,0,0,0,0,0
225493,52000101,1610612764,Washington,Wizards,WAS,wizards,1629655,Daniel,Gafford,D. Gafford,daniel-gafford,,,21,20:31,12,6,5,1,0,0,3,6,0,1,1,0,1,0,0,1
225494,52000101,1610612764,Washington,Wizards,WAS,wizards,202722,Davis,Bertans,D. Bertans,davis-bertans,,,42,32:46,4,12,7,5,0,0,0,0,0,0,0,0,3,1,0,3
225495,52000101,1610612764,Washington,Wizards,WAS,wizards,202397,Ish,Smith,I. Smith,ish-smith,,,14,26:23,17,3,1,2,2,0,0,0,1,0,1,0,0,0,0,0
225496,52000101,1610612764,Washington,Wizards,WAS,wizards,1628990,Chandler,Hutchison,C. Hutchison,chandler-hutchison,,,1,10:20,2,5,1,4,0,0,0,0,0,0,0,0,0,0,0,0
225497,52000101,1610612764,Washington,Wizards,WAS,wizards,201577,Robin,Lopez,R. Lopez,robin-lopez,,,15,15:50,5,7,5,2,0,0,3,6,0,0,0,0,2,2,0,2


## Add Sports Betting Odds

In [126]:
# Function to get NBA game dates for a season
def get_nba_game_dates(season):
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season)
    
    games = gamefinder.get_data_frames()[0]
    nba_games = games.loc[(games['SEASON_ID'].str.startswith('22')) | (games['SEASON_ID'].str.startswith('42'))]
    
    return sorted(list(nba_games['GAME_DATE'].unique()))

# Function to get NBA odds data
def get_nba_odds(date):
    sb = Scoreboard(sport='NBA', date=date)
    odds_data = sb.games

    return pd.DataFrame(odds_data)


def process_odds_data(odds_df, season):
    # Flatten the nested dictionaries
    for column in ['home_spread', 'home_spread_odds', 'away_spread', 'away_spread_odds', 'home_ml', 'away_ml', 'total', 'over_odds', 'under_odds']:
        if column in odds_df.columns:
            for sportsbook in ['betmgm', 'fanduel', 'caesars', 'bet365', 'draftkings', 'bet_rivers_ny']:
                odds_df[f'{column}_{sportsbook}'] = odds_df[column].apply(
                    lambda x: x.get(sportsbook) if isinstance(x, dict) else None
                )
    # Drop the original nested columns
    odds_df = odds_df.drop(columns=['home_spread', 'home_spread_odds', 'away_spread', 'away_spread_odds', 'home_ml', 'away_ml', 'total', 'over_odds', 'under_odds'], errors='ignore')
    
    # Add season column
    odds_df['season'] = season
    
    return odds_df

# Function to insert odds data into the database
def insert_odds_data(conn, odds_df):
    odds_df.to_sql('nba_odds', conn, if_exists='append', index=False)
    return None

# Main function
def main():
    # Set your season (format: YYYY-YY)
    seasons = ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24']
    for season in seasons:
        # Get the game dates
        game_dates = get_nba_game_dates(season)

        # Connect to your SQLite database
        conn = sqlite3.connect('nba_stats.db')

        # Fetch and store odds data for each game date
        for date in game_dates:
            try:
                odds_df = get_nba_odds(date)
            except:
                print(f"No Games for {date}")
                continue
            if not odds_df.empty:
                # Add a column for the season
                
                processed_df = process_odds_data(odds_df, season)
                processed_df['GAME_DATE'] = date
                # Load the data into the database
                processed_df.to_sql('nba_odds', conn, if_exists='append', index=False)
                print(f"Data for {date} loaded into the database.")
            else:
                print(f"No data found for {date}")
        time.sleep(120)

    print("All data successfully loaded into the database.")
    
    
    
main()

OperationalError: table nba_odds has no column named GAME_DATE

In [125]:
sb = Scoreboard(sport='NBA', date='2024-06-17')
odds_data = sb.games
odds_data

[{'date': '2024-06-18T00:30:00+00:00',
  'status': 'Final',
  'home_team': 'Boston Celtics',
  'home_team_loc': 'Boston',
  'home_team_abbr': 'BOS',
  'home_team_rank': -1,
  'away_team': 'Dallas Mavericks',
  'away_team_loc': 'Dallas',
  'away_team_abbr': 'DAL',
  'away_team_rank': -1,
  'home_score': 106,
  'away_score': 88,
  'home_spread': {'betmgm': -6.5,
   'fanduel': -6.5,
   'caesars': -7,
   'bet365': -7,
   'draftkings': -6.5,
   'bet_rivers_ny': -6.5},
  'home_spread_odds': {'betmgm': -115,
   'fanduel': -114,
   'caesars': -105,
   'bet365': -105,
   'draftkings': -115,
   'bet_rivers_ny': -113},
  'away_spread': {'betmgm': 6.5,
   'fanduel': 6.5,
   'caesars': 7,
   'bet365': 7,
   'draftkings': 6.5,
   'bet_rivers_ny': 6.5},
  'away_spread_odds': {'betmgm': -105,
   'fanduel': -106,
   'caesars': -115,
   'bet365': -115,
   'draftkings': -105,
   'bet_rivers_ny': -110},
  'under_odds': {'betmgm': -115,
   'fanduel': -108,
   'caesars': -105,
   'bet365': -110,
   'draftki

In [17]:
sb = Scoreboard(sport='NBA', date='2019-10-30')
odds_df = pd.DataFrame(sb.games)
odds_df['home_spread'].values

array([{'fanduel': -3.5, 'caesars': 2, 'bet365': 2, 'draftkings': -7.5},
       {'fanduel': -5.5, 'caesars': -10, 'bet365': -9.5, 'draftkings': -5},
       {'fanduel': -22.5, 'caesars': -7, 'bet365': -7, 'draftkings': -15},
       {'fanduel': -10.5, 'caesars': 3.5, 'bet365': 3.5, 'draftkings': -9},
       {'fanduel': 11.5, 'caesars': -3.5, 'bet365': -3, 'draftkings': 11},
       {'fanduel': -17.5, 'caesars': -9, 'bet365': -9.5, 'draftkings': -13.5},
       {'fanduel': 5.5, 'caesars': 2, 'bet365': 1.5, 'draftkings': 5},
       {'fanduel': -3.5, 'caesars': 8, 'bet365': 8, 'draftkings': -4},
       {'fanduel': 10.5, 'caesars': -7.5, 'bet365': -7.5, 'draftkings': 9.5},
       {'fanduel': -14.5, 'caesars': -7, 'bet365': -7, 'draftkings': -14.5},
       {'fanduel': 13.5, 'caesars': -5.5, 'bet365': -5.5, 'draftkings': 12.5}],
      dtype=object)

In [31]:
sb = Scoreboard(sport='NBA', date='2019-05-30')
odds_df = pd.DataFrame(sb.games)
odds_df['home_spread'].values

array([{'fanduel': -1.5, 'caesars': -1.5, 'bet365': -2}], dtype=object)

In [33]:
def process_odds_data(odds_df, season):
    # Flatten the nested dictionaries
    for column in ['home_spread', 'home_spread_odds', 'away_spread', 'away_spread_odds', 'home_ml', 'away_ml', 'total', 'over_odds', 'under_odds']:
        if column in odds_df.columns:
            for sportsbook in ['betmgm', 'fanduel', 'caesars', 'bet365', 'draftkings', 'bet_rivers_ny']:
                odds_df[f'{column}_{sportsbook}'] = odds_df[column].apply(
                    lambda x: x.get(sportsbook) if isinstance(x, dict) else None
                )
    # Drop the original nested columns
    odds_df = odds_df.drop(columns=['home_spread', 'home_spread_odds', 'away_spread', 'away_spread_odds', 'home_ml', 'away_ml', 'total', 'over_odds', 'under_odds'], errors='ignore')
    
    # Add season column
    odds_df['season'] = season
    
    return odds_df

process_odds_data(odds_df, season='2023-24')



Unnamed: 0,date,status,home_team,home_team_loc,home_team_abbr,home_team_rank,away_team,away_team_loc,away_team_abbr,away_team_rank,home_score,away_score,home_spread_betmgm,home_spread_fanduel,home_spread_caesars,home_spread_bet365,home_spread_draftkings,home_spread_bet_rivers_ny,home_spread_odds_betmgm,home_spread_odds_fanduel,home_spread_odds_caesars,home_spread_odds_bet365,home_spread_odds_draftkings,home_spread_odds_bet_rivers_ny,away_spread_betmgm,...,away_ml_betmgm,away_ml_fanduel,away_ml_caesars,away_ml_bet365,away_ml_draftkings,away_ml_bet_rivers_ny,total_betmgm,total_fanduel,total_caesars,total_bet365,total_draftkings,total_bet_rivers_ny,over_odds_betmgm,over_odds_fanduel,over_odds_caesars,over_odds_bet365,over_odds_draftkings,over_odds_bet_rivers_ny,under_odds_betmgm,under_odds_fanduel,under_odds_caesars,under_odds_bet365,under_odds_draftkings,under_odds_bet_rivers_ny,season
0,2019-05-31T01:00:00+00:00,Final,Toronto Raptors,Toronto,TOR,-1,Golden State Warriors,Golden State,GS,-1,118,109,,-1.5,-1.5,-2,,,,-110,-110,-111,,,,...,,102,105,115,,,,212.5,213,212.5,,,,-110,-110,-111,,,,-110,-110,-111,,,2023-24


In [40]:
with sqlite3.connect('../data/nba_stats.db') as conn:
    # Get existing dates
    query = """
    SELECT *
    FROM nba_odds 
    """
    odds_df = pd.read_sql_query(
        query, 
        conn)
        

odds_df


Unnamed: 0,GAME_DATE,status,home_team,home_team_loc,home_team_abbr,home_team_rank,away_team,away_team_loc,away_team_abbr,away_team_rank,home_score,away_score,home_spread_betmgm,home_spread_fanduel,home_spread_caesars,home_spread_bet365,home_spread_draftkings,home_spread_bet_rivers_ny,home_spread_odds_betmgm,home_spread_odds_fanduel,home_spread_odds_caesars,home_spread_odds_bet365,home_spread_odds_draftkings,home_spread_odds_bet_rivers_ny,away_spread_betmgm,away_spread_fanduel,away_spread_caesars,away_spread_bet365,away_spread_draftkings,away_spread_bet_rivers_ny,away_spread_odds_betmgm,away_spread_odds_fanduel,away_spread_odds_caesars,away_spread_odds_bet365,away_spread_odds_draftkings,away_spread_odds_bet_rivers_ny,home_ml_betmgm,home_ml_fanduel,home_ml_caesars,home_ml_bet365,home_ml_draftkings,home_ml_bet_rivers_ny,away_ml_betmgm,away_ml_fanduel,away_ml_caesars,away_ml_bet365,away_ml_draftkings,away_ml_bet_rivers_ny,total_betmgm,total_fanduel,total_caesars,total_bet365,total_draftkings,total_bet_rivers_ny,over_odds_betmgm,over_odds_fanduel,over_odds_caesars,over_odds_bet365,over_odds_draftkings,over_odds_bet_rivers_ny,under_odds_betmgm,under_odds_fanduel,under_odds_caesars,under_odds_bet365,under_odds_draftkings,under_odds_bet_rivers_ny,SEASON
0,2019-10-23,Final OT,Toronto Raptors,Toronto,TOR,-1,New Orleans Pelicans,New Orleans,NO,-1,130,122,-12.5,-3.5,-7.0,-6.5,-1.5,,200,104.0,-110.0,-110.0,-121.0,,12.5,3.5,7.0,6.5,1.5,,-250,-132.0,-110.0,-110.0,-108.0,,-294,-1800.0,-290.0,-290.0,-5000.0,,230,800.0,245.0,235.0,1500.0,,229.5,234.5,229.0,229.5,232.5,,-110,100.0,-110.0,-110.0,-115.0,,-110,-128.0,-110.0,-110.0,-112.0,,2019-20
1,2019-10-23,Final,Los Angeles Clippers,L.A. Clippers,LAC,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,112,102,3.5,-9.5,,3.5,-7.0,,-115,-132.0,,-105.0,-114.0,,-3.5,9.5,,-3.5,7.0,,-105,104.0,,-115.0,-113.0,,140,-1600.0,,150.0,-2500.0,,-167,750.0,,-170.0,1150.0,,222.5,218.5,,224.5,219.5,,-115,-112.0,,-110.0,117.0,,-105,-112.0,,-110.0,-152.0,,2019-20
2,2019-10-23,Final,Charlotte Hornets,Charlotte,CHA,-1,Chicago Bulls,Chicago,CHI,-1,126,125,1.5,-1.5,3.5,3.5,4.5,,-110,108.0,-110.0,-110.0,-112.0,,-1.5,1.5,-3.5,-3.5,-4.5,,-110,-136.0,-110.0,-110.0,-120.0,,110,-145.0,140.0,140.0,-175.0,,-133,120.0,-160.0,-160.0,132.0,,,253.5,216.5,217.0,255.5,,,-108.0,-110.0,-110.0,-117.0,,,-118.0,-110.0,-110.0,-114.0,,2019-20
3,2019-10-23,Final,Orlando Magic,Orlando,ORL,-1,Cleveland Cavaliers,Cleveland,CLE,-1,94,85,-7.5,-14.5,-9.5,-9.5,-14.5,,-115,-122.0,-110.0,-110.0,-117.0,,7.5,14.5,9.5,9.5,14.5,,-105,-106.0,-110.0,-110.0,-114.0,,-323,-8000.0,-475.0,-500.0,-670.0,,260,3300.0,375.0,375.0,420.0,,,180.5,211.5,211.5,182.5,,,-126.0,-110.0,-110.0,-120.0,,,-102.0,-110.0,-110.0,-112.0,,2019-20
4,2019-10-23,Final,Indiana Pacers,Indiana,IND,-1,Detroit Pistons,Detroit,DET,-1,110,119,-6.0,2.5,-7.0,-7.5,1.5,,-110,-132.0,-110.0,-105.0,-113.0,,6.0,-2.5,7.0,7.5,-1.5,,-110,104.0,-110.0,-115.0,-118.0,,-175,1200.0,-300.0,-300.0,115.0,,145,-2400.0,250.0,240.0,-150.0,,,228.5,211.0,211.0,223.0,,,-110.0,-110.0,-110.0,-143.0,,,-116.0,-110.0,-110.0,108.0,,2019-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6432,2024-11-16,Final,Charlotte Hornets,Charlotte,CHA,-1,Milwaukee Bucks,Milwaukee,MIL,-1,115,114,3.5,3.5,3.5,4.0,4.0,3.5,-105,-106.0,-110.0,-110.0,-118.0,-115,-3.5,-3.5,-3.5,-4.0,-4.0,-3.5,-115,-114.0,-110.0,-110.0,-102.0,-108,145,146.0,143.0,145.0,142.0,133,-175,-174.0,-169.0,-170.0,-170.0,-159,220.5,219.5,220.0,220.0,220.0,219.5,-110,-110.0,-115.0,-110.0,-105.0,-113,-110,-110.0,-105.0,-110.0,-115.0,-110,2024-25
6433,2024-11-16,20:00 ET,Boston Celtics,Boston,BOS,-1,Toronto Raptors,Toronto,TOR,-1,0,0,-16.5,-16.5,-16.5,-16.5,-16.5,-16.0,-110,-110.0,-115.0,-110.0,-108.0,-110,16.5,16.5,16.5,16.5,16.5,16.0,-110,-110.0,-105.0,-110.0,-112.0,-113,-1600,-1600.0,-1667.0,-1600.0,-1600.0,-1667,900,900.0,900.0,900.0,900.0,850,230.5,230.0,230.0,230.0,230.5,229.5,-110,-112.0,-115.0,-110.0,-108.0,-113,-110,-108.0,-105.0,-110.0,-112.0,-110,2024-25
6434,2024-11-16,20:00 ET,New Orleans Pelicans,New Orleans,NO,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,0,0,7.5,8.0,7.5,7.5,8.0,7.0,-105,-110.0,-105.0,-105.0,-110.0,-108,-7.5,-8.0,-7.5,-7.5,-8.0,-7.0,-115,-110.0,-115.0,-115.0,-110.0,-115,260,280.0,260.0,250.0,250.0,235,-350,-350.0,-333.0,-310.0,-310.0,-286,220.5,220.5,220.5,220.5,220.5,220.0,-110,-110.0,-110.0,-110.0,-110.0,-113,-110,-110.0,-110.0,-110.0,-110.0,-110,2024-25
6435,2024-11-16,20:30 ET,Dallas Mavericks,Dallas,DAL,-1,San Antonio Spurs,San Antonio,SA,-1,0,0,-13.5,-13.5,-13.5,-13.5,-13.0,-13.0,-110,-110.0,-110.0,-110.0,-115.0,-113,13.5,13.5,13.5,13.5,13.0,13.0,-110,-110.0,-110.0,-110.0,-105.0,-110,-900,-800.0,-1000.0,-800.0,-850.0,-715,575,560.0,650.0,550.0,575.0,480,228.5,229.0,229.0,229.0,228.5,229.0,-115,-110.0,-115.0,-110.0,-115.0,-109,-105,-110.0,-105.0,-110.0,-105.0,-114,2024-25


In [41]:
odds_df.tail(20)

Unnamed: 0,GAME_DATE,status,home_team,home_team_loc,home_team_abbr,home_team_rank,away_team,away_team_loc,away_team_abbr,away_team_rank,home_score,away_score,home_spread_betmgm,home_spread_fanduel,home_spread_caesars,home_spread_bet365,home_spread_draftkings,home_spread_bet_rivers_ny,home_spread_odds_betmgm,home_spread_odds_fanduel,home_spread_odds_caesars,home_spread_odds_bet365,home_spread_odds_draftkings,home_spread_odds_bet_rivers_ny,away_spread_betmgm,away_spread_fanduel,away_spread_caesars,away_spread_bet365,away_spread_draftkings,away_spread_bet_rivers_ny,away_spread_odds_betmgm,away_spread_odds_fanduel,away_spread_odds_caesars,away_spread_odds_bet365,away_spread_odds_draftkings,away_spread_odds_bet_rivers_ny,home_ml_betmgm,home_ml_fanduel,home_ml_caesars,home_ml_bet365,home_ml_draftkings,home_ml_bet_rivers_ny,away_ml_betmgm,away_ml_fanduel,away_ml_caesars,away_ml_bet365,away_ml_draftkings,away_ml_bet_rivers_ny,total_betmgm,total_fanduel,total_caesars,total_bet365,total_draftkings,total_bet_rivers_ny,over_odds_betmgm,over_odds_fanduel,over_odds_caesars,over_odds_bet365,over_odds_draftkings,over_odds_bet_rivers_ny,under_odds_betmgm,under_odds_fanduel,under_odds_caesars,under_odds_bet365,under_odds_draftkings,under_odds_bet_rivers_ny,SEASON
6417,2024-11-13,Final,Los Angeles Lakers,L.A. Lakers,LAL,-1,Memphis Grizzlies,Memphis,MEM,-1,128,123,-7.5,-7.0,-7.5,-7.5,-7.5,-7.0,-110,-112.0,-110.0,-110.0,-105.0,-110,7.5,7.0,7.5,7.5,7.5,7.0,-110,-108.0,-110.0,-110.0,-115.0,-113,-350,-295.0,-323.0,-310.0,-310.0,-286,260,240.0,250.0,250.0,250.0,230,233.5,233.0,233.0,233.0,233.0,233.5,-105,-110.0,-110.0,-110.0,-112.0,-113,-115,-110.0,-110.0,-110.0,-108.0,-110,2024-25
6418,2024-11-13,Final,Sacramento Kings,Sacramento,SAC,-1,Phoenix Suns,Phoenix,PHO,-1,127,104,-8.5,-8.0,-8.0,-8.0,-8.0,-8.5,-105,-106.0,-110.0,-110.0,-115.0,-114,8.5,8.0,8.0,8.0,8.0,8.5,-115,-114.0,-110.0,-110.0,-105.0,-109,-350,-300.0,-357.0,-340.0,-345.0,-345,275,245.0,278.0,270.0,275.0,275,227.5,227.0,227.5,226.5,227.5,225.5,-105,-110.0,-115.0,-110.0,-112.0,-113,-115,-110.0,-105.0,-110.0,-108.0,-109,2024-25
6419,2024-11-14,Final,Utah Jazz,Utah,UTA,-1,Dallas Mavericks,Dallas,DAL,-1,115,113,7.5,7.5,8.0,7.5,7.5,7.5,-110,-108.0,-110.0,-110.0,-110.0,-108,-7.5,-7.5,-8.0,-7.5,-7.5,-7.5,-110,-112.0,-110.0,-110.0,-110.0,-115,260,260.0,270.0,260.0,260.0,265,-350,-320.0,-345.0,-320.0,-325.0,-335,230.5,230.0,230.5,230.0,229.5,230.0,-105,-108.0,-115.0,-110.0,-110.0,-112,-115,-112.0,-105.0,-110.0,-110.0,-112,2024-25
6420,2024-11-15,Final,Indiana Pacers,Indiana,IND,-1,Miami Heat,Miami,MIA,-1,111,124,-4.5,-4.5,-4.5,-4.5,-4.5,-4.5,-110,-108.0,-115.0,-115.0,-112.0,-110,4.5,4.5,4.5,4.5,4.5,4.5,-110,-112.0,-105.0,-105.0,-108.0,-113,-185,-188.0,-189.0,-190.0,-185.0,-180,150,158.0,158.0,160.0,154.0,150,228.5,228.5,227.5,228.5,228.5,228.0,-115,-110.0,-110.0,-110.0,-108.0,-113,-105,-110.0,-110.0,-110.0,-112.0,-110,2024-25
6421,2024-11-15,Final,Toronto Raptors,Toronto,TOR,-1,Detroit Pistons,Detroit,DET,-1,95,99,2.5,2.5,2.5,2.5,2.5,2.5,-105,-108.0,-110.0,-105.0,-102.0,-112,-2.5,-2.5,-2.5,-2.5,-2.5,-2.5,-115,-112.0,-110.0,-115.0,-118.0,-112,120,124.0,122.0,120.0,120.0,115,-145,-146.0,-145.0,-140.0,-142.0,-139,225.5,225.0,225.0,225.0,225.0,224.5,-105,-110.0,-110.0,-110.0,-108.0,-109,-115,-110.0,-110.0,-110.0,-112.0,-114,2024-25
6422,2024-11-15,Final,Orlando Magic,Orlando,ORL,-1,Philadelphia 76ers,Philadelphia,PHI,-1,98,86,-2.5,-2.5,-2.5,-2.5,-2.5,-2.0,-115,-110.0,-110.0,-105.0,-112.0,-110,2.5,2.5,2.5,2.5,2.5,2.0,-105,-110.0,-110.0,-115.0,-108.0,-113,-145,-138.0,-141.0,-135.0,-142.0,-129,120,118.0,118.0,115.0,120.0,108,209.5,208.5,208.5,208.5,208.5,209.0,-105,-112.0,-105.0,-110.0,-112.0,-110,-115,-108.0,-115.0,-110.0,-108.0,-113,2024-25
6423,2024-11-15,Final,San Antonio Spurs,San Antonio,SA,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,115,120,2.5,2.5,2.5,2.5,2.5,2.5,-110,-108.0,-105.0,-110.0,-112.0,-110,-2.5,-2.5,-2.5,-2.5,-2.5,-2.5,-110,-112.0,-115.0,-110.0,-108.0,-112,115,118.0,115.0,120.0,114.0,117,-140,-142.0,-135.0,-140.0,-135.0,-139,228.5,228.0,227.5,228.0,227.5,228.0,-105,-110.0,-110.0,-110.0,-110.0,-110,-115,-110.0,-110.0,-110.0,-110.0,-113,2024-25
6424,2024-11-15,Final,Atlanta Hawks,Atlanta,ATL,-1,Washington Wizards,Washington,WAS,-1,129,117,-9.5,-9.0,-9.0,-9.0,-9.5,-9.0,-105,-110.0,-115.0,-110.0,-105.0,-113,9.5,9.0,9.0,9.0,9.5,9.0,-115,-110.0,-105.0,-110.0,-115.0,-110,-450,-430.0,-476.0,-400.0,-425.0,-375,333,340.0,360.0,320.0,330.0,290,241.5,241.5,242.0,241.5,241.5,241.5,-110,-110.0,-110.0,-110.0,-112.0,-113,-110,-110.0,-110.0,-110.0,-108.0,-110,2024-25
6425,2024-11-15,Final,New York Knicks,New York,NY,-1,Brooklyn Nets,Brooklyn,BK,-1,124,122,-8.5,-8.5,-9.0,-8.5,-8.5,-8.5,-110,-110.0,-110.0,-110.0,-112.0,-109,8.5,8.5,9.0,8.5,8.5,8.5,-110,-110.0,-110.0,-110.0,-108.0,-114,-350,-335.0,-385.0,-370.0,-375.0,-375,280,270.0,300.0,290.0,295.0,290,214.5,214.5,214.5,214.0,214.5,215.5,-105,-110.0,-110.0,-110.0,-110.0,-109,-115,-110.0,-110.0,-110.0,-110.0,-114,2024-25
6426,2024-11-15,Final,Cleveland Cavaliers,Cleveland,CLE,-1,Chicago Bulls,Chicago,CHI,-1,144,126,-10.5,-9.5,-9.5,-10.0,-10.0,-9.5,-105,-110.0,-115.0,-105.0,-112.0,-115,10.5,9.5,9.5,10.0,10.0,9.5,-115,-110.0,-105.0,-115.0,-108.0,-108,-500,-490.0,-476.0,-500.0,-500.0,-435,375,380.0,360.0,375.0,380.0,330,238.5,238.5,239.0,238.0,238.5,238.5,-110,-110.0,-115.0,-110.0,-112.0,-113,-110,-110.0,-105.0,-110.0,-108.0,-110,2024-25


In [23]:
conn = sqlite3.connect('../data/nba_stats.db')
df = pd.read_sql("""SELECT * FROM nba_odds""", conn)
df


Unnamed: 0,date,status,home_team,home_team_loc,home_team_abbr,home_team_rank,away_team,away_team_loc,away_team_abbr,away_team_rank,home_score,away_score,home_spread_betmgm,home_spread_fanduel,home_spread_caesars,home_spread_bet365,home_spread_draftkings,home_spread_bet_rivers_ny,home_spread_odds_betmgm,home_spread_odds_fanduel,home_spread_odds_caesars,home_spread_odds_bet365,home_spread_odds_draftkings,home_spread_odds_bet_rivers_ny,away_spread_betmgm,away_spread_fanduel,away_spread_caesars,away_spread_bet365,away_spread_draftkings,away_spread_bet_rivers_ny,away_spread_odds_betmgm,away_spread_odds_fanduel,away_spread_odds_caesars,away_spread_odds_bet365,away_spread_odds_draftkings,away_spread_odds_bet_rivers_ny,home_ml_betmgm,home_ml_fanduel,home_ml_caesars,home_ml_bet365,home_ml_draftkings,home_ml_bet_rivers_ny,away_ml_betmgm,away_ml_fanduel,away_ml_caesars,away_ml_bet365,away_ml_draftkings,away_ml_bet_rivers_ny,total_betmgm,total_fanduel,total_caesars,total_bet365,total_draftkings,total_bet_rivers_ny,over_odds_betmgm,over_odds_fanduel,over_odds_caesars,over_odds_bet365,over_odds_draftkings,over_odds_bet_rivers_ny,under_odds_betmgm,under_odds_fanduel,under_odds_caesars,under_odds_bet365,under_odds_draftkings,under_odds_bet_rivers_ny,season
0,2020-10-11T23:30:00+00:00,Final,Miami Heat,Miami,MIA,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,93,106,,6.0,5.5,5.5,6.0,,,-110.0,-110.0,-110.0,-110.0,,,-6.0,-5.5,-5.5,-6.0,,,-110.0,-110.0,-110.0,-110.0,,,215.0,210.0,200.0,215.0,,,-260.0,-250.0,-240.0,-265.0,,,215.0,214.5,215.0,214.5,,,-105.0,-110.0,-110.0,-108.0,,,-115.0,-110.0,-110.0,-113.0,,2019-20
1,2020-10-11T23:30:00+00:00,Final,Miami Heat,Miami,MIA,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,93,106,,6.0,5.5,5.5,6.0,,,-110.0,-110.0,-110.0,-110.0,,,-6.0,-5.5,-5.5,-6.0,,,-110.0,-110.0,-110.0,-110.0,,,215.0,210.0,200.0,215.0,,,-260.0,-250.0,-240.0,-265.0,,,215.0,214.5,215.0,214.5,,,-105.0,-110.0,-110.0,-108.0,,,-115.0,-110.0,-110.0,-113.0,,2019-20
2,2020-10-10T01:00:00+00:00,Final,Los Angeles Lakers,L.A. Lakers,LAL,-1,Miami Heat,Miami,MIA,-1,108,111,,-7.5,-7.0,-7.5,-7.0,,,-106.0,-110.0,-105.0,-112.0,,,7.5,7.0,7.5,7.0,,,-114.0,-110.0,-115.0,-108.0,,,-330.0,-320.0,-320.0,-335.0,,,265.0,260.0,260.0,270.0,,,215.5,215.5,215.5,215.5,,,-110.0,-110.0,-110.0,-112.0,,,-110.0,-110.0,-110.0,-109.0,,2019-20
3,2020-10-07T01:00:00+00:00,Final,Miami Heat,Miami,MIA,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,96,102,,7.5,7.5,7.5,7.5,,,-108.0,-110.0,-115.0,-110.0,,,-7.5,-7.5,-7.5,-7.5,,,-112.0,-110.0,-105.0,-110.0,,,270.0,290.0,275.0,275.0,,,-335.0,-360.0,-350.0,-335.0,,,218.5,218.0,218.0,218.5,,,-115.0,-110.0,-110.0,-113.0,,,-105.0,-110.0,-110.0,-108.0,,2019-20
4,2020-10-04T23:30:00+00:00,Final,Miami Heat,Miami,MIA,-1,Los Angeles Lakers,L.A. Lakers,LAL,-1,115,104,,9.0,9.0,9.5,9.5,,,-106.0,-110.0,-120.0,-110.0,,,-9.0,-9.0,-9.5,-9.5,,,-114.0,-110.0,100.0,-110.0,,,330.0,360.0,325.0,350.0,,,-420.0,-450.0,-425.0,-455.0,,,219.5,220.0,219.5,219.5,,,-110.0,-110.0,-110.0,-113.0,,,-110.0,-110.0,-110.0,-108.0,,2019-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6456,2024-06-07T00:30:00+00:00,Final,Boston Celtics,Boston,BOS,-1,Dallas Mavericks,Dallas,DAL,-1,107,89,-6.5,-6.5,-6.5,-6.5,-6.5,-6,-110,-110.0,-105.0,-110.0,-110.0,-114,6.5,6.5,6.5,6.5,6.5,6,-110,-110.0,-115.0,-110.0,-110.0,-109,-250,-250.0,-238.0,-245.0,-230.0,-240,200,205.0,196.0,200.0,190.0,195,217.5,216.5,217.5,217.5,217.5,217.5,-110,-112.0,-110.0,-110.0,-110.0,-114,-110,-108.0,-110.0,-110.0,-110.0,-109,2023-24
6457,2024-06-10T00:00:00+00:00,Final,Boston Celtics,Boston,BOS,-1,Dallas Mavericks,Dallas,DAL,-1,105,98,-6.5,-6.5,-7.0,-7.0,-7.0,-7,-115,-112.0,-110.0,-105.0,-105.0,-109,6.5,6.5,7.0,7.0,7.0,7,-105,-108.0,-110.0,-115.0,-115.0,-114,-300,-260.0,-278.0,-275.0,-270.0,-265,230,215.0,222.0,225.0,220.0,215,214.5,215.0,214.5,215.0,215.0,215.5,-115,-108.0,-115.0,-110.0,-110.0,-112,-105,-112.0,-105.0,-110.0,-110.0,-112,2023-24
6458,2024-06-13T00:30:00+00:00,Final,Dallas Mavericks,Dallas,DAL,-1,Boston Celtics,Boston,BOS,-1,99,106,-2.5,-3.0,-3.0,-2.5,-3.0,-3,-115,-108.0,-115.0,-115.0,-108.0,-113,2.5,3.0,3.0,2.5,3.0,3,-105,-112.0,-105.0,-105.0,-112.0,-110,-150,-152.0,-154.0,-150.0,-148.0,-150,125,128.0,130.0,125.0,124.0,125,213.5,213.5,213.5,213.0,213.5,213.5,-110,-108.0,-115.0,-110.0,-110.0,-113,-110,-112.0,-105.0,-110.0,-110.0,-110,2023-24
6459,2024-06-15T00:30:00+00:00,Final,Dallas Mavericks,Dallas,DAL,-1,Boston Celtics,Boston,BOS,-1,122,84,1.5,1.5,1.5,1.0,1.0,1,-115,-112.0,-110.0,-110.0,-108.0,-109,-1.5,-1.5,-1.5,-1.0,-1.0,-1,-105,-108.0,-110.0,-110.0,-112.0,-114,-105,100.0,105.0,100.0,-102.0,-103,-115,-118.0,-125.0,-120.0,-118.0,-120,211.5,211.5,210.5,211.5,211.0,212,-110,-110.0,-110.0,-110.0,-110.0,-109,-110,-110.0,-110.0,-110.0,-110.0,-114,2023-24


In [48]:
conn = sqlite3.connect('nba_stats.db')

df_basic_team = pd.read_sql("""SELECT * FROM team_basic_stats""", conn)
df_basic_player = pd.read_sql("""SELECT * FROM player_basic_stats""", conn)

df_track_player = pd.read_sql("""SELECT * FROM player_track_stats""", conn)
df_adv_team = pd.read_sql("""SELECT * FROM team_advanced_stats""", conn)
df_hustle_team = pd.read_sql("""SELECT * FROM team_hustle_stats""", conn)
df_track_team = pd.read_sql("""SELECT * FROM team_track_stats""", conn)
df_misc_team = pd.read_sql("""SELECT * FROM team_miscellaneous_stats""", conn)
df_usage_team = pd.read_sql("""SELECT * FROM team_usage_stats""", conn)

conn.close()


In [None]:
conn = sqlite3.connect('nba_stats.db')

df_def_team = pd.read_sql("""select * from team_defensive_stats""", conn)
df_def_team

Unnamed: 0,gameId,teamId,teamCity,teamName,teamTricode,teamSlug,minutes
0,0042200314,1610612747,Los Angeles,Lakers,LAL,lakers,
1,0042200314,1610612743,Denver,Nuggets,DEN,nuggets,
2,0042200405,1610612743,Denver,Nuggets,DEN,nuggets,
3,0042200405,1610612748,Miami,Heat,MIA,heat,
4,0042200404,1610612748,Miami,Heat,MIA,heat,
...,...,...,...,...,...,...,...
217,0022201215,1610612757,Portland,Trail Blazers,POR,blazers,
218,0022201203,1610612754,Indiana,Pacers,IND,pacers,
219,0022201203,1610612765,Detroit,Pistons,DET,pistons,
220,0022201202,1610612766,Charlotte,Hornets,CHA,hornets,


In [5]:
conn = sqlite3.connect('nba_stats.db')
pd.read_sql("""select * from sqlite_master where type='table'""", conn)



Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,traditional_stats,traditional_stats,2,CREATE TABLE traditional_stats (\n game_id ...
1,table,advanced_stats,advanced_stats,4,CREATE TABLE advanced_stats (\n game_id TEX...
2,table,scoring_stats,scoring_stats,6,CREATE TABLE scoring_stats (\n game_id TEXT...
3,table,defensive_stats,defensive_stats,8,CREATE TABLE defensive_stats (\n game_id TE...
4,table,hustle_stats,hustle_stats,10,"CREATE TABLE hustle_stats (\n game_id TEXT,..."
5,table,player_traditional_stats,player_traditional_stats,14,"CREATE TABLE ""player_traditional_stats"" (\n""GA..."
6,table,team_traditional_stats,team_traditional_stats,15,"CREATE TABLE ""team_traditional_stats"" (\n""GAME..."
7,table,player_advanced_stats,player_advanced_stats,16,"CREATE TABLE ""player_advanced_stats"" (\n""GAME_..."
8,table,team_advanced_stats,team_advanced_stats,20,"CREATE TABLE ""team_advanced_stats"" (\n""GAME_ID..."
9,table,player_scoring_stats,player_scoring_stats,21,"CREATE TABLE ""player_scoring_stats"" (\n""GAME_I..."


In [164]:
df_basic_player = pd.read_sql("""SELECT * FROM player_basic_stats b""", conn)
df_basic_team = pd.read_sql("""SELECT * FROM team_basic_stats b""", conn)
df_hustle_team = pd.read_sql("""SELECT * FROM team_hustle_stats b""", conn)

df_advanced_player = pd.read_sql("""SELECT * FROM player_advanced_stats b""", conn)


In [6]:
get_nba_game_dates(season='2023-24')

NameError: name 'get_nba_game_dates' is not defined

In [None]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashteamshotlocations
import sqlite3
import time
from requests.exceptions import RequestException, Timeout
import backoff

# Simple configuration
DB_NAME = 'nba_stats.db'
SEASONS = ['2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24'
           ]

def custom_backoff():
    yield 80
    yield 120
    yield 180

@backoff.on_exception(custom_backoff, (RequestException, Timeout), max_tries=4)
def get_team_shot_location_stats(date, season, season_type):
    stats = leaguedashteamshotlocations.LeagueDashTeamShotLocations(
        distance_range='By Zone',
        per_mode_detailed='PerGame',
        season=season, 
        season_type_all_star=season_type,
        date_from_nullable=date,
        date_to_nullable=date,
    )
    
    df = stats.get_data_frames()[0]
    df['SEASON'] = season
    df['Date'] = date
    return df

def get_all_shot_location_stats(date, season, season_type):
    try:
        print("processing date: ", date)
        df = get_team_shot_location_stats(date, season, season_type)
    except Exception as e:
        print(f"Error fetching data for {date}: {str(e)}")
        return None
    return df

def season_id_to_type(season_id):
    if season_id.startswith('22'):
        return 'Regular Season'
    elif season_id.startswith('52'):
        return 'PlayIn'
    elif season_id.startswith('42'):
        return 'Playoffs'
    else:
        return 'Unknown'

def fetch_game_date_and_season_type(season):
    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season)
    
    games = gamefinder.get_data_frames()[0]
    nba_games = games.loc[(games['SEASON_ID'].str.startswith('22')) 
                          | (games['SEASON_ID'].str.startswith('52')) 
                          | (games['SEASON_ID'].str.startswith('42'))].sort_values(by='GAME_DATE')
        
    nba_games['SEASON_TYPE'] = nba_games['SEASON_ID'].apply(season_id_to_type)
    
    game_dates_and_season_type = nba_games.groupby(['GAME_DATE', 'SEASON_TYPE']).size().reset_index()[['GAME_DATE', 'SEASON_TYPE']]


    return list(game_dates_and_season_type.itertuples(index=False, name=None))


def process_shot_location_stats(df):
    
    df.columns = df.columns.map(' '.join)
    df.columns = [col.strip() for col in df.columns]

    return df

def get_existing_dates(conn, table_name):
    query = f"SELECT DISTINCT Date, SEASON FROM {table_name}"
    df = pd.read_sql_query(query, conn)
    return set(df.apply(tuple, axis=1))


def main():
    conn = sqlite3.connect(DB_NAME)
    
    table_name = 'team_shot_location_boxscores'
    
    # Get existing dates
    existing_dates = get_existing_dates(conn, table_name)
    
    for season in SEASONS:
        print(f"Processing season: {season}")
        game_dates_and_season_types = fetch_game_date_and_season_type(season)
        for date, season_type in game_dates_and_season_types:
            # Check if this date and season combination already exists
            if (date, season) not in existing_dates:
                raw_data = get_all_shot_location_stats(date, season, season_type)
                processed_data = process_shot_location_stats(raw_data)
                if processed_data is not None:
                    processed_data.to_sql(f'{table_name}', conn, if_exists='append', index=False)
                time.sleep(4*random.random())  # Basic rate limiting
            else:
                print(f"Skipping existing date: {date} for season {season}")

    # Remove duplicates using SQL
    # remove_duplicates_sql(conn, table_name)
    
    conn.close()
if __name__ == "__main__":
    main()

Processing season: 2014-15
Skipping existing date: 2014-10-28 for season 2014-15
Skipping existing date: 2014-10-29 for season 2014-15
Skipping existing date: 2014-10-30 for season 2014-15
Skipping existing date: 2014-10-31 for season 2014-15
Skipping existing date: 2014-11-01 for season 2014-15
Skipping existing date: 2014-11-02 for season 2014-15
Skipping existing date: 2014-11-03 for season 2014-15
Skipping existing date: 2014-11-04 for season 2014-15
Skipping existing date: 2014-11-05 for season 2014-15
Skipping existing date: 2014-11-06 for season 2014-15
Skipping existing date: 2014-11-07 for season 2014-15
Skipping existing date: 2014-11-08 for season 2014-15
Skipping existing date: 2014-11-09 for season 2014-15
Skipping existing date: 2014-11-10 for season 2014-15
Skipping existing date: 2014-11-11 for season 2014-15
Skipping existing date: 2014-11-12 for season 2014-15
Skipping existing date: 2014-11-13 for season 2014-15
Skipping existing date: 2014-11-14 for season 2014-15
S

In [None]:
date = '2023-10-23'
season = '2023-24'
season_type = 'Regular Season'

get_team_shot_location_stats(date, season, season_type)

In [None]:

,m67890-

In [42]:
conn = sqlite3.connect('../data/nba_stats.db')

x = pd.read_sql_query("SELECT * FROM team_shot_location_boxscores", conn)


x

Unnamed: 0,TEAM_ID,TEAM_NAME,Restricted Area FGM,Restricted Area FGA,Restricted Area FG_PCT,In The Paint (Non-RA) FGM,In The Paint (Non-RA) FGA,In The Paint (Non-RA) FG_PCT,Mid-Range FGM,Mid-Range FGA,Mid-Range FG_PCT,Left Corner 3 FGM,Left Corner 3 FGA,Left Corner 3 FG_PCT,Right Corner 3 FGM,Right Corner 3 FGA,Right Corner 3 FG_PCT,Above the Break 3 FGM,Above the Break 3 FGA,Above the Break 3 FG_PCT,Backcourt FGM,Backcourt FGA,Backcourt FG_PCT,Corner 3 FGM,Corner 3 FGA,Corner 3 FG_PCT,SEASON,GAME_DATE
0,1610612742,Dallas Mavericks,10.0,18.0,0.556,2.0,9.0,0.222,18.0,30.0,0.600,0.0,4.0,0.000,2.0,4.0,0.500,6.0,13.0,0.462,0.0,0.0,0.0,2.0,8.0,0.250,2014-15,2014-10-28
1,1610612745,Houston Rockets,15.0,24.0,0.625,2.0,8.0,0.250,2.0,12.0,0.167,3.0,9.0,0.333,2.0,5.0,0.400,7.0,15.0,0.467,0.0,0.0,0.0,5.0,14.0,0.357,2014-15,2014-10-28
2,1610612747,Los Angeles Lakers,14.0,28.0,0.500,6.0,14.0,0.429,5.0,27.0,0.185,0.0,0.0,0.000,1.0,2.0,0.500,2.0,8.0,0.250,0.0,0.0,0.0,1.0,2.0,0.500,2014-15,2014-10-28
3,1610612740,New Orleans Pelicans,30.0,58.0,0.517,2.0,8.0,0.250,5.0,18.0,0.278,0.0,2.0,0.000,0.0,3.0,0.000,4.0,12.0,0.333,0.0,0.0,0.0,0.0,5.0,0.000,2014-15,2014-10-28
4,1610612753,Orlando Magic,21.0,39.0,0.538,3.0,15.0,0.200,4.0,19.0,0.211,0.0,1.0,0.000,1.0,2.0,0.500,3.0,7.0,0.429,0.0,1.0,0.0,1.0,3.0,0.333,2014-15,2014-10-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26087,1610612755,Philadelphia 76ers,21.0,31.0,0.677,3.0,6.0,0.500,1.0,6.0,0.167,3.0,9.0,0.333,3.0,6.0,0.500,4.0,24.0,0.167,0.0,2.0,0.0,6.0,15.0,0.400,2024-25,2024-11-18
26088,1610612756,Phoenix Suns,14.0,22.0,0.636,6.0,15.0,0.400,3.0,5.0,0.600,5.0,7.0,0.714,2.0,7.0,0.286,5.0,27.0,0.185,0.0,0.0,0.0,7.0,14.0,0.500,2024-25,2024-11-18
26089,1610612758,Sacramento Kings,9.0,16.0,0.563,7.0,20.0,0.350,1.0,3.0,0.333,5.0,7.0,0.714,1.0,5.0,0.200,14.0,33.0,0.424,0.0,1.0,0.0,6.0,12.0,0.500,2024-25,2024-11-18
26090,1610612761,Toronto Raptors,24.0,32.0,0.750,9.0,18.0,0.500,1.0,3.0,0.333,6.0,9.0,0.667,4.0,6.0,0.667,4.0,13.0,0.308,0.0,1.0,0.0,10.0,15.0,0.667,2024-25,2024-11-18


In [15]:
l = x[['GAME_DATE', 'SEASON_ID']].values.tolist()

dates = [x[0] for x in l]
seasons = [x[1] for x in l]

print(dates)

['2015-10-27', '2015-10-27', '2015-10-27', '2015-10-27', '2015-10-27', '2015-10-27', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-28', '2015-10-29', '2015-10-29', '2015-10-29', '2015-10-29', '2015-10-29', '2015-10-29', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-30', '2015-10-31', '2015-10-31', '2015-10-31', '2015-10-31', '2015-10-31', '2015-10-31', '2015-10-31', '2015

In [22]:
conn = sqlite3.connect('../data/nba_stats.db')

x = pd.read_sql_query("SELECT * FROM team_shot_location_boxscores", conn)

conn.close()
x


Unnamed: 0,TEAM_ID,TEAM_NAME,Restricted Area FGM,Restricted Area FGA,Restricted Area FG_PCT,In The Paint (Non-RA) FGM,In The Paint (Non-RA) FGA,In The Paint (Non-RA) FG_PCT,Mid-Range FGM,Mid-Range FGA,Mid-Range FG_PCT,Left Corner 3 FGM,Left Corner 3 FGA,Left Corner 3 FG_PCT,Right Corner 3 FGM,Right Corner 3 FGA,Right Corner 3 FG_PCT,Above the Break 3 FGM,Above the Break 3 FGA,Above the Break 3 FG_PCT,Backcourt FGM,Backcourt FGA,Backcourt FG_PCT,Corner 3 FGM,Corner 3 FGA,Corner 3 FG_PCT,SEASON,GAME_DATE
0,1610612742,Dallas Mavericks,10.0,18.0,0.556,2.0,9.0,0.222,18.0,30.0,0.600,0.0,4.0,0.000,2.0,4.0,0.500,6.0,13.0,0.462,0.0,0.0,0.0,2.0,8.0,0.250,2014-15,2014-10-28
1,1610612745,Houston Rockets,15.0,24.0,0.625,2.0,8.0,0.250,2.0,12.0,0.167,3.0,9.0,0.333,2.0,5.0,0.400,7.0,15.0,0.467,0.0,0.0,0.0,5.0,14.0,0.357,2014-15,2014-10-28
2,1610612747,Los Angeles Lakers,14.0,28.0,0.500,6.0,14.0,0.429,5.0,27.0,0.185,0.0,0.0,0.000,1.0,2.0,0.500,2.0,8.0,0.250,0.0,0.0,0.0,1.0,2.0,0.500,2014-15,2014-10-28
3,1610612740,New Orleans Pelicans,30.0,58.0,0.517,2.0,8.0,0.250,5.0,18.0,0.278,0.0,2.0,0.000,0.0,3.0,0.000,4.0,12.0,0.333,0.0,0.0,0.0,0.0,5.0,0.000,2014-15,2014-10-28
4,1610612753,Orlando Magic,21.0,39.0,0.538,3.0,15.0,0.200,4.0,19.0,0.211,0.0,1.0,0.000,1.0,2.0,0.500,3.0,7.0,0.429,0.0,1.0,0.0,1.0,3.0,0.333,2014-15,2014-10-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26043,1610612759,San Antonio Spurs,16.0,21.0,0.762,6.0,14.0,0.429,4.0,8.0,0.500,3.0,4.0,0.750,3.0,7.0,0.429,11.0,34.0,0.324,0.0,0.0,0.0,6.0,11.0,0.545,2024-25,2024-11-15
26044,1610612761,Toronto Raptors,14.0,31.0,0.452,12.0,28.0,0.429,1.0,7.0,0.143,2.0,4.0,0.500,2.0,8.0,0.250,4.0,19.0,0.211,0.0,0.0,0.0,4.0,12.0,0.333,2024-25,2024-11-15
26045,1610612764,Washington Wizards,18.0,37.0,0.486,5.0,13.0,0.385,5.0,13.0,0.385,3.0,6.0,0.500,0.0,1.0,0.000,12.0,36.0,0.333,0.0,0.0,0.0,3.0,7.0,0.429,2024-25,2024-11-15
26046,1610612766,Charlotte Hornets,21.0,33.0,0.636,3.0,12.0,0.250,2.0,6.0,0.333,3.0,5.0,0.600,5.0,9.0,0.556,7.0,27.0,0.259,0.0,1.0,0.0,8.0,14.0,0.571,2024-25,2024-11-16


In [33]:
x

Unnamed: 0,TEAM_ID,TEAM_NAME,Restricted Area FGM,Restricted Area FGA,Restricted Area FG_PCT,In The Paint (Non-RA) FGM,In The Paint (Non-RA) FGA,In The Paint (Non-RA) FG_PCT,Mid-Range FGM,Mid-Range FGA,...,Above the Break 3 FGA,Above the Break 3 FG_PCT,Backcourt FGM,Backcourt FGA,Backcourt FG_PCT,Corner 3 FGM,Corner 3 FGA,Corner 3 FG_PCT,SEASON,Date
0,1610612737,Atlanta Hawks,15.0,30.0,0.500,3.0,12.0,0.250,9.0,16.0,...,16.0,0.563,0.0,0.0,0.0,4.0,6.0,0.667,2014-15,2014-10-29
1,1610612738,Boston Celtics,24.0,38.0,0.632,7.0,11.0,0.636,10.0,17.0,...,19.0,0.368,0.0,0.0,0.0,1.0,3.0,0.333,2014-15,2014-10-29
2,1610612751,Brooklyn Nets,14.0,23.0,0.609,6.0,18.0,0.333,12.0,20.0,...,12.0,0.417,0.0,0.0,0.0,2.0,7.0,0.286,2014-15,2014-10-29
3,1610612766,Charlotte Hornets,15.0,28.0,0.536,5.0,14.0,0.357,15.0,38.0,...,17.0,0.353,0.0,1.0,0.0,0.0,3.0,0.000,2014-15,2014-10-29
4,1610612741,Chicago Bulls,16.0,25.0,0.640,5.0,10.0,0.500,8.0,12.0,...,15.0,0.333,0.0,0.0,0.0,2.0,9.0,0.222,2014-15,2014-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5139,1610612744,Golden State Warriors,13.0,18.0,0.722,5.0,14.0,0.357,10.0,22.0,...,18.0,0.167,0.0,0.0,0.0,3.0,7.0,0.429,2015-16,2016-04-21
5140,1610612745,Houston Rockets,16.0,31.0,0.516,7.0,17.0,0.412,2.0,8.0,...,23.0,0.304,0.0,1.0,0.0,4.0,12.0,0.333,2015-16,2016-04-21
5141,1610612754,Indiana Pacers,13.0,26.0,0.500,1.0,3.0,0.333,6.0,17.0,...,17.0,0.294,0.0,0.0,0.0,1.0,5.0,0.200,2015-16,2016-04-21
5142,1610612760,Oklahoma City Thunder,15.0,17.0,0.882,2.0,14.0,0.143,13.0,20.0,...,19.0,0.474,0.0,0.0,0.0,6.0,8.0,0.750,2015-16,2016-04-21


In [10]:
fetch_game_date_and_season_type('2023-24')

[('2023-10-24', 'Regular Season'),
 ('2023-10-25', 'Regular Season'),
 ('2023-10-26', 'Regular Season'),
 ('2023-10-27', 'Regular Season'),
 ('2023-10-28', 'Regular Season'),
 ('2023-10-29', 'Regular Season'),
 ('2023-10-30', 'Regular Season'),
 ('2023-10-31', 'Regular Season'),
 ('2023-11-01', 'Regular Season'),
 ('2023-11-02', 'Regular Season'),
 ('2023-11-03', 'Regular Season'),
 ('2023-11-04', 'Regular Season'),
 ('2023-11-05', 'Regular Season'),
 ('2023-11-06', 'Regular Season'),
 ('2023-11-08', 'Regular Season'),
 ('2023-11-09', 'Regular Season'),
 ('2023-11-10', 'PlayIn'),
 ('2023-11-10', 'Regular Season'),
 ('2023-11-11', 'PlayIn'),
 ('2023-11-11', 'Regular Season'),
 ('2023-11-12', 'PlayIn'),
 ('2023-11-12', 'Regular Season'),
 ('2023-11-13', 'PlayIn'),
 ('2023-11-13', 'Regular Season'),
 ('2023-11-14', 'PlayIn'),
 ('2023-11-14', 'Regular Season'),
 ('2023-11-15', 'PlayIn'),
 ('2023-11-15', 'Regular Season'),
 ('2023-11-16', 'PlayIn'),
 ('2023-11-16', 'Regular Season'),
 ('202

In [12]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashteamstats
import sqlite3
import time
from requests.exceptions import RequestException
import backoff

# Simple configuration
DB_NAME = 'nba_stats.db'
SEASONS = ['2016-17'
        #    , '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24'
           ]

@backoff.on_exception(backoff.expo, RequestException, max_tries=3)
def get_defensive_stats(position, date, season):
    stats = leaguedashteamstats.LeagueDashTeamStats(
        measure_type_detailed_defense='Opponent',
        per_mode_detailed='PerGame',
        season=season, 
        season_type_all_star='Regular Season',
        date_from_nullable=date,
        date_to_nullable=date,
        player_position_abbreviation_nullable=position
    )
    
    df = stats.get_data_frames()[0]
    df['SEASON'] = season
    df['Position'] = position
    df['Date'] = date
    return df

def get_all_defensive_stats(date, season):
    all_data = []
    
    for position in POSITIONS:
        print(f"Fetching data for position {position} on {date}")
        try:
            df = get_defensive_stats(position, date, season)
            all_data.append(df)
        except Exception as e:
            print(f"Error fetching data for position {position} on {date}: {str(e)}")
    
    return pd.concat(all_data, ignore_index=True) if all_data else None

def process_defensive_stats(df):
    if df is None or df.empty:
        return None

    df_pivoted = df.pivot(index=['TEAM_ID', 'TEAM_NAME', 'Date'], 
                          columns='Position', 
                          values=df.columns[4:])
    
    df_pivoted.columns = [f'{stat}_{pos}' for stat, pos in df_pivoted.columns]
    df_pivoted.reset_index(inplace=True)
    
    return df_pivoted

def fetch_game_dates(season):
    # Implement this function to return a list of game dates for the season
    # This is a placeholder and should be replaced with actual implementation
    return []

def main():
    conn = sqlite3.connect(DB_NAME)
    
    for season in SEASONS:
        print(f"Processing season: {season}")
        game_dates = fetch_game_dates(season)
        for date in game_dates:
            raw_data = get_all_defensive_stats(date, season)
            processed_data = process_defensive_stats(raw_data)
            if processed_data is not None:
                processed_data.to_sql('defensive_stats_by_position', conn, if_exists='append', index=False)
            time.sleep(1)  # Basic rate limiting

    conn.close()

# if __name__ == "__main__":
#     main()

In [14]:
POSITIONS = ['G', 'C', 'F']

date = '2024-11-08'
season = '2024-25'


df = get_all_defensive_stats(date, season)
df

Fetching data for position G on 2024-11-08
Fetching data for position C on 2024-11-08
Fetching data for position F on 2024-11-08


Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OPP_FGM,OPP_FGA,OPP_FG_PCT,...,OPP_STL_RANK,OPP_BLK_RANK,OPP_BLKA_RANK,OPP_PF_RANK,OPP_PFD_RANK,OPP_PTS_RANK,PLUS_MINUS_RANK,SEASON,Position,Date
0,1610612737,Atlanta Hawks,1,0,1,0.0,22.8,20.2,41.0,0.493,...,8,20,9,12,12,10,11,2024-25,G,2024-11-08
1,1610612738,Boston Celtics,1,1,0,1.0,33.2,25.6,62.2,0.412,...,11,7,2,26,12,17,9,2024-25,G,2024-11-08
2,1610612751,Brooklyn Nets,1,0,1,0.0,23.7,17.8,40.4,0.441,...,8,21,21,4,9,6,15,2024-25,G,2024-11-08
3,1610612766,Charlotte Hornets,1,1,0,1.0,17.1,13.4,30.0,0.447,...,14,7,12,3,25,2,14,2024-25,G,2024-11-08
4,1610612739,Cleveland Cavaliers,1,1,0,1.0,29.4,28.2,59.0,0.478,...,26,14,6,25,21,21,4,2024-25,G,2024-11-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,1610612755,Philadelphia 76ers,1,0,1,0.0,24.8,22.4,43.4,0.516,...,2,26,15,9,19,17,20,2024-25,F,2024-11-08
72,1610612756,Phoenix Suns,1,1,0,1.0,16.1,12.4,26.4,0.470,...,3,2,23,12,22,2,8,2024-25,F,2024-11-08
73,1610612757,Portland Trail Blazers,1,0,1,0.0,24.7,25.4,51.8,0.490,...,22,22,6,24,14,22,26,2024-25,F,2024-11-08
74,1610612758,Sacramento Kings,1,0,1,0.0,28.6,21.0,48.6,0.432,...,16,5,18,23,2,20,18,2024-25,F,2024-11-08


In [5]:

pd.options.display.max_columns = None
season = '2024-25'
position = 'G'
date = '2024-11-13'

stats = leaguedashteamstats.LeagueDashTeamStats(
    measure_type_detailed_defense='Opponent',
    per_mode_detailed='PerGame',
    season=season, 
    season_type_all_star='Regular Season',
    date_from_nullable=date,
    date_to_nullable=date,
    player_position_abbreviation_nullable=position
)

stats.get_data_frames()[0]

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_TOV,OPP_STL,OPP_BLK,OPP_BLKA,OPP_PF,OPP_PFD,OPP_PTS,PLUS_MINUS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,OPP_FGM_RANK,OPP_FGA_RANK,OPP_FG_PCT_RANK,OPP_FG3M_RANK,OPP_FG3A_RANK,OPP_FG3_PCT_RANK,OPP_FTM_RANK,OPP_FTA_RANK,OPP_FT_PCT_RANK,OPP_OREB_RANK,OPP_DREB_RANK,OPP_REB_RANK,OPP_AST_RANK,OPP_TOV_RANK,OPP_STL_RANK,OPP_BLK_RANK,OPP_BLKA_RANK,OPP_PF_RANK,OPP_PFD_RANK,OPP_PTS_RANK,PLUS_MINUS_RANK
0,1610612738,Boston Celtics,1,1,0,1.0,32.6,26.8,54.2,0.494,12.6,29.0,0.434,9.6,11.4,0.842,2.0,21.0,23.0,17.4,8.6,2.6,1.6,4.0,13.4,11.2,75.8,-18.8,1,1,1,1,4,18,18,14,22,20,16,16,14,16,2,18,13,18,6,2,6,6,21,4,19,1
1,1610612751,Brooklyn Nets,1,0,1,0.0,21.2,22.6,41.0,0.551,10.0,20.2,0.495,7.8,10.8,0.722,5.0,14.6,19.6,15.6,3.2,3.0,2.6,1.4,7.6,9.0,63.0,12.6,1,12,12,12,16,10,8,21,19,13,20,8,13,5,11,8,7,14,21,5,11,19,8,12,12,22
2,1610612741,Chicago Bulls,1,1,0,1.0,25.0,24.4,48.0,0.508,7.6,18.8,0.404,6.8,10.2,0.667,5.0,15.4,20.4,16.0,8.2,3.6,1.4,3.2,9.2,9.2,63.2,-5.6,1,1,1,1,12,16,12,17,8,9,13,7,11,2,11,9,9,15,8,10,5,9,11,10,14,5
3,1610612739,Cleveland Cavaliers,1,1,0,1.0,27.6,23.6,53.0,0.445,8.4,25.0,0.336,5.4,5.8,0.931,6.6,18.6,25.2,14.2,7.0,4.8,1.2,0.4,7.2,8.0,61.0,-4.6,1,1,1,1,7,15,16,8,12,18,6,4,1,22,17,13,15,12,13,16,3,22,6,17,11,6
4,1610612765,Detroit Pistons,1,0,1,0.0,27.1,23.2,48.2,0.481,8.2,18.8,0.436,10.8,12.6,0.857,4.8,20.4,25.2,13.8,6.6,4.6,4.4,3.4,6.2,10.6,65.4,4.4,1,12,12,12,9,12,13,12,9,9,17,20,16,17,10,17,15,10,15,15,18,8,2,6,16,14
5,1610612745,Houston Rockets,1,1,0,1.0,29.3,23.4,59.4,0.394,8.6,23.6,0.364,8.2,10.0,0.82,6.6,19.4,26.0,16.6,7.2,6.2,2.0,7.0,9.4,10.6,63.6,-3.2,1,1,1,1,5,13,19,4,13,15,9,10,9,14,17,14,17,17,12,19,8,1,12,6,15,8
6,1610612754,Indiana Pacers,1,0,1,0.0,24.8,18.2,44.8,0.406,6.6,20.0,0.33,6.4,7.4,0.865,7.6,19.4,27.0,13.6,8.0,3.4,5.2,3.0,9.8,9.6,49.4,3.4,1,12,12,12,13,7,10,5,6,12,4,5,5,18,19,14,18,9,10,8,21,10,15,9,6,13
7,1610612746,LA Clippers,1,0,1,0.0,36.4,32.8,73.6,0.446,10.6,29.2,0.363,9.4,13.4,0.701,13.2,33.0,46.2,21.8,9.8,6.4,8.8,2.2,12.8,11.8,85.6,10.2,1,12,12,12,3,21,21,9,20,21,7,13,19,3,21,21,21,21,3,20,22,13,18,3,21,20
8,1610612747,Los Angeles Lakers,1,1,0,1.0,18.3,16.4,33.4,0.491,5.0,12.8,0.391,10.0,12.4,0.806,4.2,11.4,15.6,11.4,5.8,4.2,2.2,1.8,9.6,8.4,47.8,0.4,1,1,1,1,20,4,2,13,4,3,11,18,15,12,8,3,3,5,17,13,9,16,13,15,5,11
9,1610612763,Memphis Grizzlies,1,0,1,0.0,18.3,17.2,34.6,0.497,9.0,17.8,0.506,9.4,12.6,0.746,2.8,13.4,16.2,13.0,6.0,2.6,1.6,2.8,8.8,9.0,52.8,5.0,1,12,12,12,21,6,4,15,17,7,21,13,16,8,3,6,4,7,16,2,6,11,10,12,7,16


In [20]:
pd.options.display.max_columns = None
POSITIONS = ['G', 'C', 'F']

date = '2024-10-22'
season = '2024-25'
def get_defensive_stats(position: str, date: str, season: str) -> pd.DataFrame:
    """
    Get defensive stats for a specific position on a specific date
    
    Parameters:
    position (str): Position abbreviation ('C', 'F', or 'G')
    date (str): Date in YYYY-MM-DD format
    season (str): Season in YYYY-YY format
    
    Returns:
    pd.DataFrame: Defensive stats for the specified position
    """
    
    try:
        
        stats = leaguedashteamstats.LeagueDashTeamStats(
            measure_type_detailed_defense='Opponent',
            per_mode_detailed='PerGame',
            season=season,
            season_type_all_star='Regular Season',
            date_from_nullable=date,
            date_to_nullable=date,
            player_position_abbreviation_nullable=position
        )
        
        df = stats.get_data_frames()[0]
        df['SEASON'] = season
        df['Position'] = position
        df['Date'] = date
        
        return df
        
    except Exception as e:
        raise
        
        
def get_all_defensive_stats(date: str, season: str) -> pd.DataFrame:
    """
    Get defensive stats for all positions on a specific date
    
    Parameters:
    date (str): Date in YYYY-MM-DD format
    season (str): Season in YYYY-YY format
    
    Returns:
    Optional[pd.DataFrame]: Combined defensive stats for all positions
    """
    all_data = []
    
    for position in POSITIONS:
        try:
            df = get_defensive_stats(position, date, season)
            all_data.append(df)
        except Exception as e:
            continue
    
    if all_data:
        return pd.concat(all_data, ignore_index=True)
    return None

df = get_all_defensive_stats(date, season)
df

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_TOV,OPP_STL,OPP_BLK,OPP_BLKA,OPP_PF,OPP_PFD,OPP_PTS,PLUS_MINUS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,OPP_FGM_RANK,OPP_FGA_RANK,OPP_FG_PCT_RANK,OPP_FG3M_RANK,OPP_FG3A_RANK,OPP_FG3_PCT_RANK,OPP_FTM_RANK,OPP_FTA_RANK,OPP_FT_PCT_RANK,OPP_OREB_RANK,OPP_DREB_RANK,OPP_REB_RANK,OPP_AST_RANK,OPP_TOV_RANK,OPP_STL_RANK,OPP_BLK_RANK,OPP_BLKA_RANK,OPP_PF_RANK,OPP_PFD_RANK,OPP_PTS_RANK,PLUS_MINUS_RANK,SEASON,Position,Date
0,1610612738,Boston Celtics,1,1,0,1.0,29.7,27.6,48.6,0.568,6.8,17.2,0.395,7.6,10.4,0.731,3.2,16.8,20.0,13.8,7.4,1.2,1.6,2.0,7.6,8.8,69.6,-18.2,1,1,1,1,2,3,2,4,3,2,3,2,2,1,1,2,2,3,1,1,2,2,1,4,3,1,2024-25,G,2024-10-22
1,1610612747,Los Angeles Lakers,1,1,0,1.0,20.3,14.8,36.0,0.411,5.6,17.6,0.318,8.0,10.8,0.741,4.4,15.2,19.6,7.6,6.2,1.2,0.4,3.6,9.6,9.0,43.2,-3.0,1,1,1,1,4,1,1,1,2,3,2,3,3,2,2,1,1,1,2,1,1,1,2,2,1,2,2024-25,G,2024-10-22
2,1610612750,Minnesota Timberwolves,1,0,1,0.0,25.6,22.6,50.8,0.445,2.4,15.8,0.152,11.4,13.6,0.838,8.0,17.0,25.0,11.6,4.0,3.6,4.4,0.6,11.6,11.6,59.0,2.6,1,3,3,3,3,2,3,2,1,1,1,4,4,3,4,3,3,2,3,3,4,4,4,1,2,3,2024-25,G,2024-10-22
3,1610612752,New York Knicks,1,0,1,0.0,34.0,37.0,66.4,0.557,22.6,41.2,0.549,5.6,6.4,0.875,7.0,21.0,28.0,25.0,2.6,4.8,2.4,2.0,10.8,9.0,102.2,22.2,1,3,3,3,1,4,4,3,4,4,4,1,1,4,3,4,4,4,4,4,3,2,3,2,4,4,2024-25,G,2024-10-22
4,1610612738,Boston Celtics,1,1,0,1.0,9.9,9.0,16.2,0.556,2.2,6.2,0.355,2.4,3.2,0.75,1.0,6.0,7.0,4.0,2.6,0.6,0.8,0.6,2.4,3.2,22.6,-4.2,1,1,1,1,4,2,1,4,2,1,3,2,2,2,1,1,1,2,3,1,3,3,1,3,1,1,2024-25,C,2024-10-22
5,1610612747,Los Angeles Lakers,1,1,0,1.0,10.6,8.0,18.8,0.426,2.8,8.8,0.318,4.6,6.6,0.697,2.8,8.0,10.8,3.6,3.4,1.2,0.2,1.6,4.6,5.0,23.4,0.0,1,1,1,1,3,1,2,1,3,2,2,3,3,1,3,3,3,1,1,2,1,1,3,2,2,2,2024-25,C,2024-10-22
6,1610612750,Minnesota Timberwolves,1,0,1,0.0,19.2,16.8,38.0,0.442,2.0,12.0,0.167,8.4,10.0,0.84,6.0,12.4,18.4,8.8,2.8,2.8,3.2,0.4,8.8,8.8,44.0,2.8,1,3,3,3,1,4,4,2,1,3,1,4,4,3,4,4,4,4,2,4,4,4,4,1,4,3,2024-25,C,2024-10-22
7,1610612752,New York Knicks,1,0,1,0.0,10.8,9.8,21.6,0.454,5.8,14.2,0.408,1.4,1.6,0.875,2.6,6.2,8.8,6.8,1.0,1.2,0.6,0.8,3.2,2.6,26.8,2.8,1,3,3,3,2,3,3,3,4,4,4,1,1,4,2,2,2,3,4,2,2,2,2,4,3,3,2024-25,C,2024-10-22
8,1610612738,Boston Celtics,1,1,0,1.0,29.5,26.4,48.0,0.55,6.6,18.8,0.351,8.0,10.4,0.769,3.0,17.8,20.8,11.8,6.8,1.2,1.8,1.8,7.6,9.6,67.4,-15.2,1,1,1,1,1,4,3,4,2,2,3,3,3,2,1,3,3,3,2,1,3,2,3,2,4,1,2024-25,F,2024-10-22
9,1610612747,Los Angeles Lakers,1,1,0,1.0,27.7,20.2,49.0,0.412,7.4,23.4,0.316,12.0,16.2,0.741,7.6,19.8,27.4,9.4,9.8,2.8,0.6,4.4,12.4,13.0,59.8,-4.0,1,1,1,1,2,2,4,1,3,3,2,4,4,1,4,4,4,2,1,4,1,1,4,1,2,2,2024-25,F,2024-10-22


In [23]:
import pandas as pd
import sqlite3
conn = sqlite3.connect('nba_stats.db')
pd.read_sql('select * from defensive_stats_by_position', conn).columns

conn.close()


In [None]:
position = 'C'
date = '2024-06-09'
season = '2023-24'

df = stats = leaguedashteamstats.LeagueDashTeamStats(
        measure_type_detailed_defense='Opponent',
        per_mode_detailed='PerGame',
        season=season, 
        season_type_all_star='Playoffs',
        date_from_nullable=date,
        date_to_nullable=date,
        player_position_abbreviation_nullable=position
    ).get_data_frames()[0]

df

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_TOV,OPP_STL,OPP_BLK,OPP_BLKA,OPP_PF,OPP_PFD,OPP_PTS,PLUS_MINUS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,OPP_FGM_RANK,OPP_FGA_RANK,OPP_FG_PCT_RANK,OPP_FG3M_RANK,OPP_FG3A_RANK,OPP_FG3_PCT_RANK,OPP_FTM_RANK,OPP_FTA_RANK,OPP_FT_PCT_RANK,OPP_OREB_RANK,OPP_DREB_RANK,OPP_REB_RANK,OPP_AST_RANK,OPP_TOV_RANK,OPP_STL_RANK,OPP_BLK_RANK,OPP_BLKA_RANK,OPP_PF_RANK,OPP_PFD_RANK,OPP_PTS_RANK,PLUS_MINUS_RANK


In [22]:
position = 'C'
date = '2024-11-08'
season = '2024-25'
df = stats = leaguedashteamstats.LeagueDashTeamStats(
        measure_type_detailed_defense='Opponent',
        per_mode_detailed='PerGame',
        season=season, 
        season_type_all_star='Regular Season',
        date_from_nullable=date,
        date_to_nullable=date,
        player_position_abbreviation_nullable=position
    ).get_data_frames()[0]

df.columns


Index(['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'OPP_FGM',
       'OPP_FGA', 'OPP_FG_PCT', 'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT',
       'OPP_FTM', 'OPP_FTA', 'OPP_FT_PCT', 'OPP_OREB', 'OPP_DREB', 'OPP_REB',
       'OPP_AST', 'OPP_TOV', 'OPP_STL', 'OPP_BLK', 'OPP_BLKA', 'OPP_PF',
       'OPP_PFD', 'OPP_PTS', 'PLUS_MINUS', 'GP_RANK', 'W_RANK', 'L_RANK',
       'W_PCT_RANK', 'MIN_RANK', 'OPP_FGM_RANK', 'OPP_FGA_RANK',
       'OPP_FG_PCT_RANK', 'OPP_FG3M_RANK', 'OPP_FG3A_RANK', 'OPP_FG3_PCT_RANK',
       'OPP_FTM_RANK', 'OPP_FTA_RANK', 'OPP_FT_PCT_RANK', 'OPP_OREB_RANK',
       'OPP_DREB_RANK', 'OPP_REB_RANK', 'OPP_AST_RANK', 'OPP_TOV_RANK',
       'OPP_STL_RANK', 'OPP_BLK_RANK', 'OPP_BLKA_RANK', 'OPP_PF_RANK',
       'OPP_PFD_RANK', 'OPP_PTS_RANK', 'PLUS_MINUS_RANK'],
      dtype='object')