In [None]:
import pandas as pd
from datetime import date, timedelta
import nba_api.stats.endpoints as endpoints
from nba_api.live.nba.endpoints import boxscore
from modules import *

: 

In [None]:
import time
import random
from datetime import date, timedelta
import pandas as pd

# Scrape NBA games day by day between start_date and end_date (inclusive).
# For each date the scoreboard is fetched, then the traditional box score of
# every listed game; the results are accumulated into four DataFrames that are
# checkpointed to CSV after every successfully processed game.
#
# Relies on helpers that are not defined in this cell (fetch_with_retry,
# get_season, fill_teams_df, fill_players_df, fill_games_df,
# get_player_game_stats) -- presumably provided by `from modules import *`.

# Initialize the accumulator DataFrames.
teams_df = pd.DataFrame(columns=['team_id', 'season_year', 'team_location', 'team_name', 'team_abbreviation'])
players_df = pd.DataFrame(columns=['player_id', 'player_first_name', 'player_last_name'])
games_df = pd.DataFrame(columns=['game_id', 'season_year', 'game_date', 'home_team_id', 'away_team_id'])
player_game_stats_df = pd.DataFrame(columns=['game_id', 'player_id', 'team_id', 'player_game_stats'])

start_date = date(2024, 4, 15)
end_date = date(2025, 3, 29)

current_date = start_date
current_highest_season = ""

# Request limit settings
delay_between_requests = 1  # Base pause in seconds after each processed date
max_delay = 360  # Upper bound in seconds (6 minutes) for that pause
max_game_attempts = 5  # Stop retrying a single game after this many failures
failed_game_ids = []  # Games skipped because every attempt failed

while current_date <= end_date:
    print(f"Processing games for {current_date}...")

    # Add a random sleep time to avoid hitting rate limits
    time.sleep(random.uniform(1.0, 2.0))

    # Fetch the scoreboard; fetch_with_retry signals failure by returning None.
    games = fetch_with_retry(endpoints.scoreboardv2.ScoreboardV2, game_date=current_date)

    if games is None:
        # Advance the loop variable itself: bumping start_date here would leave
        # current_date unchanged and retry the same date forever.
        print(f"Failed to fetch games for {current_date}. Skipping.")
        current_date += timedelta(days=1)
        continue

    try:
        games_data = games.get_data_frames()[0]
        game_ids = games_data['GAME_ID']

        if len(game_ids) > 0:
            # Positional access, so this does not depend on the index labels.
            current_highest_season = get_season(game_ids.iloc[0])
        else:
            print(f"No games found for {current_date}")
    except Exception as e:
        print(f"Error processing games for {current_date}: {e}. Skipping")
        current_date += timedelta(days=1)
        continue

    # Loop through all the game IDs and fetch the box score data
    for game_id in game_ids:
        # Bounded retry: a game that keeps failing must not stall the whole run.
        for _attempt in range(max_game_attempts):
            # Named box_score so the imported `boxscore` module is not shadowed.
            box_score = fetch_with_retry(endpoints.boxscoretraditionalv2.BoxScoreTraditionalV2, game_id=game_id)

            if box_score is None:
                print(f"Error fetching stats for game {game_id} -- boxscore is None. Tryinhg again")
                continue

            try:
                frames = box_score.get_data_frames()
                player_stats = frames[0]
                team_stats = frames[1]

                teams_df = fill_teams_df(game_id, team_stats, teams_df)
                players_df = fill_players_df(player_stats, players_df)
                games_df = fill_games_df(game_id, current_date, games_df)
                player_game_stats_df = get_player_game_stats(game_id, player_stats, player_game_stats_df)

                # Checkpoint after every game so an interrupted run keeps its progress.
                teams_df.to_csv('teams-old-3.csv', index=False)
                players_df.to_csv('players-old-3.csv', index=False)
                games_df.to_csv('games-old-3.csv', index=False)
                player_game_stats_df.to_csv('player_game_stats-old-3.csv', index=False)
                print(f"Processed game {game_id} for date {current_date}.")

                break

            except Exception as e:
                print(f"Error processing stats for game {game_id}: {e}. Trying Again")
        else:
            # Reached only when no attempt hit `break`, i.e. every attempt failed.
            failed_game_ids.append(game_id)
            print(f"Giving up on game {game_id} after {max_game_attempts} attempts. Skipping.")

    # After processing all games, move to the next day
    current_date += timedelta(days=1)

    # Throttle between dates, with jitter to avoid clustered requests
    time.sleep(delay_between_requests + random.uniform(0, 1))

    # Keep the pause within max_delay. The delay is never increased here, so
    # this is only a clamp; any backoff happens inside fetch_with_retry, if at all.
    delay_between_requests = min(delay_between_requests, max_delay)

print(len(games_df.game_id.unique()), "games processed")
print(len(player_game_stats_df.game_id.unique()), "unique games with player stats")
if failed_game_ids:
    print(len(failed_game_ids), "games skipped after repeated failures:", failed_game_ids)


Processing games for 2024-04-15...
No games found for 2024-04-15
Processing games for 2024-04-16...
Processed game 0052300121 for date 2024-04-16.
Processed game 0052300131 for date 2024-04-16.
Processing games for 2024-04-17...
