In [36]:
import datetime as dt
import os
import time

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine

# Confirm that the import cell finished without raising
print("Imports completed!")

Imports completed!


# Game Statistics Data Extraction

In [37]:
# Load the exported game-score workbook into a DataFrame
game_scores_path = './data/apiGameScores.xlsx'
game_df = pd.read_excel(game_scores_path)

In [38]:
# Count the missing values in each column
game_df.isna().sum()

game_ids             0
date                 0
visitor              0
home                 0
visitor_points    1092
home_points       1092
dtype: int64

In [39]:
# Discard every row that has at least one missing value
game_df = game_df.dropna(axis=0, how='any')

# (rows, columns) remaining after the drop
game_df.shape

(13181, 6)

In [40]:
# Keep games dated 2018-01-01 or later, order them by date,
# and renumber the rows from zero
recent_games = (
    game_df[game_df['date'] >= '2018-01-01']
    .sort_values(by='date')
    .reset_index(drop=True)
)
recent_games

Unnamed: 0,game_ids,date,visitor,home,visitor_points,home_points
0,3459,2018-01-01T00:00:00.000Z,Los Angeles Lakers,Houston Rockets,142.0,148.0
1,3462,2018-01-01T00:00:00.000Z,Memphis Grizzlies,Sacramento Kings,114.0,96.0
2,3461,2018-01-01T00:00:00.000Z,Charlotte Hornets,LA Clippers,98.0,106.0
3,3460,2018-01-01T00:00:00.000Z,Dallas Mavericks,Oklahoma City Thunder,116.0,113.0
4,3463,2018-01-01T01:00:00.000Z,Philadelphia 76ers,Phoenix Suns,123.0,110.0
...,...,...,...,...,...,...
9672,14571,2025-01-02T00:30:00.000Z,New Orleans Pelicans,Miami Heat,108.0,119.0
9673,14573,2025-01-02T00:30:00.000Z,Brooklyn Nets,Toronto Raptors,113.0,130.0
9674,14574,2025-01-02T01:00:00.000Z,Dallas Mavericks,Houston Rockets,99.0,110.0
9675,14575,2025-01-02T02:00:00.000Z,Atlanta Hawks,Denver Nuggets,120.0,139.0


In [None]:
# Define url and necessary parameters
url = "https://api-nba-v1.p.rapidapi.com/games/statistics"

# The API key is read from the environment so the secret is never stored in
# (or committed with) the notebook.
api_key = os.environ.get("RAPIDAPI_KEY")
if not api_key:
    raise RuntimeError("Set the RAPIDAPI_KEY environment variable before running this cell.")

headers = {
    "x-rapidapi-key": api_key,
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com"
}

# Grab game ids to be used with statistics API
game_ids = recent_games['game_ids'].values

statistics = []       # one flattened dict per game that returned statistics
batch_size = 100      # write a checkpoint CSV every `batch_size` collected games
batch_count = 0       # number of checkpoint/final files written so far
max_retries = 3       # attempts per game before it is skipped
retry_delay = 10      # seconds to wait after a failed attempt
request_delay = 20    # seconds to wait between games


def _extract_game_record(game_statistics):
    """Flatten one /games/statistics payload into a single dict.

    Returns None when the payload's 'response' list has fewer than two
    entries. Otherwise the statistics of the first two entries are prefixed
    with ``visitor_`` and ``home_`` respectively and merged together with
    ``game_id``, ``visitor_team`` and ``home_team``.

    NOTE(review): this assumes response[0] is the visiting team and
    response[1] the home team -- confirm against the API documentation.

    Raises KeyError/IndexError if an entry lacks the expected
    'parameters' or 'statistics' structure; the caller treats that as a
    failed attempt.
    """
    teams = game_statistics.get('response', [])
    if len(teams) <= 1:
        return None

    stats_game_id = game_statistics['parameters']['id']
    visitor_entry, home_entry = teams[0], teams[1]

    visitor_stats = {f"visitor_{key}": value
                     for key, value in visitor_entry['statistics'][0].items()}
    visitor_stats['game_id'] = stats_game_id
    visitor_stats['visitor_team'] = visitor_entry.get('team', {}).get('name', 'Unknown')

    home_stats = {f"home_{key}": value
                  for key, value in home_entry['statistics'][0].items()}
    home_stats['game_id'] = stats_game_id
    home_stats['home_team'] = home_entry.get('team', {}).get('name', 'Unknown')

    return {**visitor_stats, **home_stats}


print("Gathering Data...")
for game_id in game_ids:
    querystring = {"id": game_id}
    record_added = False
    stop_collection = False

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, params=querystring, timeout=35)

            if response.status_code == 200:
                record = _extract_game_record(response.json())
                if record is not None:
                    statistics.append(record)
                    record_added = True
                break

            if response.status_code == 403:
                # The key was rejected, so every remaining request would fail
                # the same way: stop here and fall through to the final save.
                print(f"403 Error for game ID: {game_id}. Saving current data.")
                stop_collection = True
                break

            # Any other status (rate limit, server error, ...) counts as a
            # failed attempt; previously it looped forever without a delay.
            print(f"Unexpected status {response.status_code} for game ID: {game_id}. "
                  f"Attempt {attempt}/{max_retries}.")

        except Exception as e:
            # Deliberately broad: network errors, undecodable JSON and
            # malformed payloads are all logged and retried (best effort).
            print(f"Error for game_id: {game_id}. Attempt {attempt}/{max_retries}. Error: {e}")

        time.sleep(retry_delay)
    else:
        # The loop ended without a `break`, i.e. every attempt failed.
        print(f"Max retries reached for game ID: {game_id}. Skipping to next ID.")

    if stop_collection:
        break

    # Checkpoint every `batch_size` games. Requiring `record_added` prevents a
    # skipped game (or an empty list) from re-triggering the same checkpoint.
    if record_added and len(statistics) % batch_size == 0:
        batch_count += 1
        game_statistics_df = pd.DataFrame(statistics)
        batch_file = f"./data/gameStatistics_batch_{batch_count}.csv"
        game_statistics_df.to_csv(batch_file, index=False)
        print(f"Batch {batch_count} saved with {len(statistics)} games collected.")

    time.sleep(request_delay)

# Final save. The DataFrame is always created so that later cells can
# reference `game_statistics_df` even when nothing was collected.
game_statistics_df = pd.DataFrame(statistics)
if statistics:
    batch_count += 1
    batch_file = f"./data/gameStatistics_batch_{batch_count}.csv"
    game_statistics_df.to_csv(batch_file, index=False)
    print(f"Final batch saved with {len(statistics)} games collected.")

print("Finished Gathering Data.")

Gathering Data...


In [None]:
# Write the combined statistics table to disk, then display it
combined_csv_path = './data/gameStatistics.csv'
game_statistics_df.to_csv(path_or_buf=combined_csv_path, index=False)
game_statistics_df

Unnamed: 0,visitor_fastBreakPoints,visitor_pointsInPaint,visitor_biggestLead,visitor_secondChancePoints,visitor_pointsOffTurnovers,visitor_longestRun,visitor_points,visitor_fgm,visitor_fga,visitor_fgp,...,home_defReb,home_totReb,home_assists,home_pFouls,home_steals,home_turnovers,home_blocks,home_plusMinus,home_min,home_team
0,15.0,48.0,25.0,5.0,23.0,15.0,120.0,40.0,86.0,46.5,...,31.0,46.0,21.0,27.0,9.0,18.0,4.0,-10,240:00,Houston Rockets
1,10.0,40.0,8.0,18.0,12.0,11.0,121.0,43.0,97.0,44.3,...,38.0,46.0,27.0,18.0,1.0,9.0,8.0,-3,240:00,Cleveland Cavaliers
2,14.0,36.0,7.0,3.0,6.0,16.0,108.0,38.0,78.0,48.7,...,36.0,54.0,19.0,18.0,8.0,7.0,4.0,8,240:00,Toronto Raptors
3,4.0,36.0,9.0,3.0,8.0,10.0,80.0,30.0,81.0,37.0,...,42.0,49.0,21.0,17.0,6.0,9.0,3.0,15,240:00,Oklahoma City Thunder
4,16.0,48.0,5.0,12.0,6.0,10.0,105.0,39.0,90.0,43.3,...,42.0,58.0,28.0,18.0,7.0,9.0,11.0,13,240:00,Memphis Grizzlies
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4067,,,,,,,110.0,38.0,99.0,38.4,...,38.0,53.0,22.0,28.0,15.0,17.0,6.0,4,240:00,LA Clippers
4068,,,,,,,132.0,48.0,90.0,53.3,...,32.0,48.0,23.0,25.0,6.0,19.0,1.0,-24,240:00,Portland Trail Blazers
4069,,,,,,,105.0,39.0,90.0,43.3,...,35.0,43.0,26.0,16.0,7.0,12.0,4.0,6,240:00,Indiana Pacers
4070,,,,,,,117.0,46.0,90.0,51.1,...,30.0,39.0,23.0,13.0,6.0,15.0,5.0,-27,240:00,Phoenix Suns
