In [303]:
import datetime as dt
import os
import time

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine

# Confirm that the import cell ran to completion
print("Imports completed!")

Imports completed!


# TEAMS

In [304]:
# Endpoint that lists the teams
url = "https://api-nba-v1.p.rapidapi.com/teams"

# Read the RapidAPI key from the environment instead of hardcoding it in the
# notebook, where it is visible to anyone who can read the file.
# NOTE(review): the key that was previously committed here should be treated
# as leaked and rotated.
rapidapi_key = os.environ.get("RAPIDAPI_KEY", "")
if not rapidapi_key:
    print("WARNING: RAPIDAPI_KEY is not set; API requests are expected to fail authentication.")

headers = {
    "x-rapidapi-key": rapidapi_key,
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
}

# Extract from url and header parameters
def extract_data(api_url, headers, timeout=30):
    """Fetch JSON from ``api_url`` with a single GET request.

    Args:
        api_url: URL to request.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).
    """
    response = requests.get(api_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"API request failed with status code: {response.status_code}")
    return response.json()

# Clean the data
def transform_data(raw_data, exclude_ids=None):
    """Turn the raw teams payload into a tidy DataFrame.

    A team is kept when it has a ``leagues -> standard`` entry whose
    ``division`` is not null and its ``id`` is not in ``exclude_ids``.
    Rows where any of those nested keys is missing or null are skipped
    instead of raising.

    Args:
        raw_data: Decoded API payload; its ``'response'`` key holds the list
            of team dicts.
        exclude_ids: Team ids to drop. Defaults to the set used by this
            notebook. NOTE(review): the reason these ids are excluded is not
            documented here; confirm before changing the list.

    Returns:
        DataFrame with columns ``team_id``, ``team_code`` and ``team_name``.
        The columns are present even when no team passes the filter.
    """
    if exclude_ids is None:
        exclude_ids = {37, 34, 39, 102, 103, 42, 35, 36, 49}

    columns = ['team_id', 'team_code', 'team_name']

    teams = []
    for row in raw_data['response']:
        # ``or {}`` guards against keys that are present but null
        standard = (row.get('leagues') or {}).get('standard') or {}
        if standard.get('division') is None or row['id'] in exclude_ids:
            continue
        teams.append({
            'team_id': row['id'],
            'team_code': row['code'],
            'team_name': row['name'],
        })

    # Passing ``columns`` keeps the schema stable for an empty result
    return pd.DataFrame(teams, columns=columns)

# Load data into a table
def load_data(df, table_name, connection_string):
    """Write ``df`` to the SQL table ``table_name``, replacing any existing table.

    Args:
        df: DataFrame to store; its index is not written.
        table_name: Name of the destination table.
        connection_string: SQLAlchemy connection URL of the target database.
    """
    engine = create_engine(connection_string)
    try:
        df.to_sql(table_name, engine, if_exists='replace', index=False)
    finally:
        # Release pooled connections even if the write fails
        engine.dispose()
    print(f'Data has been loaded and added to the {table_name} database')

In [305]:
# Run data pipeline
def run_pipeline():
    """Extract the teams payload, clean it and store it in the ``teams`` table."""
    teams_clean = transform_data(extract_data(url, headers))
    sqlite_url = 'sqlite:///teams_data.db'
    load_data(teams_clean, 'teams', sqlite_url)


run_pipeline()

Data has been loaded and added to the teams database


# GAME SCORES

In [306]:

import pandas as pd
from sqlalchemy import create_engine

def read_data(connection_string, query="SELECT * FROM teams"):
    """Run ``query`` against the database and return the result as a DataFrame.

    Args:
        connection_string: SQLAlchemy connection URL of the database to read.
        query: SQL statement to execute; defaults to selecting every row of
            the ``teams`` table.

    Returns:
        DataFrame holding the query result.
    """
    engine = create_engine(connection_string)
    try:
        return pd.read_sql(query, con=engine)
    finally:
        # Release pooled connections even if the read fails
        engine.dispose()

# Read the teams table back from the SQLite database
connection_string = 'sqlite:///teams_data.db'
teams_data = read_data(connection_string)

# Export DataFrame to Excel. Create the output folder first so the export
# does not fail when ./data does not exist yet (e.g. on a fresh checkout).
os.makedirs('./data', exist_ok=True)
teams_data.to_excel('./data/team_data.xlsx', index=False)

# Display the data
teams_data

Unnamed: 0,team_id,team_code,team_name
0,1,ATL,Atlanta Hawks
1,2,BOS,Boston Celtics
2,4,BKN,Brooklyn Nets
3,5,CHA,Charlotte Hornets
4,6,CHI,Chicago Bulls
5,7,CLE,Cleveland Cavaliers
6,8,DAL,Dallas Mavericks
7,9,DEN,Denver Nuggets
8,10,DET,Detroit Pistons
9,11,GSW,Golden State Warriors


In [307]:
from itertools import combinations 
import time

# Endpoint that returns games; queried per head-to-head matchup below
url = "https://api-nba-v1.p.rapidapi.com/games"

# Read the RapidAPI key from the environment instead of hardcoding it in the
# notebook, where it is visible to anyone who can read the file.
# NOTE(review): the key that was previously committed here should be treated
# as leaked and rotated.
rapidapi_key = os.environ.get("RAPIDAPI_KEY", "")
if not rapidapi_key:
    print("WARNING: RAPIDAPI_KEY is not set; API requests are expected to fail authentication.")

headers = {
    "x-rapidapi-key": rapidapi_key,
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
}

team_ids = teams_data['team_id'].values

def game_matches(team_ids, url, headers, delay=10, timeout=30):
    """Download the games of every unordered pair of teams.

    One GET request is sent per pair, with the query parameter
    ``h2h="<id_a>-<id_b>"``. A pair whose request fails (non-200 status or a
    network error) is reported and skipped, so one bad request does not
    discard the payloads already collected.

    Args:
        team_ids: Iterable of team ids to pair up.
        url: Endpoint to query.
        headers: HTTP headers sent with every request.
        delay: Seconds to pause between consecutive requests.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        List of decoded JSON payloads, one per successful request.
    """
    unique_matchups = list(combinations(team_ids, 2))

    response_arr = []
    for position, (first_id, second_id) in enumerate(unique_matchups):
        # Pause between requests only: nothing to wait for before the first
        # request or after the last one
        if position:
            time.sleep(delay)

        matchup = f"{first_id}-{second_id}"
        print(f"Gathering data for matchup {matchup}")

        try:
            response = requests.get(
                url, headers=headers, params={"h2h": matchup}, timeout=timeout
            )
        except requests.exceptions.RequestException as exc:
            print(f"Failed to gather data for {matchup}. Error: {exc}")
            continue

        if response.status_code == 200:
            response_arr.append(response.json())
        else:
            print(f"Failed to gather data for {matchup}. Status code: {response.status_code}")

    print("Data Collection Completed!")
    return response_arr

results = game_matches(team_ids, url, headers)

Gathering data for matchup 1-2
Gathering data for matchup 1-4
Gathering data for matchup 1-5
Gathering data for matchup 1-6
Gathering data for matchup 1-7
Gathering data for matchup 1-8
Gathering data for matchup 1-9
Gathering data for matchup 1-10
Gathering data for matchup 1-11
Gathering data for matchup 1-14
Gathering data for matchup 1-15
Gathering data for matchup 1-16
Gathering data for matchup 1-17
Gathering data for matchup 1-19
Gathering data for matchup 1-20
Gathering data for matchup 1-21
Gathering data for matchup 1-22
Gathering data for matchup 1-23
Gathering data for matchup 1-24
Gathering data for matchup 1-25
Gathering data for matchup 1-26
Gathering data for matchup 1-27
Gathering data for matchup 1-28
Gathering data for matchup 1-29
Gathering data for matchup 1-30
Gathering data for matchup 1-31
Gathering data for matchup 1-38
Gathering data for matchup 1-40
Gathering data for matchup 1-41
Gathering data for matchup 2-4
Gathering data for matchup 2-5
Gathering data fo

In [308]:
def _dig(mapping, *keys, default=None):
    """Follow ``keys`` through nested dicts.

    Returns ``default`` as soon as a level is missing or is not a dict, so a
    null section in a payload cannot raise ``AttributeError``.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


# Flatten every game of every head-to-head payload into one record per game
game_scores = []
for info in results:
    for game in info.get('response') or []:
        # Extract id for each game
        game_ids = game.get('id', 'N/A')

        # Extract date of the game
        date = _dig(game, 'date', 'start', default='N/A')

        # Extract teams info
        visitor = _dig(game, 'teams', 'visitors', 'name', default='Unknown')
        home = _dig(game, 'teams', 'home', 'name', default='Unknown')
        visitor_code = _dig(game, 'teams', 'visitors', 'code', default='N/A')
        home_code = _dig(game, 'teams', 'home', 'code', default='N/A')

        # Extract scores by quarter
        visitor_scores = _dig(game, 'scores', 'visitors', 'linescore', default=[])
        home_scores = _dig(game, 'scores', 'home', 'linescore', default=[])

        # Missing totals stay None (not the string '0') so they show up as
        # nulls in the DataFrame instead of looking like a real score
        visitor_points = _dig(game, 'scores', 'visitors', 'points')
        home_points = _dig(game, 'scores', 'home', 'points')

        # Display data if scores and points exist
        if visitor_scores and home_scores and visitor_points is not None and home_points is not None:
            print(f"Date: {date}")
            print(f"Visitor Team: {visitor} ({visitor_code}) - Scores: {visitor_scores} - Total Points: {visitor_points}")
            print(f"Home Team: {home} ({home_code}) - Scores: {home_scores} - Total Points: {home_points}")
            print('-------------------------------------------')

        game_scores.append({
            'game_ids': game_ids,
            'date': date,
            'visitor': visitor,
            'home': home,
            'visitor_points': visitor_points,
            'home_points': home_points
        })

# Build the frame once, after all payloads are processed; explicit columns
# keep the schema intact even when no game was collected
game_score_df = pd.DataFrame(
    game_scores,
    columns=['game_ids', 'date', 'visitor', 'home', 'visitor_points', 'home_points'],
)

Date: 2015-11-25T01:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['28', '20', '25', '24'] - Total Points: 97
Home Team: Atlanta Hawks (ATL) - Scores: ['33', '24', '24', '40'] - Total Points: 121
-------------------------------------------
Date: 2016-04-09T23:30:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['32', '39', '20', '16'] - Total Points: 107
Home Team: Atlanta Hawks (ATL) - Scores: ['34', '33', '26', '25'] - Total Points: 118
-------------------------------------------
Date: 2016-04-16T23:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['19', '15', '31', '36'] - Total Points: 101
Home Team: Atlanta Hawks (ATL) - Scores: ['30', '21', '21', '30'] - Total Points: 102
-------------------------------------------
Date: 2016-04-19T23:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['7', '21', '18', '26'] - Total Points: 72
Home Team: Atlanta Hawks (ATL) - Scores: ['24', '19', '18', '28'] - Total Points: 89
-------------------------------------------


In [309]:
# Keep games whose date string is '2024-01-01' or later, ordered by date
# and re-indexed from zero
recent_games = (
    game_score_df[game_score_df['date'] >= '2024-01-01']
    .sort_values(by='date')
    .reset_index(drop=True)
)

In [310]:
# Create the output folder first so the export does not fail when ./data
# does not exist yet (e.g. on a fresh checkout)
os.makedirs('./data', exist_ok=True)
game_score_df.to_excel('./data/apiGameScores.xlsx', index=False)
game_score_df

Unnamed: 0,game_ids,date,visitor,home,visitor_points,home_points
0,319,2015-11-25T01:00:00.000Z,Boston Celtics,Atlanta Hawks,97.0,121.0
1,1300,2016-04-09T23:30:00.000Z,Boston Celtics,Atlanta Hawks,107.0,118.0
2,1344,2016-04-16T23:00:00.000Z,Boston Celtics,Atlanta Hawks,101.0,102.0
3,1353,2016-04-19T23:00:00.000Z,Boston Celtics,Atlanta Hawks,72.0,89.0
4,1376,2016-04-27T00:30:00.000Z,Boston Celtics,Atlanta Hawks,83.0,110.0
...,...,...,...,...,...,...
14237,9048,2021-04-13T01:00:00.000Z,Washington Wizards,Utah Jazz,125.0,121.0
14238,10012,2021-12-19T02:00:00.000Z,Washington Wizards,Utah Jazz,109.0,103.0
14239,11527,2022-12-23T02:00:00.000Z,Washington Wizards,Utah Jazz,112.0,120.0
14240,13430,2024-03-05T02:00:00.000Z,Washington Wizards,Utah Jazz,115.0,127.0


In [311]:
# Games in which Boston visited Atlanta
celtics_visiting = game_score_df['visitor'].eq('Boston Celtics')
hawks_hosting = game_score_df['home'].eq('Atlanta Hawks')
game_score_df.loc[celtics_visiting & hawks_hosting]

Unnamed: 0,game_ids,date,visitor,home,visitor_points,home_points
0,319,2015-11-25T01:00:00.000Z,Boston Celtics,Atlanta Hawks,97.0,121.0
1,1300,2016-04-09T23:30:00.000Z,Boston Celtics,Atlanta Hawks,107.0,118.0
2,1344,2016-04-16T23:00:00.000Z,Boston Celtics,Atlanta Hawks,101.0,102.0
3,1353,2016-04-19T23:00:00.000Z,Boston Celtics,Atlanta Hawks,72.0,89.0
4,1376,2016-04-27T00:30:00.000Z,Boston Celtics,Atlanta Hawks,83.0,110.0
5,2125,2017-01-14T01:00:00.000Z,Boston Celtics,Atlanta Hawks,103.0,101.0
6,2709,2017-04-07T00:00:00.000Z,Boston Celtics,Atlanta Hawks,116.0,123.0
7,3065,2017-11-07T00:30:00.000Z,Boston Celtics,Atlanta Hawks,110.0,107.0
8,3149,2017-11-19T00:30:00.000Z,Boston Celtics,Atlanta Hawks,110.0,99.0
9,4654,2018-11-24T00:30:00.000Z,Boston Celtics,Atlanta Hawks,114.0,96.0


# Game Statistics Data Extraction

In [312]:
# Load the game scores exported earlier in this notebook
game_df = pd.read_excel(io='./data/apiGameScores.xlsx')

In [313]:
# Number of missing values per column
game_df.isna().sum()

game_ids             0
date                 0
visitor              0
home                 0
visitor_points    1198
home_points       1198
dtype: int64

In [314]:
# Discard every row that has a missing value in any column
game_df = game_df.dropna(axis='index', how='any')

# (rows, columns) remaining after the drop
game_df.shape

(13044, 6)

In [315]:
# Keep games whose date string is '2020-01-01' or later, ordered by date
# and re-indexed from zero
recent_games = (
    game_df[game_df['date'] >= '2020-01-01']
    .sort_values(by='date')
    .reset_index(drop=True)
)
recent_games

Unnamed: 0,game_ids,date,visitor,home,visitor_points,home_points
0,6889,2020-01-01T00:00:00.000Z,Cleveland Cavaliers,Toronto Raptors,97.0,117.0
1,6891,2020-01-01T00:00:00.000Z,Golden State Warriors,San Antonio Spurs,113.0,117.0
2,6890,2020-01-01T00:00:00.000Z,Denver Nuggets,Houston Rockets,104.0,130.0
3,6892,2020-01-01T01:00:00.000Z,Dallas Mavericks,Oklahoma City Thunder,101.0,106.0
4,6893,2020-01-01T23:00:00.000Z,Orlando Magic,Washington Wizards,122.0,101.0
...,...,...,...,...,...,...
6642,14466,2024-12-08T23:30:00.000Z,Phoenix Suns,Orlando Magic,110.0,115.0
6643,14467,2024-12-09T00:00:00.000Z,Memphis Grizzlies,Washington Wizards,118.0,86.0
6644,14468,2024-12-09T00:00:00.000Z,New Orleans Pelicans,San Antonio Spurs,105.0,114.0
6645,14469,2024-12-09T01:30:00.000Z,Minnesota Timberwolves,Golden State Warriors,10.0,7.0


In [None]:
# Define url and necessary parameters
url = "https://api-nba-v1.p.rapidapi.com/games/statistics"

# Read the RapidAPI key from the environment instead of hardcoding it in the
# notebook, where it is visible to anyone who can read the file.
# NOTE(review): the key that was previously committed here should be treated
# as leaked and rotated.
rapidapi_key = os.environ.get("RAPIDAPI_KEY", "")
if not rapidapi_key:
    print("WARNING: RAPIDAPI_KEY is not set; API requests are expected to fail authentication.")

headers = {
    "x-rapidapi-key": rapidapi_key,
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
}

def _flatten_game_statistics(payload, requested_id):
    """Flatten one statistics payload into a single row dict.

    Returns ``None`` when the payload does not hold statistics for two teams.

    Args:
        payload: Decoded JSON body of one statistics response.
        requested_id: Game id that was requested; used as the row's
            ``game_id`` when the payload does not echo it under
            ``parameters -> id``.
    """
    entries = payload.get('response') or []
    if len(entries) < 2:
        return None

    # NOTE(review): assumes the first entry is the visiting team and the
    # second the home team; confirm against the API documentation
    visitor_entry, home_entry = entries[0], entries[1]
    visitor_rows = visitor_entry.get('statistics') or []
    home_rows = home_entry.get('statistics') or []
    if not visitor_rows or not home_rows:
        return None

    game_id_value = (payload.get('parameters') or {}).get('id', requested_id)

    # Key order determines the column order of the final DataFrame
    row = {f"visitor_{key}": value for key, value in visitor_rows[0].items()}
    row['game_id'] = game_id_value
    row['visitor_team'] = (visitor_entry.get('team') or {}).get('name', 'Unknown')
    row.update({f"home_{key}": value for key, value in home_rows[0].items()})
    row['home_team'] = (home_entry.get('team') or {}).get('name', 'Unknown')
    return row


# Grab game ids to be used with statistics API
game_ids = recent_games['game_ids'].values

statistics = []
total_games = len(game_ids)

for i, game_id in enumerate(game_ids, start=1):
    row = None
    try:
        response = requests.get(url, headers=headers, params={"id": game_id}, timeout=30)
    except requests.exceptions.RequestException as exc:
        # A single network failure must not abort the crawl and lose the
        # rows collected so far
        print(f"Failed to fetch data for Game ID: {game_id}. Error: {exc}")
    else:
        # Check if the API request was successful
        if response.status_code == 200:
            row = _flatten_game_statistics(response.json(), game_id)
            if row is None:
                print(f"No statistics found for Game ID: {game_id}")
        else:
            print(f"Failed to fetch data for Game ID: {game_id}. Status Code: {response.status_code}")

    if row is not None:
        statistics.append(row)
        print(f"Collecting statistics for id: {game_id}")

        # Notify on every 100th collected game; checked only right after an
        # append so the message is not repeated while the count is unchanged
        if len(statistics) % 100 == 0:
            print(f"{len(statistics)} games have been collected.")

    # Pause between requests; no need to wait after the final one
    if i < total_games:
        time.sleep(30)

# Create the Data Frame
print("Finished Gather Data.")
game_statistics_df = pd.DataFrame(statistics)

Collecting statistics for id: 6889
Collecting statistics for id: 6891
Collecting statistics for id: 6890
Collecting statistics for id: 6892
Collecting statistics for id: 6893
Collecting statistics for id: 6894
Collecting statistics for id: 6895
Collecting statistics for id: 6896
Collecting statistics for id: 6897
Collecting statistics for id: 6898
Collecting statistics for id: 6899
Collecting statistics for id: 6900
Collecting statistics for id: 6901
Collecting statistics for id: 6902
Collecting statistics for id: 6903
Collecting statistics for id: 6904
Collecting statistics for id: 6905
Collecting statistics for id: 6907
Collecting statistics for id: 6906
Collecting statistics for id: 6908
Collecting statistics for id: 6909
Collecting statistics for id: 6910
Collecting statistics for id: 6911
Collecting statistics for id: 6912
Collecting statistics for id: 6913
Collecting statistics for id: 6914
Collecting statistics for id: 6915
Collecting statistics for id: 6916
Collecting statistic

In [318]:
# Export as csv file. Create the output folder first so the export does not
# fail when ./data does not exist yet (e.g. on a fresh checkout).
os.makedirs('./data', exist_ok=True)
game_statistics_df.to_csv('./data/gameStatistics.csv', index=False)
game_statistics_df

Unnamed: 0,visitor_fastBreakPoints,visitor_pointsInPaint,visitor_biggestLead,visitor_secondChancePoints,visitor_pointsOffTurnovers,visitor_longestRun,visitor_points,visitor_fgm,visitor_fga,visitor_fgp,...,home_defReb,home_totReb,home_assists,home_pFouls,home_steals,home_turnovers,home_blocks,home_plusMinus,home_min,home_team
0,,,,,,,88,29,77,37.7,...,30,46,20,20,10,10,5,20,240:00,New York Knicks
1,,,,,,,125,47,85,55.3,...,20,26,21,24,5,5,3,1,240:00,Dallas Mavericks
2,,,,,,,106,38,96,39.6,...,40,42,26,16,7,14,6,17,240:00,Brooklyn Nets
3,,,,,,,102,39,81,48.1,...,35,50,20,18,9,21,5,1,240:00,Cleveland Cavaliers
4,,,,,,,104,36,83,43.4,...,37,52,21,22,6,14,6,12,240:00,Detroit Pistons
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2490,,,,,,,108,39,91,42.9,...,42,50,29,22,6,11,4,19,240:00,Chicago Bulls
2491,,,,,,,103,35,77,45.5,...,32,41,23,19,5,13,7,3,240:00,Milwaukee Bucks
2492,,,,,,,111,42,98,42.9,...,37,44,31,20,12,12,9,6,240:00,Houston Rockets
2493,,,,,,,115,43,84,51.2,...,26,39,30,17,12,10,5,13,240:00,San Antonio Spurs


In [None]:
# Statistics rows where Boston is the home team and Atlanta the visitor
celtics_hosting = game_statistics_df['home_team'].eq('Boston Celtics')
hawks_visiting = game_statistics_df['visitor_team'].eq('Atlanta Hawks')
game_statistics_df.loc[celtics_hosting & hawks_visiting]

Unnamed: 0,visitor_fastBreakPoints,visitor_pointsInPaint,visitor_biggestLead,visitor_secondChancePoints,visitor_pointsOffTurnovers,visitor_longestRun,visitor_points,visitor_fgm,visitor_fga,visitor_fgp,...,home_defReb,home_totReb,home_assists,home_pFouls,home_steals,home_turnovers,home_blocks,home_plusMinus,home_min,home_team
469,,,,,,,125,50,100,50.0,...,35,40,35,21,6,9,6,9,240:00,Boston Celtics
717,,,,,,,130,51,91,56.0,...,23,29,31,17,11,12,0,-8,240:00,Boston Celtics
726,,,,,,,121,43,98,43.9,...,40,49,25,23,4,16,5,8,240:00,Boston Celtics
737,,,,,,,120,44,94,46.8,...,36,47,24,16,7,7,10,8,240:00,Boston Celtics
1910,,,,,,,120,46,87,52.9,...,26,38,21,16,7,9,8,-2,240:00,Boston Celtics
1934,,,,,,,123,51,104,49.0,...,34,43,28,15,7,12,6,-1,240:00,Boston Celtics
2327,,,,,,,93,37,89,41.6,...,39,50,27,14,15,13,7,30,240:00,Boston Celtics
