In [3]:
import datetime as dt
import os
import time

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine

print('Imports completed!')

Imports completed!


# TEAMS

In [4]:
# Endpoint that lists the teams.
url = "https://api-nba-v1.p.rapidapi.com/teams"

# SECURITY: the RapidAPI key is read from the RAPIDAPI_KEY environment variable
# so that the secret is never stored in (or committed with) the notebook.
# Export RAPIDAPI_KEY before starting the kernel; when it is unset an empty key
# is sent. NOTE(review): presumably the API then answers with a non-200 status
# -- confirm.
# NOTE(review): the key that used to be hardcoded here should be rotated.
headers = {
    "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", ""),
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com"
}

# Extract from url and header parameters
def extract_data(api_url, headers, timeout=30):
    """Issue an HTTP GET against ``api_url`` and return the decoded JSON body.

    Args:
        api_url: URL of the endpoint to request.
        headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        Whatever ``response.json()`` yields for a 200 response.

    Raises:
        Exception: If the response status code is anything other than 200.
            Errors raised by ``requests.get`` itself (connection failures,
            timeouts) are not caught here and propagate to the caller.
    """
    response = requests.get(api_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"API request failed with status code: {response.status_code}")
    return response.json()

# Clean the data
def transform_data(raw_data):
    """Reduce the raw teams payload to a tidy DataFrame of selected teams.

    A row is kept only when it has a ``leagues -> standard`` entry whose
    ``division`` is not ``None`` and its ``id`` is not in the exclusion set.
    Rows whose ``leagues``/``standard`` entry is missing or null, or that
    carry no ``division`` key, are skipped rather than raising.

    Args:
        raw_data: Decoded JSON payload; must contain a ``'response'`` list of
            dicts with ``id``, ``code`` and ``name`` keys.

    Returns:
        pandas.DataFrame with the columns ``team_id``, ``team_code`` and
        ``team_name``. The columns are present even when no row qualifies,
        so downstream column lookups keep working on an empty result.
    """
    # ID's to not be included
    # NOTE(review): the reason these particular ids are excluded is not
    # visible in this notebook -- confirm and document it.
    exclude_ids = {37, 34, 39, 102, 103, 42, 35, 36, 49}
    columns = ['team_id', 'team_code', 'team_name']

    # Gather Data for Teams
    teams = []
    for row in raw_data['response']:
        # `or {}` tolerates both an absent key and an explicit null value.
        standard = (row.get('leagues') or {}).get('standard') or {}
        if standard.get('division') is None:
            continue
        if row['id'] in exclude_ids:
            continue
        teams.append({
            'team_id': row['id'],
            'team_code': row['code'],
            'team_name': row['name']
        })

    # Passing `columns` fixes the schema even for an empty list of records.
    return pd.DataFrame(teams, columns=columns)

# Load data into a table
def load_data(df, table_name, connection_string):
    """Write ``df`` to a SQL table, replacing the table if it already exists.

    Args:
        df: DataFrame to persist; the index is not written.
        table_name: Name of the destination table.
        connection_string: SQLAlchemy URL passed to ``create_engine``.

    The engine is created for this call only and is disposed afterwards (also
    when the write fails) so its pooled connections are not left open.
    """
    engine = create_engine(connection_string)
    try:
        df.to_sql(table_name, engine, if_exists='replace', index=False)
    finally:
        engine.dispose()
    print(f'Data has been loaded and added to the {table_name} database')

In [5]:
# Run data pipeline
def run_pipeline(api_url="https://api-nba-v1.p.rapidapi.com/teams",
                 connection_string='sqlite:///teams_data.db',
                 table_name='teams'):
    """Extract the teams payload, clean it and store it in the database.

    Args:
        api_url: Endpoint to fetch. It defaults to the teams endpoint instead
            of reading the module-level ``url``, because a later cell rebinds
            ``url`` to the games endpoint; re-running this cell afterwards
            would otherwise load the wrong payload.
        connection_string: SQLAlchemy URL of the destination database.
        table_name: Table the cleaned teams are written to.

    The request headers are still taken from the module-level ``headers``.
    """
    raw_data = extract_data(api_url, headers)
    clean_data = transform_data(raw_data)
    load_data(clean_data, table_name, connection_string)


run_pipeline()

Data has been loaded and added to the teams database


# GAME SCORES

In [6]:

import pandas as pd
from sqlalchemy import create_engine

def read_data(connection_string):
    """Return every row of the ``teams`` table as a DataFrame.

    Args:
        connection_string: SQLAlchemy URL passed to ``create_engine``.

    Returns:
        The result of ``pd.read_sql`` for ``SELECT * FROM teams``.

    The engine is created for this call only and is disposed afterwards (also
    when the query fails) so its pooled connections are not left open.
    """
    engine = create_engine(connection_string)
    try:
        query = "SELECT * FROM teams"
        return pd.read_sql(query, con=engine)
    finally:
        engine.dispose()

# Read the data back from the database
connection_string = 'sqlite:///teams_data.db'
teams_data = read_data(connection_string)

# Export DataFrame to Excel
# to_excel does not create missing folders, so make sure ./data exists first;
# otherwise the export fails on a fresh checkout.
os.makedirs('./data', exist_ok=True)
teams_data.to_excel('./data/team_data.xlsx', index=False)

# Display the data
teams_data


Unnamed: 0,team_id,team_code,team_name
0,1,ATL,Atlanta Hawks
1,2,BOS,Boston Celtics
2,4,BKN,Brooklyn Nets
3,5,CHA,Charlotte Hornets
4,6,CHI,Chicago Bulls
5,7,CLE,Cleveland Cavaliers
6,8,DAL,Dallas Mavericks
7,9,DEN,Denver Nuggets
8,10,DET,Detroit Pistons
9,11,GSW,Golden State Warriors


In [None]:
from itertools import combinations 
import time

# Endpoint that returns games; queried below with an `h2h` (head-to-head) parameter.
# NOTE: this rebinds the module-level `url` and `headers` defined for the teams cell.
url = "https://api-nba-v1.p.rapidapi.com/games"

# SECURITY: the RapidAPI key is read from the RAPIDAPI_KEY environment variable
# so that the secret is never stored in (or committed with) the notebook.
# NOTE(review): the key that used to be hardcoded here should be rotated.
headers = {
    "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", ""),
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com"
}

# Ids of the teams read back from the database, as an array.
team_ids = teams_data['team_id'].values

def game_matches(team_ids, url, headers, delay=10, timeout=30):
    """Download the games payload for every unordered pair of teams.

    One GET request is sent per pair with the query parameter
    ``h2h=<first_id>-<second_id>``.

    Args:
        team_ids: Iterable of team ids; every 2-combination is requested.
        url: Endpoint to query.
        headers: Mapping of HTTP headers sent with each request.
        delay: Seconds to pause between consecutive requests. No pause is
            made after the final request, since nothing follows it.
        timeout: Seconds to wait for each response; without it a stalled
            connection would block the loop indefinitely.

    Returns:
        List of decoded JSON payloads, one per request that returned 200, in
        request order. Pairs that fail (non-200 status or a network error) are
        reported on stdout and skipped, so a single failure does not discard
        the payloads already collected.
    """
    unique_matchups = list(combinations(team_ids, 2))
    last_index = len(unique_matchups) - 1

    response_arr = []
    for index, (first_id, second_id) in enumerate(unique_matchups):
        matchup = f"{first_id}-{second_id}"
        print(f"Gathering data for matchup {matchup}")

        try:
            response = requests.get(
                url, headers=headers, params={"h2h": matchup}, timeout=timeout
            )
        except requests.RequestException as exc:
            # Same best-effort policy as a non-200 answer: report and move on.
            print(f"Failed to gather data for {matchup}. Error: {exc}")
        else:
            if response.status_code == 200:
                response_arr.append(response.json())
            else:
                print(f"Failed to gather data for {matchup}. Status code: {response.status_code}")

        if index < last_index:
            time.sleep(delay)

    print("Data Collection Completed!")
    return response_arr

# Long-running: one request per pair of teams, with a pause between requests.
results = game_matches(team_ids=team_ids, url=url, headers=headers)

Gathering data for matchup 1-2
Gathering data for matchup 1-4
Gathering data for matchup 1-5
Gathering data for matchup 1-6
Gathering data for matchup 1-7
Gathering data for matchup 1-8
Gathering data for matchup 1-9
Gathering data for matchup 1-10
Gathering data for matchup 1-11
Gathering data for matchup 1-14
Gathering data for matchup 1-15
Gathering data for matchup 1-16
Gathering data for matchup 1-17
Gathering data for matchup 1-19
Gathering data for matchup 1-20
Gathering data for matchup 1-21
Gathering data for matchup 1-22
Gathering data for matchup 1-23
Gathering data for matchup 1-24
Gathering data for matchup 1-25
Gathering data for matchup 1-26
Gathering data for matchup 1-27
Gathering data for matchup 1-28
Gathering data for matchup 1-29
Gathering data for matchup 1-30
Gathering data for matchup 1-31
Gathering data for matchup 1-38
Gathering data for matchup 1-40
Gathering data for matchup 1-41
Gathering data for matchup 2-4
Gathering data for matchup 2-5
Gathering data fo

In [None]:
def _dig(mapping, *keys, default=None):
    """Follow ``keys`` through nested dicts.

    Returns ``default`` when a key is absent or an intermediate value is not a
    dict (for example an explicit null); otherwise returns the stored value.
    """
    current = mapping
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


game_scores = []
for info in results:
    for game in info['response']:
        # Extract id for each game
        game_ids = game.get('id', 'N/A')

        # Extract date of the game
        date = _dig(game, 'date', 'start', default='N/A')

        # Extract teams info
        visitor = _dig(game, 'teams', 'visitors', 'name', default='Unknown')
        home = _dig(game, 'teams', 'home', 'name', default='Unknown')

        visitor_code = _dig(game, 'teams', 'visitors', 'code', default='N/A')
        home_code = _dig(game, 'teams', 'home', 'code', default='N/A')

        # Extract scores by quarter
        visitor_scores = _dig(game, 'scores', 'visitors', 'linescore', default=[])
        home_scores = _dig(game, 'scores', 'home', 'linescore', default=[])

        # Missing totals stay None (NaN in the frame) instead of the string '0',
        # so a missing score is not mistaken for a scoreless game and the
        # points columns are not mixed with strings.
        visitor_points = _dig(game, 'scores', 'visitors', 'points')
        home_points = _dig(game, 'scores', 'home', 'points')

        # Display data if scores and points exist
        if visitor_scores and home_scores and visitor_points is not None and home_points is not None:
            print(f"Date: {date}")
            print(f"Visitor Team: {visitor} ({visitor_code}) - Scores: {visitor_scores} - Total Points: {visitor_points}")
            print(f"Home Team: {home} ({home_code}) - Scores: {home_scores} - Total Points: {home_points}")
            print('-------------------------------------------')

        game_scores.append({
            'game_ids': game_ids,
            'date': date,
            'visitor': visitor,
            'home': home,
            'visitor_points': visitor_points,
            'home_points': home_points
        })

# Build the frame once, after all payloads are parsed. Doing it inside the loop
# rebuilt it for every matchup and left it undefined when `results` was empty.
game_score_df = pd.DataFrame(
    game_scores,
    columns=['game_ids', 'date', 'visitor', 'home', 'visitor_points', 'home_points'],
)

NameError: name 'results' is not defined

In [None]:
# to_excel does not create missing folders, so make sure ./data exists first.
os.makedirs('./data', exist_ok=True)
game_score_df.to_excel('./data/apiGameScores.xlsx', index=False)
game_score_df

Unnamed: 0,date,visitor,home,visitor_points,home_points
0,2015-11-25T01:00:00.000Z,Boston Celtics,Atlanta Hawks,97.0,121.0
1,2016-04-09T23:30:00.000Z,Boston Celtics,Atlanta Hawks,107.0,118.0
2,2016-04-16T23:00:00.000Z,Boston Celtics,Atlanta Hawks,101.0,102.0
3,2016-04-19T23:00:00.000Z,Boston Celtics,Atlanta Hawks,72.0,89.0
4,2016-04-27T00:30:00.000Z,Boston Celtics,Atlanta Hawks,83.0,110.0
...,...,...,...,...,...
14237,2021-04-13T01:00:00.000Z,Washington Wizards,Utah Jazz,125.0,121.0
14238,2021-12-19T02:00:00.000Z,Washington Wizards,Utah Jazz,109.0,103.0
14239,2022-12-23T02:00:00.000Z,Washington Wizards,Utah Jazz,112.0,120.0
14240,2024-03-05T02:00:00.000Z,Washington Wizards,Utah Jazz,115.0,127.0


In [None]:
# Games in which the Indiana Pacers were the visitors and the Orlando Magic the hosts.
game_score_df[
    game_score_df['visitor'].eq('Indiana Pacers')
    & game_score_df['home'].eq('Orlando Magic')
]

Unnamed: 0,date,visitor,home,visitor_points,home_points
8964,2016-01-07T00:00:00.000Z,Indiana Pacers,Orlando Magic,95.0,86.0
8965,2016-02-21T23:00:00.000Z,Indiana Pacers,Orlando Magic,105.0,102.0
8966,2016-10-14T23:00:00.000Z,Indiana Pacers,Orlando Magic,106.0,114.0
8967,2017-02-02T00:00:00.000Z,Indiana Pacers,Orlando Magic,98.0,88.0
8968,2017-04-08T22:00:00.000Z,Indiana Pacers,Orlando Magic,127.0,112.0
8969,2017-11-21T00:00:00.000Z,Indiana Pacers,Orlando Magic,105.0,97.0
8970,2018-12-08T00:00:00.000Z,Indiana Pacers,Orlando Magic,112.0,90.0
8971,2019-02-01T00:00:00.000Z,Indiana Pacers,Orlando Magic,100.0,107.0
8972,2019-11-10T23:00:00.000Z,Indiana Pacers,Orlando Magic,109.0,102.0
8973,2020-03-25,Indiana Pacers,Orlando Magic,,
