In [167]:
import os
import time
from itertools import combinations

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine

print('Imports completed!')

Imports completed!


# TEAMS

In [168]:
# Endpoint that lists the NBA teams.
url = "https://api-nba-v1.p.rapidapi.com/teams"

# The RapidAPI key is read from the RAPIDAPI_KEY environment variable so the
# secret is never stored in the notebook. When the variable is unset an empty
# key is sent; the API is then expected to reject the request.
headers = {
    "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", ""),
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
}

# Extract from url and header parameters
def extract_data(api_url, headers, timeout=30):
    """Send a GET request to ``api_url`` and return the decoded JSON body.

    Args:
        api_url: Address to request.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so a stalled connection would
            otherwise block the notebook indefinitely.

    Returns:
        The value of ``response.json()`` for a 200 response.

    Raises:
        Exception: If the response status code is anything other than 200.
            Errors raised by ``requests.get`` itself (for example a timeout)
            are not caught here and propagate to the caller.
    """
    response = requests.get(api_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"API request failed with status code: {response.status_code}")
    return response.json()

# Clean the data
def transform_data(raw_data, exclude_ids=None):
    """Reduce the raw teams payload to a DataFrame of id, code and name.

    A row is kept only when it has a ``leagues -> standard`` entry whose
    ``division`` is not ``None`` and its ``id`` is not in ``exclude_ids``.
    Rows whose ``leagues`` or ``standard`` entry is missing, null, or lacks a
    ``division`` key are skipped instead of raising.

    Args:
        raw_data: Decoded API payload; rows are read from ``raw_data['response']``.
        exclude_ids: Collection of team ids to drop. When ``None`` the
            notebook's original hard-coded set is used.

    Returns:
        DataFrame with the columns ``team_id``, ``team_code`` and
        ``team_name``. The columns are present even when no row qualifies, so
        a downstream table always gets the same schema.
    """
    if exclude_ids is None:
        # ID's to not be included
        exclude_ids = {37, 34, 39, 102, 103, 42, 35, 36, 49}

    columns = ['team_id', 'team_code', 'team_name']

    # Gather Data for Teams
    teams = []
    for row in raw_data['response']:
        # `or {}` also covers keys that are present but hold null.
        standard = (row.get('leagues') or {}).get('standard') or {}
        if standard.get('division') is None or row['id'] in exclude_ids:
            continue
        teams.append({
            'team_id': row['id'],
            'team_code': row['code'],
            'team_name': row['name'],
        })

    return pd.DataFrame(teams, columns=columns)

# Load data into a table
def load_data(df, table_name, connection_string):
    """Write ``df`` to the table ``table_name`` of the target database.

    An existing table with that name is replaced and the DataFrame index is
    not written as a column.

    Args:
        df: DataFrame to store.
        table_name: Name of the destination table.
        connection_string: SQLAlchemy URL passed to ``create_engine``.
    """
    engine = create_engine(connection_string)
    try:
        df.to_sql(table_name, engine, if_exists='replace', index=False)
    finally:
        # Release the engine's pooled connections even when the write fails.
        engine.dispose()
    print(f'Data has been loaded and added to the {table_name} database')

In [169]:
# Run data pipeline
TEAMS_URL = "https://api-nba-v1.p.rapidapi.com/teams"


def run_pipeline(api_url=TEAMS_URL, connection_string='sqlite:///teams_data.db'):
    """Extract the teams payload, clean it and store it in the ``teams`` table.

    The endpoint is bound as a default argument instead of being read from the
    global ``url`` at call time: a later cell rebinds ``url`` to the games
    endpoint, so re-running this pipeline afterwards would otherwise request
    the wrong resource. ``headers`` is still taken from the notebook globals.

    Args:
        api_url: Endpoint handed to ``extract_data``.
        connection_string: SQLAlchemy URL of the database that receives the
            ``teams`` table.
    """
    raw_data = extract_data(api_url, headers)
    clean_data = transform_data(raw_data)
    load_data(clean_data, 'teams', connection_string)


run_pipeline()

Data has been loaded and added to the teams database


# GAME SCORES

In [170]:
import pandas as pd
from sqlalchemy import create_engine

def read_data(connection_string):
    """Return every row of the ``teams`` table as a DataFrame.

    Args:
        connection_string: SQLAlchemy URL passed to ``create_engine``.

    Returns:
        The result of ``pd.read_sql`` for ``SELECT * FROM teams``.
    """
    engine = create_engine(connection_string)
    try:
        return pd.read_sql("SELECT * FROM teams", con=engine)
    finally:
        # Release the engine's pooled connections once the rows are loaded.
        engine.dispose()

# Pull the stored teams table back out of the SQLite file
connection_string = 'sqlite:///teams_data.db'
teams_data = read_data(connection_string=connection_string)

# Preview the first rows
teams_data.head()

Unnamed: 0,team_id,team_code,team_name
0,1,ATL,Atlanta Hawks
1,2,BOS,Boston Celtics
2,4,BKN,Brooklyn Nets
3,5,CHA,Charlotte Hornets
4,6,CHI,Chicago Bulls


In [207]:
from itertools import permutations 
import time

# Endpoint that returns game data.
url = "https://api-nba-v1.p.rapidapi.com/games"

# The RapidAPI key is read from the RAPIDAPI_KEY environment variable so the
# secret is never stored in the notebook. When the variable is unset an empty
# key is sent; the API is then expected to reject the request.
headers = {
    "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", ""),
    "x-rapidapi-host": "api-nba-v1.p.rapidapi.com",
}

# Team ids to pair up; only the first two are taken to keep the example small
team_ids = teams_data['team_id'].iloc[:2].values

def game_matches(team_ids, url, headers, timeout=30, delay=1):
    """Download head-to-head game data for every pairing of ``team_ids``.

    Each unordered pairing is requested exactly once, with the query parameter
    ``h2h=<smaller id>-<larger id>``. Duplicate ids in ``team_ids`` are
    collapsed first, and pairings are requested in ascending id order.

    Args:
        team_ids: Iterable of team ids.
        url: Endpoint to request.
        headers: HTTP headers sent with every request.
        timeout: Seconds to wait for each response before giving up.
        delay: Seconds to pause between consecutive requests; a falsy value
            disables the pause.

    Returns:
        List with the decoded JSON payload of every request that returned
        status 200, in request order. Pairings whose request failed are
        reported on stdout and left out of the list.
    """
    # combinations() over the sorted, de-duplicated ids yields each
    # (smaller, larger) pairing once, so no reverse duplicates need filtering.
    unique_matchups = list(combinations(sorted(set(team_ids)), 2))

    response_arr = []
    last_index = len(unique_matchups) - 1
    for index, (first_id, second_id) in enumerate(unique_matchups):
        label = f"{first_id}-{second_id}"
        print(f"Gathering data for matchup {label}")

        # Make the API request; a transport error (including a timeout) is
        # reported like a bad status so the remaining pairings are still tried.
        try:
            response = requests.get(url, headers=headers, params={"h2h": label}, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to gather data for {label}. Request error: {exc}")
        else:
            if response.status_code == 200:
                response_arr.append(response.json())
            else:
                print(f"Failed to gather data for {label}. Status code: {response.status_code}")

        # Avoid hitting rate limits; nothing follows the final request, so it
        # needs no pause.
        if delay and index < last_index:
            time.sleep(delay)

    return response_arr

# Fetch the head-to-head payloads for the selected teams
results = game_matches(team_ids, url, headers)

Gathering data for matchup 1-2


In [172]:
# Print the date, both teams and their line scores for every downloaded game
for payload in results:
    for game in payload['response']:
        teams = game.get('teams', {})
        scores = game.get('scores', {})
        visiting_team = teams.get('visitors', {})
        home_team = teams.get('home', {})

        # Start time of the game
        date = game.get('date', {}).get('start', 'N/A')

        # Team names and short codes, with placeholders when absent
        visitor = visiting_team.get('name', 'Unknown')
        home = home_team.get('name', 'Unknown')
        visitor_code = visiting_team.get('code', 'N/A')
        home_code = home_team.get('code', 'N/A')

        # Line scores for each side
        visitor_scores = scores.get('visitors', {}).get('linescore', 'No scores')
        home_scores = scores.get('home', {}).get('linescore', 'No scores')

        # One four-line record per game
        print(
            f"Date: {date}",
            f"Visitor Team: {visitor} ({visitor_code}) - Scores: {visitor_scores}",
            f"Home Team: {home} ({home_code}) - Scores: {home_scores}",
            '-------------------------------------------',
            sep='\n',
        )
        

Date: 2015-11-25T01:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['28', '20', '25', '24']
Home Team: Atlanta Hawks (ATL) - Scores: ['33', '24', '24', '40']
-------------------------------------------
Date: 2016-04-09T23:30:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['32', '39', '20', '16']
Home Team: Atlanta Hawks (ATL) - Scores: ['34', '33', '26', '25']
-------------------------------------------
Date: 2016-04-16T23:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['19', '15', '31', '36']
Home Team: Atlanta Hawks (ATL) - Scores: ['30', '21', '21', '30']
-------------------------------------------
Date: 2016-04-19T23:00:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['7', '21', '18', '26']
Home Team: Atlanta Hawks (ATL) - Scores: ['24', '19', '18', '28']
-------------------------------------------
Date: 2016-04-27T00:30:00.000Z
Visitor Team: Boston Celtics (BOS) - Scores: ['20', '19', '23', '21']
Home Team: Atlanta Hawks (ATL) - Scores: ['15', '32', '4

In [204]:
# Scratch check of the pairing logic, using only the first three team ids
team_ids = teams_data['team_id'].values[:3]

# Every ordered pairing of those ids
matchups = list(permutations(team_ids, 2))

# Keep a pairing only when its second id is the larger one, which leaves a
# single (smaller, larger) tuple for each pair of teams
unique_matchups = [(first, second) for first, second in matchups if second > first]

In [205]:
# Show the de-duplicated pairings built in the previous cell. Keeping only the
# permutations whose second id is larger leaves one (smaller, larger) tuple per
# pair of teams, so reverse duplicates such as (2, 1) are dropped.
# An equivalent approach: convert each pair to (min(id1, id2), max(id1, id2))
# and add the ordered pairs to a set to remove reverse duplicates automatically.
unique_matchups

[(1, 2), (1, 4), (2, 4)]