In [1]:
import requests
import pandas as pd
import numpy as np
import json
from dotenv import load_dotenv
import os
import time
import datetime as dt
from datetime import datetime

In [2]:
# Load the API credentials from api_key.json. When the file is absent the
# notebook keeps an empty dict and reports the problem on the console.
api_key = {}
try:
    file = open('api_key.json')
except FileNotFoundError:
    print("Error: api_key.json file not found.")
else:
    # The handle is closed as soon as the JSON has been parsed.
    with file:
        api_key = json.load(file)

In [3]:
# The three functions below each query the 2022-2023 standings endpoint and return the teams' ids, names, conferences and/or conference positions as a dict, a list and a DataFrame respectively
    
def team_id_dict():
    """Fetch the 2022-2023 standings (league 12) and map team ids to team names.

    Sends one GET request to the api-basketball standings endpoint, prints the
    remaining daily / per-minute request quota reported in the response
    headers, and collects the conference-level standings entries.

    Returns:
        dict: ``{team_id (int): team_name (str)}`` for every leading entry whose
        group is "Western Conference" or "Eastern Conference".

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        KeyError, IndexError: if the payload has no ``response[0]`` entry or an
            entry lacks the expected ``group`` / ``team`` fields.
    """
    url = "https://v1.basketball.api-sports.io/standings?league=12&season=2022-2023"
    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # raise_for_status() turns HTTP failures into an explicit error right here.
    response = requests.request("GET", url, headers=headers, data={}, timeout=30)
    response.raise_for_status()

    # team_lvl (team level) is the list of standings entries to walk through.
    team_lvl = response.json()['response'][0]

    # Quota headers are informational only, so a missing header must not crash.
    daily_calls_remaining = response.headers.get('x-ratelimit-requests-remaining', 'unknown')
    perMinute_calls_remaining = response.headers.get('X-RateLimit-Remaining', 'unknown')

    print(f'Daily calls remaining = {daily_calls_remaining}')
    print(f'Calls left per minute = {perMinute_calls_remaining}\n')

    conference_names = ("Western Conference", "Eastern Conference")
    names_by_id = {}

    for team in team_lvl:
        if team['group']['name'] not in conference_names:
            # The scan stops at the first non-conference entry, i.e. it assumes
            # the conference entries come first in the list - TODO confirm
            # against the API response ordering.
            break
        names_by_id[int(team['team']['id'])] = team['team']['name']

    return names_by_id


def team_id_list():
    """Fetch the 2022-2023 standings (league 12) and list the teams' API ids.

    Sends one GET request to the api-basketball standings endpoint, prints the
    remaining daily / per-minute request quota reported in the response
    headers, and collects the ids of the conference-level standings entries.

    Returns:
        list[int]: team ids, in response order, for every leading entry whose
        group is "Western Conference" or "Eastern Conference".

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        KeyError, IndexError: if the payload has no ``response[0]`` entry or an
            entry lacks the expected ``group`` / ``team`` fields.
    """
    url = "https://v1.basketball.api-sports.io/standings?league=12&season=2022-2023"
    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # raise_for_status() turns HTTP failures into an explicit error right here.
    response = requests.request("GET", url, headers=headers, data={}, timeout=30)
    response.raise_for_status()

    # team_lvl (team level) is the list of standings entries to walk through.
    team_lvl = response.json()['response'][0]

    # Quota headers are informational only, so a missing header must not crash.
    daily_calls_remaining = response.headers.get('x-ratelimit-requests-remaining', 'unknown')
    perMinute_calls_remaining = response.headers.get('X-RateLimit-Remaining', 'unknown')

    print(f'Daily calls remaining = {daily_calls_remaining}')
    print(f'Calls left per minute = {perMinute_calls_remaining}\n')

    conference_names = ("Western Conference", "Eastern Conference")
    collected_ids = []

    for team in team_lvl:
        if team['group']['name'] not in conference_names:
            # The scan stops at the first non-conference entry, i.e. it assumes
            # the conference entries come first in the list - TODO confirm
            # against the API response ordering.
            break
        collected_ids.append(int(team['team']['id']))

    return collected_ids
    
def team_position_df():
    """Fetch the 2022-2023 standings (league 12) as a conference-position table.

    Sends one GET request to the api-basketball standings endpoint, prints the
    remaining daily / per-minute request quota reported in the response
    headers, and tabulates the conference-level standings entries.

    Returns:
        pandas.DataFrame: one row per leading entry whose group is
        "Western Conference" or "Eastern Conference", with the columns
        ``Team``, ``Team ID`` (int), ``Conference`` and ``Conference Position``
        (the position converted to ``str``). The index runs 0..n-1.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        KeyError, IndexError: if the payload has no ``response[0]`` entry or an
            entry lacks the expected fields.
    """
    url = "https://v1.basketball.api-sports.io/standings?league=12&season=2022-2023"
    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection;
    # raise_for_status() turns HTTP failures into an explicit error right here.
    response = requests.request("GET", url, headers=headers, data={}, timeout=30)
    response.raise_for_status()

    # team_lvl (team level) is the list of standings entries to walk through.
    team_lvl = response.json()['response'][0]

    # Quota headers are informational only, so a missing header must not crash.
    daily_calls_remaining = response.headers.get('x-ratelimit-requests-remaining', 'unknown')
    perMinute_calls_remaining = response.headers.get('X-RateLimit-Remaining', 'unknown')

    print(f'Daily calls remaining = {daily_calls_remaining}')
    print(f'Calls left per minute = {perMinute_calls_remaining}\n')

    conference_names = ("Western Conference", "Eastern Conference")
    rows = []

    for team in team_lvl:
        t_conference = team['group']['name']
        if t_conference not in conference_names:
            # The scan stops at the first non-conference entry, i.e. it assumes
            # the conference entries come first in the list - TODO confirm
            # against the API response ordering.
            break
        rows.append([
            team['team']['name'],
            int(team['team']['id']),
            t_conference,
            str(team['position']),
        ])

    # Build the frame once from the collected rows instead of growing it
    # one `.loc` assignment at a time.
    return pd.DataFrame(rows, columns=['Team', 'Team ID', 'Conference', 'Conference Position'])


def team_stats_df_all_season(team_id_list):
    """Download 2022-2023 season statistics for each team and tabulate them.

    One GET request is sent to the api-basketball ``statistics`` endpoint per
    team id. After every response the remaining daily / per-minute quota is read
    from the response headers; once either counter reaches zero the loop stops
    early and the tables built so far are returned.

    Args:
        team_id_list: iterable of team ids (stored unchanged in ``Team ID``).

    Returns:
        tuple of five pandas.DataFrame objects, one row per fetched team:
        ``(games_played_df, games_won_df, games_lost_df, points_for_df,
        points_against_df)``. Each has the columns ``Team``, a home / away /
        total triple, and ``Team ID``.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        KeyError, TypeError: if a payload lacks the expected statistics fields.
    """
    # Pause between requests so the loop stays below the per-minute request cap.
    request_pause_seconds = 0.25

    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    played_rows, won_rows, lost_rows = [], [], []
    points_for_rows, points_against_rows = [], []

    # The quota counters are initialised ONCE, before the loop, so the values
    # reported by the previous response actually gate the next request.
    daily_calls_remaining = 1
    perMinute_calls_remaining = 1

    for team_id in team_id_list:
        if perMinute_calls_remaining <= 0 or daily_calls_remaining <= 0:
            print('API request quota exhausted - stopping early with the teams fetched so far.')
            break

        # API URL for NBA team data via api-basketball.com
        url = "https://v1.basketball.api-sports.io/statistics?season=2022-2023&team=" + str(team_id) + "&league=12"

        # A timeout prevents a stalled connection from blocking the notebook.
        response = requests.request("GET", url, headers=headers, data={}, timeout=30)
        response.raise_for_status()
        stats = response.json()["response"]

        # A missing header keeps the previous counter instead of crashing.
        daily_calls_remaining = int(
            response.headers.get('x-ratelimit-requests-remaining', daily_calls_remaining))
        perMinute_calls_remaining = int(
            response.headers.get('X-RateLimit-Remaining', perMinute_calls_remaining))

        team_name = stats["team"]["name"]
        games = stats["games"]
        points = stats["points"]
        sides = ("home", "away", "all")

        played_rows.append(
            [team_name] + [games["played"][side] for side in sides] + [team_id])
        won_rows.append(
            [team_name] + [games["wins"][side]["total"] for side in sides] + [team_id])
        lost_rows.append(
            [team_name] + [games["loses"][side]["total"] for side in sides] + [team_id])
        points_for_rows.append(
            [team_name] + [points["for"]["total"][side] for side in sides] + [team_id])
        points_against_rows.append(
            [team_name] + [points["against"]["total"][side] for side in sides] + [team_id])

        # One progress block per team; the full tables are returned to the
        # caller, which can display them once.
        print(f'Fetched statistics for {team_name} (team id {team_id})')
        print('=====================================================================================')
        print('Daily calls remaining = ' + str(daily_calls_remaining))
        print('Calls left per minute = ' + str(perMinute_calls_remaining), '\n')

        time.sleep(request_pause_seconds)

    # Build every table once from the collected rows instead of growing the
    # frames one `.loc` assignment at a time.
    games_played_df = pd.DataFrame(
        played_rows, columns=['Team', 'Home games', 'Away games', 'Total games', 'Team ID'])
    games_won_df = pd.DataFrame(
        won_rows, columns=['Team', 'Home wins', 'Away wins', 'Total wins', 'Team ID'])
    games_lost_df = pd.DataFrame(
        lost_rows, columns=['Team', 'Home loses', 'Away loses', 'Total loses', 'Team ID'])
    points_for_df = pd.DataFrame(
        points_for_rows,
        columns=['Team', 'Home points for', 'Away points for', 'Total points for', 'Team ID'])
    points_against_df = pd.DataFrame(
        points_against_rows,
        columns=['Team', 'Home points against', 'Away points against', 'Total points against', 'Team ID'])

    return games_played_df, games_won_df, games_lost_df, points_for_df, points_against_df



In [4]:
# Fetch the standings-derived lookups. The results are stored under the same
# names as the functions that produce them (later cells read `team_id_list`
# and `team_position_df` as data), so after the first run the functions are
# gone. The `callable` guards keep this cell re-runnable: a second run reuses
# the data already fetched instead of failing with "object is not callable".
# Re-run the definitions cell first to force a fresh download.
if callable(team_id_list):
    team_id_list = team_id_list()
if callable(team_position_df):
    team_position_df = team_position_df()
display(team_position_df)

Daily calls remaining = 7499
Calls left per minute = 299

Daily calls remaining = 7498
Calls left per minute = 298



Unnamed: 0,Team,Team ID,Conference,Conference Position
0,Denver Nuggets,139,Western Conference,1
1,Memphis Grizzlies,146,Western Conference,2
2,Sacramento Kings,157,Western Conference,3
3,Phoenix Suns,155,Western Conference,4
4,Golden State Warriors,141,Western Conference,5
5,Los Angeles Clippers,144,Western Conference,6
6,New Orleans Pelicans,150,Western Conference,7
7,Los Angeles Lakers,145,Western Conference,8
8,Minnesota Timberwolves,149,Western Conference,9
9,Oklahoma City Thunder,152,Western Conference,10


In [None]:
# Pull the season-long statistics for every team id; the helper hands back
# five tables in this fixed order.
(
    games_played_df,
    games_won_df,
    games_lost_df,
    points_for_df,
    points_against_df,
) = team_stats_df_all_season(team_id_list)

# The function below corrects games_played_df so that it counts regular-season games only

In [6]:
def games_played_rectifier(games_played_df):
    """Return a copy of ``games_played_df`` with out-of-season games subtracted.

    For every row whose ``Total games`` exceeds 82, the team's 2022-2023 game
    list is requested from the api-basketball ``games`` endpoint. Each game
    dated before 2022-10-18 or after 2023-04-09 lowers the row's ``Home games``
    or ``Away games`` count (depending on which side's name contains the team
    name) and its ``Total games`` count by one.

    After every response the remaining daily / per-minute quota is read from
    the response headers; once either counter reaches zero the loop stops and
    the rows not yet processed are returned uncorrected.

    Args:
        games_played_df: DataFrame with the columns ``Team``, ``Home games``,
            ``Away games``, ``Total games`` and ``Team ID``. It is not modified.

    Returns:
        pandas.DataFrame: the corrected copy.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
    """
    season_start = datetime(2022, 10, 18).date()
    season_end = datetime(2023, 4, 9).date()
    # Pause between requests so the loop stays below the per-minute request cap.
    request_pause_seconds = 0.25

    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    games_played_df_reg = games_played_df.copy()
    display(games_played_df_reg)

    # The quota counters are initialised ONCE, before the loop, so the values
    # reported by the previous response actually gate the next request.
    daily_calls_remaining = 1
    perMinute_calls_remaining = 1

    for index, row in games_played_df.iterrows():
        # 82 games or fewer: nothing to subtract, no request needed.
        if int(row['Total games']) <= 82:
            continue

        if perMinute_calls_remaining <= 0 or daily_calls_remaining <= 0:
            print('API request quota exhausted - stopping early; remaining teams are left uncorrected.')
            break

        t_id = str(row['Team ID'])
        # API URL for GAMES PLAYED data via api-basketball.com
        url = "https://v1.basketball.api-sports.io/games?league=12&season=2022-2023&team="+t_id+"&timezone=America/New_York"

        # A timeout prevents a stalled connection from blocking the notebook.
        response = requests.request("GET", url, headers=headers, data={}, timeout=30)
        response.raise_for_status()
        game_lvl = response.json()['response']

        # A missing header keeps the previous counter instead of crashing.
        daily_calls_remaining = int(
            response.headers.get('x-ratelimit-requests-remaining', daily_calls_remaining))
        perMinute_calls_remaining = int(
            response.headers.get('X-RateLimit-Remaining', perMinute_calls_remaining))

        time.sleep(request_pause_seconds)

        team_name = row['Team']
        for game in game_lvl:
            game_date = datetime.fromisoformat(game['date']).date()
            if season_start <= game_date <= season_end:
                continue

            # Substring match on the team name decides which side this team
            # played on; games matching neither side are ignored.
            if team_name in game["teams"]["home"]['name']:
                venue_column = "Home games"
            elif team_name in game["teams"]["away"]['name']:
                venue_column = "Away games"
            else:
                continue

            games_played_df_reg.loc[index, venue_column] -= 1
            games_played_df_reg.loc[index, "Total games"] -= 1

        # Report once per corrected team rather than once per removed game.
        print('=====================================================================================')
        print('Daily calls remaining = ' + str(daily_calls_remaining))
        print('Calls left per minute = ' + str(perMinute_calls_remaining), '\n')
        display(games_played_df_reg)

    return games_played_df_reg


In [None]:
# Correct the games-played table; only rows with more than 82 total games trigger an API request.
games_played_df_reg = games_played_rectifier(games_played_df)

# The function below does the same as the function two cells above, but the API call has been moved into its own function (`api_call`), which is useful because the other rectifying functions can call it too

## Note to self: *I should also call this API function from the functions in cell 3, but (to avoid distraction) I will create the other DF rectifiers first*

In [28]:
def api_call(t_id, url):
    """Send an authenticated GET request to api-basketball and return the response.

    Prints the remaining daily / per-minute request quota reported in the
    response headers, then pauses briefly before returning so that consecutive
    calls stay below the per-minute request cap.

    Args:
        t_id: team id the request is about. The request itself does not use it
            (callers already embed the id in ``url``); the parameter is kept so
            existing call sites keep working.
        url: fully built endpoint URL.

    Returns:
        The ``requests`` response object; callers read ``response.text``.

    Raises:
        requests.RequestException: if the request cannot be completed or the
            API answers with an HTTP error status. The error is printed and
            re-raised, so a failed call never reaches ``return`` without a
            response to hand back.
    """
    # Pause after each request so call loops stay below the per-minute cap.
    request_pause_seconds = 0.25

    headers = {
        'x-rapidapi-key': api_key['x-rapidapi-key'],
        'x-rapidapi-host': 'v1.basketball.api-sports.io'
    }

    try:
        # A timeout prevents a stalled connection from blocking the notebook.
        response = requests.request("GET", url, headers=headers, data={}, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(exc)
        raise

    # Quota headers are informational only, so a missing header must not crash
    # the call or skip the pause below.
    daily_calls_remaining = response.headers.get('x-ratelimit-requests-remaining', 'unknown')
    perMinute_calls_remaining = response.headers.get('X-RateLimit-Remaining', 'unknown')

    print('Daily calls remaining = ' + str(daily_calls_remaining))
    print('Calls left per minute = ' + str(perMinute_calls_remaining), '\n')

    time.sleep(request_pause_seconds)

    return response
        

def games_played_rectifier(games_played_df):
    """Return a copy of ``games_played_df`` with out-of-season games subtracted.

    Rows whose ``Total games`` is above 82 trigger one ``api_call`` for the
    team's 2022-2023 game list. Every game dated before 2022-10-18 or after
    2023-04-09 lowers ``Home games`` or ``Away games`` (whichever side's name
    contains the team name) and ``Total games`` by one in the copy. The input
    frame itself is left untouched; intermediate tables are shown via
    ``display``.
    """
    first_regular_day = datetime(2022, 10, 18).date()
    last_regular_day = datetime(2023, 4, 9).date()

    games_played_df_reg = games_played_df.copy()
    display(games_played_df_reg)

    for row_label, team_row in games_played_df.iterrows():
        # Teams with 82 games or fewer need no correction and no request.
        if int(team_row['Total games']) <= 82:
            continue

        t_id = str(team_row['Team ID'])
        reply = api_call(t_id, "https://v1.basketball.api-sports.io/games?league=12&season=2022-2023&team="+t_id+"&timezone=America/New_York")
        fixtures = json.loads(reply.text)['response']

        for fixture in fixtures:
            played_on = dt.datetime.fromisoformat(fixture['date']).date()
            # Games inside the regular-season window are left alone.
            if first_regular_day <= played_on <= last_regular_day:
                continue

            club = team_row['Team']
            host_name = fixture["teams"]["home"]['name']
            guest_name = fixture["teams"]["away"]['name']

            if club in host_name:
                side_column = "Home games"
            elif club in guest_name:
                side_column = "Away games"
            else:
                side_column = None

            if side_column is not None:
                # Lower the matching side first, then the overall total.
                for column in (side_column, "Total games"):
                    games_played_df_reg.loc[row_label, column] -= 1
                display(games_played_df_reg[games_played_df_reg['Team ID'] == int(t_id)])

            print('=====================================================================================')

        display(games_played_df_reg)

    return games_played_df_reg


In [None]:
# Correct the games-played table; only rows with more than 82 total games trigger an API request.
games_played_df_reg = games_played_rectifier(games_played_df)

In [None]:
def games_won_rectifier(games_won_df):
    """Unfinished counterpart of ``games_played_rectifier`` for the wins table.

    As written, the function copies ``games_won_df``, displays the original
    frame, and requests the 2022-2023 game list for every row through
    ``api_call``. No win count is adjusted and nothing is returned (the
    function implicitly returns ``None``). The triple-quoted string that
    follows this function in the cell is a disabled draft of the rest of the
    loop, copied from the games-played version.

    Args:
        games_won_df: DataFrame that has at least a ``Team ID`` column.
    """
    
    # The copy is not modified or returned yet.
    games_won_df_reg =  games_won_df.copy()
    # NOTE(review): this displays the input frame, not the copy made above.
    display(games_won_df)
# can make following...
    for index, row in games_won_df.iterrows():  
        t_id = str(row['Team ID'])
        # One request per row, whatever the team's win total.
        response = api_call(t_id, "https://v1.basketball.api-sports.io/games?league=12&season=2022-2023&team="+t_id+"&timezone=America/New_York")
        games = json.loads(response.text)
        # The list of games is extracted but not used yet.
        game_lvl = games['response']
#...into a function (Make a function for each of the API URLs )
'''        for game in game_lvl:
            game_date = dt.datetime.fromisoformat(game['date']).date()
            if game_date < datetime(2022, 10, 18).date() or game_date > datetime(2023, 4, 9).date(): 
                team_name = row['Team']
                home_team = game["teams"]["home"]['name']
                away_team = game["teams"]["away"]['name']
                if team_name in home_team:
                    games_played_df_reg.loc[index, "Home games"] -= 1
                    games_played_df_reg.loc[index, "Total games"] -= 1
    
                    display(games_played_df_reg[games_played_df_reg['Team ID'] == int(t_id)])
                elif team_name in away_team:
                    games_played_df_reg.loc[index,"Away games"] -= 1
                    games_played_df_reg.loc[index,"Total games"] -= 1
                    
                    display(games_played_df_reg[games_played_df_reg['Team ID'] == int(t_id)])
            else:
                continue

            print('=====================================================================================')
            
            # print('Daily calls remaining = ' + str(daily_calls_remaining))
            # print('Calls left per minute = ' + str(perMinute_calls_remaining), '\n')
    else:
        continue
    display(games_played_df_reg)
    return games_played_df_reg '''

In [None]:
# Inspect the lookups fetched earlier: the id list prints fine as plain text,
# while the DataFrame goes through display() for the notebook's table rendering.
print(team_id_list)
display(team_position_df)