# Imports 📦

In [151]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from timeit import default_timer as timer
from http import HTTPStatus
from datetime import datetime
import os
# Sub your own keys
import keys as k

In [21]:
# predefined urls for different access points
urls = {
    'seasons': 'https://api-nba-v1.p.rapidapi.com/seasons/',
    'games': 'https://api-nba-v1.p.rapidapi.com/games/',
    'teams': 'https://api-nba-v1.p.rapidapi.com/teams/',
    'players': 'https://api-nba-v1.p.rapidapi.com/players/'
}
season = '2023'

In [3]:
# key headers
headers = k.headers

In [50]:
# function to generate api response
def get_response(url, headers=None):
    '''
    Retrieving API response as well as checking for error codes, if any.
    '''
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raises a HTTPError if the status is 4xx, 5xx
    except requests.exceptions.HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except requests.exceptions.RequestException as err:
        print(f'Other error occurred: {err}')
    else:
        print('Success!')
        return response.json()

    return None

# Retrieving Teams 🏁

In [185]:
if not os.path.exists('nbaTeams.csv'):
    teams = get_response(url = urls['teams'])
else:
    print('Retrieve teams from existing file.)')

## Clean

In [186]:
def clean_teams(df):
    '''
    Clean the teams data
    '''
    df = pd.DataFrame(data = teams['response'])
    nbaTeams = df[df.nbaFranchise == True]
    nbaTeams = nbaTeams[nbaTeams.name != 'Home Team Stephen A']
    nbaTeams.reset_index(drop=True, inplace=True)

    nbaTeams['standard'] = nbaTeams['leagues'].apply(lambda x: x.get('standard'))
    nbaTeams['conference'] = nbaTeams['standard'].apply(lambda x: x.get('conference'))
    nbaTeams['division'] = nbaTeams['standard'].apply(lambda x: x.get('division'))

    nbaTeams = nbaTeams.drop(columns=['nbaFranchise', 'allStar','logo','standard','leagues'])


    return nbaTeams

In [None]:
nbaTeams = clean_teams(teams)

In [161]:
nbaTeams.head(2)

Unnamed: 0,id,name,nickname,code,city,conference,division
0,1,Atlanta Hawks,Hawks,ATL,Atlanta,East,Southeast
1,2,Boston Celtics,Celtics,BOS,Boston,East,Atlantic


# Retrieving Team Stats 📊

In [174]:
# lets get all the teams we will be using's id
teamIDs = []
for i in nbaTeams['id']:
    teamIDs.append(i)
    print(f"{i} -- {nbaTeams[nbaTeams['id'] == i]['name'].values[0]}")

1 -- Atlanta Hawks
2 -- Boston Celtics
4 -- Brooklyn Nets
5 -- Charlotte Hornets
6 -- Chicago Bulls
7 -- Cleveland Cavaliers
8 -- Dallas Mavericks
9 -- Denver Nuggets
10 -- Detroit Pistons
11 -- Golden State Warriors
14 -- Houston Rockets
15 -- Indiana Pacers
16 -- LA Clippers
17 -- Los Angeles Lakers
19 -- Memphis Grizzlies
20 -- Miami Heat
21 -- Milwaukee Bucks
22 -- Minnesota Timberwolves
23 -- New Orleans Pelicans
24 -- New York Knicks
25 -- Oklahoma City Thunder
26 -- Orlando Magic
27 -- Philadelphia 76ers
28 -- Phoenix Suns
29 -- Portland Trail Blazers
30 -- Sacramento Kings
31 -- San Antonio Spurs
38 -- Toronto Raptors
40 -- Utah Jazz
41 -- Washington Wizards


In [24]:
teamStats = get_response(url = urls['teams'] + "statistics?season=" + season + "&id=")

Success!


In [175]:
def retrieve_teamStats(teamIDs, season = '2023', rateLimit = 10):
    '''
    Retrieve team statistics for the given season, for all teams with listed IDs.

    The NBA actually has changed team names, API may have differently associated IDs for those names. (has not been confirmed)
    '''
    responses = []
    counter = 0
    totCounter = 0

    for i in teamIDs:
        # init timer
        start = timer()

        # counters
        counter += 1
        totCounter += 1

        if counter > rateLimit:
            time.sleep(70) # freemium API, 10 calls per minute
            counter = 1
        print(f"Making request {totCounter} at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        response = get_response(url = urls['teams'] + "statistics?season=" + season + "&id=" + str(i))
        responses.append(response)
        end = timer()
        timeCount = round(end - start, 2)
        print(f"Request {counter}/10 took {timeCount} seconds.\n|-----------------|")
    return responses

In [183]:
if not os.path.exists('nbaTeams.csv'):
    responses = retrieve_teamStats(teamIDs = teamIDs)
else:
    print("Data already exists, skipping retrieval.")

Data already exists, skipping retrieval.


## Clean

In [176]:
def clean_team_stats(responses):
    '''
    Remove some additional columns that the 2023 season may not be complete for.
    Columns were entirely 0s.
    '''
    df = pd.concat([pd.DataFrame(r['response']) for r in responses], ignore_index=True)
    df.drop(columns=['fastBreakPoints','pointsInPaint','biggestLead',
                 'secondChancePoints','pointsOffTurnovers','longestRun'], inplace=True)
    return df

In [184]:
df = clean_team_stats(responses = responses)
df.head(2)


Unnamed: 0,games,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus
0,88,10358,3766,8105,64.9,1628,2046,79.2,1198,3317,36.2,1083,2839,3922,2339,1631,676,1226,387,-205
1,97,11551,4201,8668,62.7,1574,1958,79.6,1575,4094,38.4,1036,3459,4495,2556,1572,658,1173,611,1097


In [181]:
nbaTeams = pd.concat([nbaTeams, df], axis=1)
nbaTeams.head(2)

Unnamed: 0,id,name,nickname,code,city,conference,division,games,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus,games.1,points.1,fgm.1,fga.1,fgp.1,ftm.1,fta.1,ftp.1,tpm.1,tpa.1,tpp.1,offReb.1,defReb.1,totReb.1,assists.1,pFouls.1,steals.1,turnovers.1,blocks.1,plusMinus.1
0,1,Atlanta Hawks,Hawks,ATL,Atlanta,East,Southeast,88,10358,3766,8105,64.9,1628,2046,79.2,1198,3317,36.2,1083,2839,3922,2339,1631,676,1226,387,-205,88,10358,3766,8105,64.9,1628,2046,79.2,1198,3317,36.2,1083,2839,3922,2339,1631,676,1226,387,-205
1,2,Boston Celtics,Celtics,BOS,Boston,East,Atlantic,97,11551,4201,8668,62.7,1574,1958,79.6,1575,4094,38.4,1036,3459,4495,2556,1572,658,1173,611,1097,97,11551,4201,8668,62.7,1574,1958,79.6,1575,4094,38.4,1036,3459,4495,2556,1572,658,1173,611,1097


# Cache 💰

In [None]:
if not os.path.exists('nbaTeams.csv'):
    nbaTeams.to_csv('nbaTeams.csv', index=False)
else:
    print("Data already exists, skipping save.")

# Getting Player Stats