In [3]:
import pandas as pd
import numpy as np
import json
import requests
import statsmodels.api as sm
import pygwalker as pyg
from pygwalker.api.pygwalker import PygWalker
from IPython.display import display
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.9',
    'Connection': 'keep-alive',
    'Origin': 'https://www.nba.com',
    'Referer': 'https://www.nba.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'Host': 'stats.nba.com',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true'
}

url = 'https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=10&LeagueID=00&Location=&MeasureType=Four%20Factors&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season=2024-25&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision='

try:
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        print("Successfully got data!")
        print(json.dumps(data, indent=2)[:500])
    else:
        print(f"Failed to get data: Status code {response.status_code}")
        print("Response:", response.text)
except Exception as e:
    print(f"Error occurred: {e}")

Successfully got data!
{
  "resource": "leaguedashteamstats",
  "parameters": {
    "MeasureType": "Four Factors",
    "PerMode": "PerGame",
    "PlusMinus": "N",
    "PaceAdjust": "N",
    "Rank": "N",
    "LeagueID": "00",
    "Season": "2024-25",
    "SeasonType": "Regular Season",
    "PORound": 0,
    "Outcome": null,
    "Location": null,
    "Month": 0,
    "SeasonSegment": null,
    "DateFrom": null,
    "DateTo": null,
    "OpponentTeamID": 0,
    "VsConference": null,
    "VsDivision": null,
    "TeamID": 0,


In [4]:
# Let's see what's in our data
print("Keys in the response:")
print(data.keys())

# The actual stats are usually in a 'resultSets' key
# Let's check the structure of that
if 'resultSets' in data:
    print("\nStructure of results:")
    print(data['resultSets'][0].keys())  # Usually contains 'headers' and 'rowSet'


Keys in the response:
dict_keys(['resource', 'parameters', 'resultSets'])

Structure of results:
dict_keys(['name', 'headers', 'rowSet'])


In [5]:
# Get the headers and rows from the first result set
headers = data['resultSets'][0]['headers']
rows = data['resultSets'][0]['rowSet']

# Create DataFrame
df = pd.DataFrame(rows, columns=headers)

# Let's look at our data
print("Shape of DataFrame:", df.shape)
print("\nColumns:")
print(df.columns.tolist())
print("\nFirst few rows:")
print(df.head())

Shape of DataFrame: (30, 28)

Columns:
['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'EFG_PCT', 'FTA_RATE', 'TM_TOV_PCT', 'OREB_PCT', 'OPP_EFG_PCT', 'OPP_FTA_RATE', 'OPP_TOV_PCT', 'OPP_OREB_PCT', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'EFG_PCT_RANK', 'FTA_RATE_RANK', 'TM_TOV_PCT_RANK', 'OREB_PCT_RANK', 'OPP_EFG_PCT_RANK', 'OPP_FTA_RATE_RANK', 'OPP_TOV_PCT_RANK', 'OPP_OREB_PCT_RANK']

First few rows:
      TEAM_ID          TEAM_NAME  GP  W  L  W_PCT    MIN  EFG_PCT  FTA_RATE  \
0  1610612737      Atlanta Hawks  10  5  5    0.5  485.0    0.563     0.274   
1  1610612738     Boston Celtics  10  8  2    0.8  480.0    0.579     0.171   
2  1610612751      Brooklyn Nets  10  6  4    0.6  480.0    0.503     0.223   
3  1610612766  Charlotte Hornets  10  2  8    0.2  480.0    0.466     0.213   
4  1610612741      Chicago Bulls  10  2  8    0.2  485.0    0.536     0.241   

   TM_TOV_PCT  ...  W_PCT_RANK  MIN_RANK  EFG_PCT_RANK  FTA_RATE_RANK  \
0       0.139  ...  

In [6]:
df.head(10)

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,...,W_PCT_RANK,MIN_RANK,EFG_PCT_RANK,FTA_RATE_RANK,TM_TOV_PCT_RANK,OREB_PCT_RANK,OPP_EFG_PCT_RANK,OPP_FTA_RATE_RANK,OPP_TOV_PCT_RANK,OPP_OREB_PCT_RANK
0,1610612737,Atlanta Hawks,10,5,5,0.5,485.0,0.563,0.274,0.139,...,15,5,7,3,15,18,28,29,13,4
1,1610612738,Boston Celtics,10,8,2,0.8,480.0,0.579,0.171,0.133,...,3,18,4,30,8,23,5,4,29,2
2,1610612751,Brooklyn Nets,10,6,4,0.6,480.0,0.503,0.223,0.157,...,8,18,27,27,28,1,4,22,4,29
3,1610612766,Charlotte Hornets,10,2,8,0.2,480.0,0.466,0.213,0.157,...,26,18,30,28,27,15,27,1,14,25
4,1610612741,Chicago Bulls,10,2,8,0.2,485.0,0.536,0.241,0.143,...,26,5,17,23,17,24,24,12,17,22
5,1610612739,Cleveland Cavaliers,10,9,1,0.9,480.0,0.591,0.242,0.147,...,1,18,1,21,20,6,1,10,15,21
6,1610612742,Dallas Mavericks,10,5,5,0.5,485.0,0.538,0.236,0.124,...,15,5,15,24,5,26,19,6,20,27
7,1610612743,Denver Nuggets,10,9,1,0.9,480.0,0.59,0.211,0.137,...,1,18,2,29,11,5,6,19,26,12
8,1610612765,Detroit Pistons,10,8,2,0.8,480.0,0.569,0.282,0.145,...,3,18,5,2,19,4,12,27,5,3
9,1610612744,Golden State Warriors,10,7,3,0.7,480.0,0.535,0.26,0.118,...,7,18,19,9,3,7,11,23,1,15


In [7]:
#Let's run our data and get a statistical summary. First we have to create a new column and do some math into the new column
def calc_dev():
    df['Shooting Dev'] = df['EFG_PCT']-df['OPP_EFG_PCT']
    df['Rebounding Dev'] = df['OREB_PCT']-df['OPP_OREB_PCT']
    df['Turnover Dev'] = df['TM_TOV_PCT']-df['OPP_TOV_PCT']
    df['Free Throw Dev'] = df['FTA_RATE']-df['OPP_FTA_RATE']
    return df
calc_dev()
df.head(5)

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,...,TM_TOV_PCT_RANK,OREB_PCT_RANK,OPP_EFG_PCT_RANK,OPP_FTA_RATE_RANK,OPP_TOV_PCT_RANK,OPP_OREB_PCT_RANK,Shooting Dev,Rebounding Dev,Turnover Dev,Free Throw Dev
0,1610612737,Atlanta Hawks,10,5,5,0.5,485.0,0.563,0.274,0.139,...,15,18,28,29,13,4,-0.002,0.019,-0.005,-0.018
1,1610612738,Boston Celtics,10,8,2,0.8,480.0,0.579,0.171,0.133,...,8,23,5,4,29,2,0.068,0.022,0.022,-0.039
2,1610612751,Brooklyn Nets,10,6,4,0.6,480.0,0.503,0.223,0.157,...,28,1,4,22,4,29,-0.005,0.026,-0.009,-0.04
3,1610612766,Charlotte Hornets,10,2,8,0.2,480.0,0.466,0.213,0.157,...,27,15,27,1,14,25,-0.097,-0.028,0.015,0.018
4,1610612741,Chicago Bulls,10,2,8,0.2,485.0,0.536,0.241,0.143,...,17,24,24,12,17,22,-0.023,-0.045,0.008,0.0


In [16]:
#Let's create a function that will allow us to easily extract shooting dev, rebounding dev, turnover dev, and free throw dev for a team and year
def get_team_stats(team):
    team_data = df[df['TEAM_NAME'] == team]
    shooting_dev = team_data['Shooting Dev'].values[0]
    rebounding_dev = team_data['Rebounding Dev'].values[0]
    turnover_dev = team_data['Turnover Dev'].values[0]
    free_throw_dev = team_data['Free Throw Dev'].values[0]
    wins = int(team_data['W'].values[0])
    gp = int(team_data['GP'].values[0])

# hard coded numbers derived from regression analysis
    predicted_win_pct = ( 
        0.5006 + 
        (shooting_dev * 4.6282) +
        (rebounding_dev * 1.5397) +
        (turnover_dev * -3.7446) +
        (free_throw_dev * 0.6154))
    
    predicted_wins = round(predicted_win_pct * 82)

    return {
        'Predicted Wins': predicted_wins,
        'Actual Wins': wins
        }
#Let's test our function
get_team_stats('Denver Nuggets')

{'Predicted Wins': 69, 'Actual Wins': 9}