In [1]:
import pandas as pd
from nba_api.stats.endpoints import *
import json
from tqdm import tqdm
import os

In [2]:
# Never truncate displayed DataFrames: show every column and every row
pd.options.display.max_columns = None
pd.options.display.max_rows = None

### Get PPP and total possessions for all teams

In [3]:
def get_df_from_json(team_stats_json):
    """
    Get DataFrame format of json data
    
    Only the first entry of 'resultSets' is converted: its 'rowSet' supplies the rows
    and its 'headers' supplies the column names.
    
    Parameters:
    - team_stats_json: json of data, either as a JSON string (str / bytes) or as the
      already-parsed dictionary
    
    Returns:
    - team_stats_df: json data as a Pandas DataFrame
    
    Raises:
    - KeyError / IndexError if the payload has no 'resultSets'[0] with 'rowSet' and 'headers'
    """
    # Parse the payload a single time; skip parsing when the caller already holds a dict
    if isinstance(team_stats_json, (str, bytes, bytearray)):
        payload = json.loads(team_stats_json)
    else:
        payload = team_stats_json
    
    # Rows and column names live side by side in the first result set
    result_set = payload['resultSets'][0]
    team_stats_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])
    
    return team_stats_df


def get_shot_clock_splits(shot_clock_lst, season = '2024-25', **kwargs):
    """
    Get shot clock split data with flexible API parameters
    
    For every shot clock range two LeagueDashTeamStats requests are made: one without an
    explicit measure type and one with measure type 'Advanced'. The two results are joined
    per team so that PTS and POSS end up in the same row.
    
    Parameters:
    - shot_clock_lst: List of shot clock ranges
    - season: NBA season (default '2024-25')
    - **kwargs: Additional parameters to pass into LeagueDashTeamStats; they are forwarded
      unchanged to both requests
    
    Returns:
    - shot_clock_dfs: Dictionary of DataFrames for each shot clock range with the range as the key.
      Each DataFrame has the columns TEAM_ID, TEAM_NAME, PTS and POSS
    
    Raises:
    - TypeError: if kwargs contains 'shot_clock_range_nullable' or
      'measure_type_detailed_defense', which this function sets itself
    """
    # Fail before any request is sent if the caller tries to override a parameter we control
    reserved = {'shot_clock_range_nullable', 'measure_type_detailed_defense'}.intersection(kwargs)
    if reserved:
        raise TypeError(
            'get_shot_clock_splits sets these LeagueDashTeamStats parameters itself: '
            + ', '.join(sorted(reserved))
        )
    
    shot_clock_dfs = {}
    
    # Loop through the different shot clock timeframes
    for timeframe in shot_clock_lst:
        # Parameters shared by both requests, including everything the caller passed in kwargs
        shared_params = dict(kwargs, season = season, shot_clock_range_nullable = timeframe)
        
        # Pull the JSON for the endpoint's default measure type (no per-mode is set here,
        # so the endpoint's own default applies); PTS is expected to come from this request
        base_stats = leaguedashteamstats.LeagueDashTeamStats(**shared_params).get_json()
        base_df = get_df_from_json(base_stats)
        
        # Pull the JSON Advanced data for the same season and timeframe; POSS is expected
        # to come from this request
        adv_stats = leaguedashteamstats.LeagueDashTeamStats(
            measure_type_detailed_defense = 'Advanced',
            **shared_params
        ).get_json()
        adv_df = get_df_from_json(adv_stats)
        
        # Merge the two DataFrames on 'TEAM_ID' and 'TEAM_NAME' and keep the columns we care about
        team_stats_df = base_df.merge(adv_df, on=['TEAM_ID', 'TEAM_NAME'], how='inner')
        team_stats_df = team_stats_df[['TEAM_ID', 'TEAM_NAME', 'PTS', 'POSS']]
        
        shot_clock_dfs[timeframe] = team_stats_df
        
    return shot_clock_dfs

In [4]:
# Load team PTS and POSS for every shot clock range, ordered from a full clock down to zero
shot_clock_lst = [
    '24-22',
    '22-18 Very Early',
    '18-15 Early',
    '15-7 Average',
    '7-4 Late',
    '4-0 Very Late',
]
shot_clock_dfs = get_shot_clock_splits(shot_clock_lst)

In [5]:
# Inspect the PTS / POSS table of a single shot clock range
shot_clock_dfs['22-18 Very Early']

Unnamed: 0,TEAM_ID,TEAM_NAME,PTS,POSS
0,1610612737,Atlanta Hawks,1066,1300
1,1610612738,Boston Celtics,732,861
2,1610612751,Brooklyn Nets,753,968
3,1610612766,Charlotte Hornets,708,876
4,1610612741,Chicago Bulls,1121,1399
5,1610612739,Cleveland Cavaliers,904,1005
6,1610612742,Dallas Mavericks,905,1035
7,1610612743,Denver Nuggets,1122,1290
8,1610612765,Detroit Pistons,1007,1184
9,1610612744,Golden State Warriors,858,1080


In [6]:
# Check the column dtypes and non-null counts of one shot clock range
shot_clock_dfs['22-18 Very Early'].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 30 entries, 0 to 29
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   TEAM_ID    30 non-null     int64 
 1   TEAM_NAME  30 non-null     object
 2   PTS        30 non-null     int64 
 3   POSS       30 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 1.2+ KB


### Get PPP as a weighted average for the whole remaining shot clock

In [7]:
def aggregate_shot_clock_data(shot_clock_dfs, shot_clock_lst):
    """
    Pool a selected list of shot clock ranges into one points-per-possession (PPP) table per team

    Points and possessions are summed over the selected ranges before dividing, so every
    range contributes to PPP in proportion to its possessions.

    Parameters:
    - shot_clock_dfs: Dictionary of DataFrames, where keys represent shot clock ranges; each
      DataFrame needs the columns TEAM_ID, TEAM_NAME, PTS and POSS
    - shot_clock_lst: Non-empty list of shot clock ranges (keys of shot_clock_dfs) to aggregate

    Returns:
    - final_df: One row per team with the columns TEAM_ID, TEAM_NAME, Total_POSS, Total_PTS,
      PPP (Total_PTS / Total_POSS) and SC_REMAINING (the first range in shot_clock_lst cut at
      its first space, e.g. '22-18 Very Early' -> '22-18')
    """
    # The label comes from the first selected range only
    sc_label = shot_clock_lst[0].split(' ', 1)[0]

    # Stack the per-range tables on top of each other
    stacked = pd.concat([shot_clock_dfs[key] for key in shot_clock_lst])

    # Sum possessions and points per team across the stacked ranges
    totals = stacked.groupby(['TEAM_ID', 'TEAM_NAME'], as_index=False).agg(
        Total_POSS=('POSS', 'sum'),
        Total_PTS=('PTS', 'sum'),
    )

    # Dividing the pooled totals gives the possession-weighted PPP
    return totals.assign(
        PPP=totals['Total_PTS'] / totals['Total_POSS'],
        SC_REMAINING=sc_label,
    )

In [8]:
# For each range, pool it with every range that follows it in shot_clock_lst, so the
# PPP stored under a range covers the rest of the shot clock from that range onwards.
# Slicing (instead of popping) leaves shot_clock_lst untouched for later cells, and the
# list defined when the data was loaded is reused rather than repeated here.
final_shot_clock_dfs = {}

for start, sc in enumerate(shot_clock_lst):
    final_shot_clock_dfs[sc] = aggregate_shot_clock_data(shot_clock_dfs, shot_clock_lst[start:])

In [9]:
# Print the first rows of each aggregated table, one table per starting shot clock range
for timeframe in final_shot_clock_dfs:
    print(final_shot_clock_dfs[timeframe].head())

      TEAM_ID             TEAM_NAME  Total_POSS  Total_PTS       PPP  \
0  1610612737         Atlanta Hawks        9814       6412  0.653352   
1  1610612738        Boston Celtics        9048       6223  0.687776   
2  1610612739   Cleveland Cavaliers        9226       6625  0.718079   
3  1610612740  New Orleans Pelicans        9511       6080  0.639260   
4  1610612741         Chicago Bulls        9580       6369  0.664823   

  SC_REMAINING  
0        24-22  
1        24-22  
2        24-22  
3        24-22  
4        24-22  
      TEAM_ID             TEAM_NAME  Total_POSS  Total_PTS       PPP  \
0  1610612737         Atlanta Hawks        7188       5184  0.721202   
1  1610612738        Boston Celtics        6564       5222  0.795551   
2  1610612739   Cleveland Cavaliers        6830       5568  0.815227   
3  1610612740  New Orleans Pelicans        6915       4913  0.710484   
4  1610612741         Chicago Bulls        7012       5362  0.764689   

  SC_REMAINING  
0        22-18 