In [13]:
import pandas as pd
from nba_api.stats.endpoints import *
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.static import teams
import json
import time

In [None]:
# Collect per-player Synergy play-type stats for every season, merge the
# play types side by side into one wide row per player/team/season, attach the
# roster position, and write the combined table to CSV.

# Define the playtypes and seasons
playtypes = ['Isolation', 'Transition', 'PRBallHandler', 'PRRollman', 'Postup', 'Spotup', 'Handoff', 'Cut', 'OffScreen', 'OffRebound', 'Misc']
seasons = ['2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24', '2024-25']

# Columns that shouldn't be renamed with playtype suffix
id_columns = ['SEASON_ID', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'TYPE_GROUPING']

# Columns shared by every playtype frame of a season; used as the merge key
merge_keys = id_columns + ['SEASON']

# Pause (seconds) taken before every API request
REQUEST_PAUSE_SECONDS = 1
# Timeout (seconds) passed to the Synergy endpoint.
# NOTE(review): 2 seconds is short; requests that exceed it are reported as
# errors and that playtype is then missing for the season — consider raising.
SYNERGY_TIMEOUT_SECONDS = 2


def _fetch_playtype_frame(season, playtype):
    """Download one season/playtype Synergy table and suffix its stat columns.

    Args:
        season: Season string such as '2015-16'.
        playtype: Play type name passed to the endpoint.

    Returns:
        DataFrame with the identifier columns unchanged, every other column
        renamed to ``<column>_<playtype>``, and an added ``SEASON`` column.

    Raises:
        Whatever the request or the response parsing raises; the caller
        decides how to report it.
    """
    time.sleep(REQUEST_PAUSE_SECONDS)
    synergy_data = synergyplaytypes.SynergyPlayTypes(
        league_id="00",
        per_mode_simple="PerGame",
        play_type_nullable=playtype,
        player_or_team_abbreviation="P",
        season=season,
        season_type_all_star="Regular Season",
        type_grouping_nullable="Offensive",
        timeout=SYNERGY_TIMEOUT_SECONDS,
    )

    # Convert the first result set to a dataframe
    result_set = json.loads(synergy_data.get_json())['resultSets'][0]
    df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

    # 'PLAY_TYPE' is redundant once the stat columns carry the playtype suffix;
    # errors='ignore' keeps this a no-op when the column is absent.
    df = df.drop(columns=['PLAY_TYPE'], errors='ignore')

    # Rename stat columns to include playtype
    df = df.rename(columns={col: f"{col}_{playtype}" for col in df.columns if col not in id_columns})
    df['SEASON'] = season
    return df


def _fetch_position_map(season, team_ids):
    """Build a PLAYER_ID -> POSITION mapping from every team's roster for a season.

    Roster requests are best-effort: a failing team is reported and skipped so
    one bad request does not discard the rest of the season.
    """
    positions = {}
    for team_id in team_ids:
        try:
            time.sleep(REQUEST_PAUSE_SECONDS)
            roster = commonteamroster.CommonTeamRoster(team_id=team_id, season=season).get_data_frames()[0]
        except Exception as e:
            # Report instead of silently swallowing, so missing positions can be traced
            print(f"    Error retrieving roster for team {team_id} in {season}: {str(e)}")
            continue
        positions.update(zip(roster['PLAYER_ID'], roster['POSITION']))
    return positions


# Initialize an empty list to store all dataframes
all_dfs = []

# Team IDs come from the static team list and do not change between seasons
team_ids = [team['id'] for team in teams.get_teams()]

# Loop through each season and playtype
for season in seasons:
    print(f"Processing season: {season}")

    season_dfs = []

    for playtype in playtypes:
        print(f"  - Getting data for playtype: {playtype}")

        try:
            df = _fetch_playtype_frame(season, playtype)
        except Exception as e:
            print(f"    Error retrieving {playtype} data for {season}: {str(e)}")
            continue

        # Add to list of dataframes for this season
        season_dfs.append(df)
        print(f"    Successfully retrieved {len(df)} player records")

    # Nothing to merge or annotate when every request for the season failed.
    # Skipping here also keeps all_dfs free of placeholder entries.
    if not season_dfs:
        print(f"  No playtype data retrieved for season {season}; skipping")
        continue

    # Start with the first dataframe and outer-merge the rest, so a player who
    # appears in only some playtypes is still kept (missing stats become NaN)
    season_df = season_dfs[0]
    for df in season_dfs[1:]:
        season_df = pd.merge(season_df, df, on=merge_keys, how='outer')
    print(f"  Completed merging {len(season_dfs)} playtypes for season {season}")

    # Attach positions to this season's frame directly rather than via a list
    # index, which breaks as soon as an earlier season produced no data
    position_dict = _fetch_position_map(season, team_ids)
    season_df['position'] = season_df['PLAYER_ID'].map(position_dict)

    # Add to list of all seasons
    all_dfs.append(season_df)

# Combine all seasons
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
    print(f"Final dataframe shape: {final_df.shape}")

    # Missing values are left as NaN: they mark playtypes a player has no row
    # for and positions that could not be looked up.

    # Save to CSV
    final_df.to_csv('synergy_all_playtypes_2015_to_2025.csv', index=False)
    print("Data saved to 'synergy_all_playtypes_2015_to_2025.csv'")
else:
    # Keep final_df defined so the display below does not raise NameError
    final_df = pd.DataFrame()
    print("No data was retrieved.")

# Display the first few rows
final_df.head()

Processing season: 2015-16
  - Getting data for playtype: Isolation
    Successfully retrieved 287 player records
  - Getting data for playtype: Transition
    Successfully retrieved 396 player records
  - Getting data for playtype: PRBallHandler
    Successfully retrieved 272 player records
  - Getting data for playtype: PRRollman
    Successfully retrieved 202 player records
  - Getting data for playtype: Postup
    Successfully retrieved 216 player records
  - Getting data for playtype: Spotup
    Successfully retrieved 392 player records
  - Getting data for playtype: Handoff
    Successfully retrieved 233 player records
  - Getting data for playtype: Cut
    Successfully retrieved 334 player records
  - Getting data for playtype: OffScreen
    Successfully retrieved 238 player records
  - Getting data for playtype: OffRebound
    Successfully retrieved 287 player records
  - Getting data for playtype: Misc
    Successfully retrieved 366 player records
  Completed merging 11 playty

In [16]:
# Frequency of each position label in the combined table
# (value_counts leaves out missing values by default)
final_df.loc[:, 'position'].value_counts()

position
G      166
F      113
C       38
F-C     29
G-F     29
C-F     16
F-G     12
Name: count, dtype: int64

In [17]:
# Number of missing entries in every column of the combined table
final_df.isnull().sum()

SEASON_ID               0
PLAYER_ID               0
PLAYER_NAME             0
TEAM_ID                 0
TEAM_ABBREVIATION       0
                     ... 
PTS_Misc             1077
FGM_Misc             1077
FGA_Misc             1077
FGMX_Misc            1077
position             4310
Length: 185, dtype: int64