<a href="https://colab.research.google.com/github/WillKWL/Project-NBASeason/blob/master/source/1_load_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Does performance in the regular season predict playoff ranking?
- supervised offline multi-class classification task
- performance measure: confusion matrix
- data source: https://www.nba.com/stats/teams/traditional/?sort=W&dir=-1&Season=2021-22&SeasonType=Regular%20Season 

In [None]:
!pip install nba_api

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashteamstats
# Notebook display settings: print at most 10 rows of any frame, but never
# truncate columns so every stat stays visible in the wide team tables.
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', None)
import os

# Gather data with [nba api](https://github.com/swar/nba_api)
- stats adjusted per 100 possessions, instead of per game, to account for the different pace each team plays at
- [glossary](https://www.nba.com/stats/help/glossary/) for column names  
- extra data: [advanced team stats](https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1) 



In [None]:

def _rank_from_playoff_wins(playoff_wins, season):
  """Turn each team's total playoff wins into a ranking from 1 to 5.

  Ranking = number of rounds won + 1, so a first-round loser gets 1 and the
  champion (4 rounds won) gets 5.

  Args:
    playoff_wins: Series of total playoff wins, one entry per playoff team.
    season: season label, used only in the error message.

  Returns:
    Integer Series aligned with `playoff_wins`.

  Raises:
    ValueError: if the highest win total is neither 16 nor 15, i.e. the data
      does not describe a completed playoff in one of the two known formats.
  """
  most_wins = playoff_wins.max()
  if most_wins == 16:
    # to win a championship, you need to win 4 rounds of best-of-7 (4,4,4,4)
    first_round_offset = 0
  elif most_wins == 15:
    # best-of-5 in the 1st round (3,4,4,4): add 1 win back so that every round
    # is worth 4 wins and the same integer division applies
    first_round_offset = 1
  else:
    raise ValueError(
        f"Cannot derive playoff ranking for season {season}: expected the "
        f"champion to have 15 or 16 playoff wins, found {most_wins!r} "
        "(playoffs unfinished or no playoff data returned?)")
  return ((playoff_wins + first_round_offset) // 4 + 1).astype(int)


def season_vs_playoff(start_year, end_year):
  """Build one table of regular-season team stats labelled with playoff ranking.

  For every season starting in `start_year` .. `end_year - 1`, the regular
  season and playoff team stats (per 100 possessions) are fetched through
  `leaguedashteamstats.LeagueDashTeamStats`, and each team is labelled with:

    - PLAYOFF_RANKING: 0 for teams which didn't enter the playoffs, otherwise
      rounds won + 1 (5 for the champion).
    - SEASON: the season label, e.g. '1996-97'.

  Args:
    start_year: first season start year (inclusive).
    end_year: last season start year (exclusive).

  Returns:
    A DataFrame with the rows of all seasons stacked; within each season rows
    are sorted by PLAYOFF_RANKING (descending) and indexed from 0. Returns
    None when the year range is empty.

  Raises:
    ValueError: if a season's playoff data does not contain a champion with
      15 or 16 wins (see `_rank_from_playoff_wins`).
  """
  season_frames = []
  for year in range(start_year, end_year):
    season = f"{year}-{str(year + 1)[2:]}"  # e.g. 1996 -> '1996-97'
    season_df = leaguedashteamstats.LeagueDashTeamStats(
        per_mode_detailed='Per100Possessions',
        season=season,
        season_type_all_star='Regular Season').get_data_frames()[0]
    playoff_df = leaguedashteamstats.LeagueDashTeamStats(
        per_mode_detailed='Per100Possessions',
        season=season,
        season_type_all_star='Playoffs',
        po_round_nullable=0).get_data_frames()[0]

    # Keep only the join key and the label; the playoff stats themselves are
    # not used as features.
    playoff_ranking = playoff_df[['TEAM_ID']].assign(
        PLAYOFF_RANKING=_rank_from_playoff_wins(playoff_df['W'], season))

    merged = pd.merge(season_df, playoff_ranking, on='TEAM_ID', how='outer')
    # teams which are not in playoff will have 0 as PLAYOFF_RANKING; only this
    # column is filled so that missing values in stat columns stay visible
    merged['PLAYOFF_RANKING'] = merged['PLAYOFF_RANKING'].fillna(0).astype(int)
    merged['SEASON'] = season
    season_frames.append(
        merged.sort_values(by='PLAYOFF_RANKING', ascending=False).reset_index(drop=True))

  if not season_frames:
    return None
  # Concatenate once at the end instead of growing a frame inside the loop.
  return pd.concat(season_frames, axis=0)

In [None]:
# Collect the seasons starting in 1996 through 2021 (end_year is exclusive),
# i.e. 1996-97 up to 2021-22, then display the combined table.
df = season_vs_playoff(
    start_year=1996,
    end_year=2022,
)
df

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,CFID,CFPARAMS,PLAYOFF_RANKING,SEASON
0,1610612741,Chicago Bulls,82,69,13,0.841,52.5,43.6,92.0,0.473,...,26,1,2,7,1,1,10,Chicago Bulls,5,1996-97
1,1610612762,Utah Jazz,82,64,18,0.780,52.7,41.6,82.7,0.504,...,12,9,26,6,2,2,10,Utah Jazz,4,1996-97
2,1610612745,Houston Rockets,82,57,25,0.695,51.7,39.6,84.5,0.468,...,24,7,1,13,7,7,10,Houston Rockets,3,1996-97
3,1610612748,Miami Heat,82,61,21,0.744,53.6,38.2,84.3,0.453,...,7,14,25,4,12,5,10,Miami Heat,3,1996-97
4,1610612737,Atlanta Hawks,82,56,26,0.683,54.2,38.5,86.3,0.446,...,9,6,4,27,8,4,10,Atlanta Hawks,2,1996-97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,1610612752,New York Knicks,82,37,45,0.451,49.7,38.8,88.8,0.437,...,10,15,25,3,23,19,10,New York Knicks,0,2021-22
26,1610612765,Detroit Pistons,82,23,59,0.280,48.6,38.4,89.3,0.431,...,14,27,30,14,28,26,10,Detroit Pistons,0,2021-22
27,1610612745,Houston Rockets,82,20,62,0.244,47.5,38.8,85.1,0.456,...,17,28,21,1,26,29,10,Houston Rockets,0,2021-22
28,1610612754,Indiana Pacers,82,25,57,0.305,48.7,41.6,89.9,0.463,...,3,21,24,23,18,24,10,Indiana Pacers,0,2021-22


In [None]:
# save to google drive
# Mount Drive first so the target folder exists on a fresh runtime (nothing
# else in this notebook mounts it).
from google.colab import drive
drive.mount('/content/drive')
# The working directory is changed (instead of writing to a full path) because
# the download cell below refers to the CSV by its bare file name.
os.chdir('/content/drive/MyDrive/Colab Notebooks')
df.to_csv('nba_season_vs_playoff.csv', index = False)

In [None]:
# download local copy
# Sends the CSV written by the previous cell (resolved against the current
# working directory) to the browser as a file download.
from google.colab import files
csv_name = "nba_season_vs_playoff.csv"
files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>