- basketball game as a sequence of long and short call option 
- each possession is an option on option
- offensive possession = long call option
  - option 1
    - attempt 3 = Max(3,0)
      - score 3
      - fouled -> freethrow
      - miss
    - attempt 2 = Max(2,0)
      - score 2
      - fouled -> freethrow
      - miss
    - turnover / offensive foul = 0
- defensive possession = short call option
- sequence: offensive possession must be followed by defensive possession, and vice versa
- goal: maximize the value of long option and minimize the value of short option
- specific to basketball: 

In [3]:
import numpy as np
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
import requests
import concurrent.futures
from datetime import datetime
from tqdm import tqdm

# Does performance in regular season predict playoff ranking?
- supervised offline multi-class classification task
- performance measure: confusion matrix
- data source: https://www.nba.com/stats/teams/traditional/?sort=W&dir=-1&Season=2021-22&SeasonType=Regular%20Season 

In [5]:
def get_team_stats(season_type, season, playoff_round = 0):
    # credits: https://towardsdatascience.com/how-scraping-nba-stats-is-cooler-than-michael-jordan-49d7562ce3ef
    # Under the Header tab, select general and copy the first part of the request URL
    url = 'https://stats.nba.com/stats/leaguedashteamstats'
    # Header Tab, under "Query String Parameter" subsection
    prms = (
        
        ('MeasureType', 'Base'), # Traditional
        ('Season', season),
        ('SeasonType', season_type),
        ('PerMode', 'Per100Possessions'), # Per 100 Possessions since teams have different PACE
        ('SeasonSegment', ''),
        ('LastNGames', 0),
        
        
        ('PlusMinus', 'N'),
        ('PaceAdjust', 'N'), # adjust by game pace
        ('Rank', 'N'),
        ('LeagueID', '00'),   
        ('Month', 0),
        ('GameScope', ''),
        
        # Player Bio
        ('PlayerPosition', ''),
        ('StarterBench', ''),
        ('PlayerExperience', ''),
        ('TwoWay', 0),
        
        # Team
        ('TeamID', 0),
        ('OpponentTeamID', 0),
        ('Division', ''),
        ('VsDivision', ''),
        ('Conference', ''),
        ('VsConference', ''),
        
        # Game Situation
        ('Outcome', ''),
        ('Location', ''),
        ('ShotClockRange', ''),
        ('Period', 0),
        ('GameSegment', ''),
        
        # Date Range
        ('PORound', playoff_round),
        ('DateFrom', ''),
        ('DateTo', ''),  
        
        
    )
    
    http_proxy  = "http://10.10.1.10:3128"
    https_proxy = "https://10.10.1.11:1080"
    ftp_proxy   = "ftp://10.10.1.10:3128"

    proxies = { 
                "http"  : http_proxy, 
                "https" : https_proxy, 
                "ftp"   : ftp_proxy
                }
    # Header tab, under “Request Headers” subsection
    hdr = {
    "accept": "application/json, text/plain, */*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "en-US,en;q=0.9",
    "connection": "keep-alive",
    "origin": "https://www.nba.com",
    "referer": "https://www.nba.com/",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "Windows",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache'
    }
    #Using Request library to get the data
    with requests.Session() as s:
        try:
            response = s.get(url, headers=hdr, params=prms, timeout=5, proxies=proxies)
        except requests.exceptions.Timeout:
            print(f'Timeout for {season}')
            return None
        response_json = response.json()
        frame = pd.DataFrame(response_json['resultSets'][0]['rowSet'])
        frame.columns = response_json['resultSets'][0]['headers']
    return frame

def season_vs_playoff(season):
    season_df = get_team_stats(season_type='Regular Season', season=season)
    # playoff ranking
    # 0 = didn't enter playoffs
    # 1 = enter playoffs
    # 2 = conference quarter final winners
    # 3 = conference semi final winners
    # 4 = conference final champions
    # 5 = final champion
    playoff_df = get_team_stats(season_type='Playoffs', season=season)
    
    # win 4 games = win 1 round
    # ranking = 0 for teams which didn't enter playoffs
    # ranking = 5 for champion which won 4 rounds, 16 games
    playoff_df['PLAYOFF_RANKING'] = playoff_df['W'].apply(lambda x: int(x/4) + 1)
    playoff_df = playoff_df[['TEAM_ID', 'PLAYOFF_RANKING']] 

    season_vs_playoff = pd.merge(season_df, playoff_df, on='TEAM_ID', how = 'outer')
    season_vs_playoff.fillna(0, inplace=True) # teams which are not in playoff will have 0 as PLAYOFF_RANKING
    season_vs_playoff['PLAYOFF_RANKING'] = season_vs_playoff['PLAYOFF_RANKING'].astype(int)
    season_vs_playoff['Season'] = season
    output = pd.concat([output,season_vs_playoff.sort_values(by='PLAYOFF_RANKING', ascending = False).reset_index(drop=True)], axis = 0)
  
    return output


In [6]:
season_vs_playoff(season='1996-97')

Timeout for 1996-97
Timeout for 1996-97


TypeError: 'NoneType' object is not subscriptable

In [22]:
def parallel_request_team_stats(start_year, end_year):
    output = None
    season_list = [str(i) + '-' + str(i+1)[2:] for i in range(start_year, end_year)]
    
    with tqdm(total=len(season_list)) as pbar:
        # We can use a with statement to ensure threads are cleaned up promptly
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Start the load operations and mark each future with its object_ID
            future_to_object_ID = {executor.submit(season_vs_playoff, season): chunk_n for chunk_n, season in enumerate(season_list)}
            for future in concurrent.futures.as_completed(future_to_object_ID):
                chunk_n = future_to_object_ID[future]
                # print(f'{(chunk_n + 1)/len(objectIDs_chunks)*100:0.0f}% completed')
                try:
                    output = pd.concat([output, future.result()], axis=0)
                except Exception as exc:
                    print('Chunk %r generated an exception: %s' % (chunk_n, exc))
                    return
                # else:
                #     print('%r page is %d bytes' % (chunk_n, len(future.result())))
                pbar.update(1)
    return output

In [24]:
df = parallel_request_team_stats(start_year=1996, end_year=2022)

  0%|          | 0/26 [00:00<?, ?it/s]

Timeout for 1999-00
Chunk 3 generated an exception: 'NoneType' object has no attribute 'value_counts'
Timeout for 2007-08
Timeout for 2003-04
Timeout for 2004-05
Timeout for 2006-07
Timeout for 2000-01
Timeout for 2005-06
Timeout for 2001-02
Timeout for 2002-03
Timeout for 1997-98
Timeout for 1998-99
Timeout for 1996-97
Timeout for 2008-09
Timeout for 2009-10
Timeout for 2010-11
Timeout for 2011-12
Timeout for 2012-13
Timeout for 2013-14
Timeout for 2014-15
Timeout for 2015-16
Timeout for 2016-17
Timeout for 2017-18
Timeout for 2019-20
Timeout for 2018-19


  0%|          | 0/26 [00:21<?, ?it/s]

Timeout for 2020-21
Timeout for 2021-22





In [25]:
output = None
for i in [str(i) + '-' + str(i+1)[2:] for i in range(1996, 2000)]:    
    output = pd.concat([output, season_vs_playoff(season=i)], axis=0)

Timeout for 1996-97


ValueError: All objects passed were None

In [22]:
output

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,FG3_PCT_RANK,FTM_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,CFID,CFPARAMS,PLAYOFF_RANKING,Season
0,1610612741,Chicago Bulls,82,69,13,0.841,52.5,43.6,92.0,0.473,7.0,18.7,0.373,18.4,24.6,0.747,16.4,32.7,49.1,28.5,14.7,9.5,4.4,3.8,21.5,0.2,112.4,11.8,1,1,1,1,13,1,1,3,11,10,6,26,28,12,2,3,1,2,2,6,26,1,2,7,1,1,10,Chicago Bulls,5,1996-97
1,1610612762,Utah Jazz,82,64,18,0.780,52.7,41.6,82.7,0.504,4.4,12.0,0.370,24.7,32.1,0.769,11.8,32.0,43.9,29.2,16.7,9.9,5.6,5.0,26.3,0.2,112.4,9.6,1,2,2,2,12,2,28,1,27,29,8,1,1,3,28,8,20,1,10,3,12,9,26,6,2,2,10,Utah Jazz,4,1996-97
2,1610612745,Houston Rockets,82,57,25,0.695,51.7,39.6,84.5,0.468,8.7,24.0,0.365,19.6,26.0,0.755,12.1,33.4,45.5,26.2,17.8,9.0,4.5,4.8,21.0,0.2,107.5,4.8,1,4,4,4,21,9,24,7,3,3,11,19,23,7,26,1,7,5,25,12,24,7,1,13,7,7,10,Houston Rockets,3,1996-97
3,1610612748,Miami Heat,82,61,21,0.744,53.6,38.2,84.3,0.453,9.2,25.2,0.364,19.7,27.4,0.719,12.9,32.5,45.4,23.5,17.7,8.8,5.9,5.3,26.0,0.3,105.2,6.1,1,3,3,3,6,22,26,18,1,1,12,18,13,23,20,5,9,15,21,16,7,14,25,4,12,5,10,Miami Heat,3,1996-97
4,1610612737,Atlanta Hawks,82,56,26,0.683,54.2,38.5,86.3,0.446,9.0,25.1,0.360,20.4,26.8,0.763,14.0,32.2,46.2,21.3,16.8,9.6,5.8,4.5,21.8,0.1,106.4,6.1,1,7,7,7,4,18,11,20,2,2,15,15,17,4,11,7,4,28,13,5,9,6,4,27,8,4,10,Atlanta Hawks,2,1996-97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24,1610612744,Golden State Warriors,82,19,63,0.232,51.5,36.9,89.3,0.413,2.5,9.0,0.272,17.6,24.8,0.710,16.7,32.1,48.8,22.2,17.8,8.1,5.8,6.2,24.1,0.1,93.9,-9.7,1,25,25,25,22,28,3,29,29,28,29,28,27,25,2,8,3,22,23,25,11,24,15,28,29,27,10,Golden State Warriors,0,1997-98
25,1610612765,Detroit Pistons,82,37,45,0.451,53.4,38.3,85.3,0.449,3.9,12.6,0.312,22.8,30.6,0.745,14.0,31.3,45.3,21.4,16.0,9.1,4.6,5.1,24.2,0.2,103.4,1.7,1,19,19,19,7,19,18,18,22,15,26,5,5,12,12,12,10,25,9,12,24,12,17,19,14,13,10,Detroit Pistons,0,1997-98
26,1610612743,Denver Nuggets,82,11,71,0.134,52.6,35.7,85.5,0.418,3.8,11.9,0.323,22.1,28.6,0.772,13.9,28.8,42.6,20.6,17.5,8.9,5.2,7.1,25.8,0.2,97.4,-12.9,1,29,29,29,12,29,17,28,24,20,25,7,14,2,15,26,26,27,21,17,17,29,25,8,28,29,10,Denver Nuggets,0,1997-98
27,1610612742,Dallas Mavericks,82,20,62,0.244,52.6,38.0,89.1,0.427,5.6,15.6,0.357,17.3,22.9,0.753,13.3,30.1,43.4,20.3,15.6,8.5,6.2,4.9,21.7,0.2,98.9,-6.6,1,24,24,24,13,22,4,27,7,8,8,29,29,9,20,20,23,28,5,22,6,6,2,21,27,24,10,Dallas Mavericks,0,1997-98
