In [18]:
import os
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO
import re
import datetime

In [154]:
SCORES_DIR = '/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/data/HTML/scores'

In [155]:
box_scores = os.listdir(SCORES_DIR)

In [156]:
box_scores = [os.path.join(SCORE_DIR, f) for f in box_scores if f.endswith(".html") and (f.startswith("20240404"))] #or f.startswith("201710")or f.startswith("201711")or f.startswith("201712"))]





In [153]:
box_scores

[]

In [12]:
def parse_html(box_scores):
    with open(box_scores) as f:
        html = f.read()
        
    soup = BeautifulSoup(html)
    [s.decompose() for s in soup.select("tr.over_header")]
    [s.decompose() for s in soup.select("tr.thead")]
    
    return soup

def read_line_score(soup):
    html = StringIO(str(soup))
    line_score = pd.read_html(html, attrs = {"id": "line_score"})[0]
    columns = list(line_score.columns)
    columns[0] = "Teams"
    columns[-1] = "Total"
    line_score.columns = columns 
    
    line_score = line_score[["Teams", "Total"]]
    return line_score

def four_factors(soup):
    html = StringIO(str(soup))
    factors = pd.read_html(html, attrs = {"id": "four_factors"})[0]
    columns = list(factors.columns)
    factors.columns = columns
    
    factors = factors[['Pace', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', 'ORtg']]
    return factors

def read_stats(soup, team, stat):
    html = StringIO(str(soup))
    stats = pd.read_html(html, attrs = {"id": f"box-{team}-game-{stat}"}, index_col = 0)[0]
    stats = stats.apply(pd.to_numeric, errors = "coerce")
    return stats

def read_season_info(soup):
    nav = soup.select("#bottom_nav_container")[0]
    hrefs = [a["href"] for a in nav.find_all("a")]
    season = os.path.basename(hrefs[1]).split("_")[0]
    return season

def read_record(soup):
    he = []
    html = StringIO(str(soup))
    scoreboxes = soup.find_all(class_='scorebox')
    score_pattern = re.compile(r'\d+-\d+')
    
    for scorebox in scoreboxes:
        # Find all div elements within each scorebox that match the score pattern
        score_divs = scorebox.find_all('div', string=score_pattern)
        # If any matching divs are found
        if score_divs:
            # Loop through each matching div
            for score_div in score_divs:
                # Do something with the div
                record = score_div.text.split("-")
                df_record = pd.DataFrame([record])
                he.append(df_record)
    df = pd.concat(he)
    columns = list(df.columns)
    columns[0] = "Wins"
    columns[1] = "Losses"
    df.columns = columns
    df = df.reset_index()
    del df["index"]
    return df

In [13]:
base_cols = None
games = []

for box_score in box_scores:
    soup = parse_html(box_score)
    line_score = read_line_score(soup)
    factors = four_factors(soup)
    try:
        records = read_record(soup)
    except:
        continue
    teams = list(line_score['Teams'])
    summaries = []
    for team in teams:
        basic = read_stats(soup, team, "basic")
        advanced = read_stats(soup, team, "advanced")
        advanced.drop("MP", axis=1, inplace=True)
        total = pd.concat([basic.iloc[-1,:], advanced.iloc[-1,:]])
        total.index = total.index.str.lower()

        maxes = pd.concat([basic.iloc[:-1].max(), advanced.iloc[:-1].max()])
        maxes.index = maxes.index.str.lower() + "maxes"

        summary = pd.concat([total, maxes])

        if base_cols is None:
            base_cols = list(summary.index.drop_duplicates(keep='first'))
            base_cols = [b for b in base_cols if "bpm" not in b]

        summary = summary[base_cols]
        summaries.append(summary)

    summary = pd.concat(summaries, axis =1).T

    game = pd.concat([summary, line_score,factors, records], axis = 1)
    game['home'] = [0,1]

    game_opp = game.iloc[::-1].reset_index()
    game_opp.columns += "_opp"

    full_game = pd.concat([game, game_opp], axis = 1)

    full_game["season"] = read_season_info(soup)
    full_game["date"] = os.path.basename(box_score)[:8]
    full_game["date"] = pd.to_datetime(full_game["date"], format = "%Y%m%d")
    full_game["won"] = full_game["Total"] > full_game["Total_opp"]

    games.append(full_game)

    if (len(games) % 100 == 0):
        print(f"{len(games)} / {len(box_scores)}")

In [14]:
green_eggs = pd.concat(games, ignore_index = True)

In [27]:
def setup(df, whole):
    date = whole['date'].iloc[-1][:10]
    before = pd.to_datetime(date)
    #after = pd.Timestamp(datetime.date(2024,12,12))

     # Subtract a year from 'date' column values exceeding the threshold
    for index,row in df.iterrows():
        if row['date'] <= before:
            df.drop(labels=index, axis=0, inplace=True)
            
    df = df.sort_values("date")
    df = df.reset_index(drop=True)
    
    del df['index_opp']
    del df["mpmaxes"]
    del df["mpmaxes_opp"]
    del df['+/-']
    del df["+/-_opp"]
    
    return df

In [31]:
folder_path = "/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/data/raw_data/"
csv_path = "NBA_2018_2024.csv"
complete_path = folder_path + csv_path
whole = pd.read_csv(complete_path, index_col=0)

In [28]:
df = setup(green_eggs, whole)
complete = pd.concat([whole,df], axis=0)

In [32]:
csv_path = "NBA_2018_2024.csv"
complete_path = folder_path + csv_path
complete.to_csv(complete_path)

In [37]:
len(complete.columns.to_list())

157

In [38]:
from nba_api.stats.endpoints import boxscoretraditionalv2, boxscoreadvancedv2

game_id = '0021900017'  # Example game ID

# Traditional Box Score for Teams
boxscore_traditional = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
team_stats_traditional = boxscore_traditional.team_stats.get_data_frame()

# Advanced Box Score for Teams
boxscore_advanced = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
team_stats_advanced = boxscore_advanced.team_stats.get_data_frame()

      GAME_ID     TEAM_ID TEAM_NAME TEAM_ABBREVIATION TEAM_CITY  \
0  0021900017  1610612761   Raptors               TOR   Toronto   
1  0021900017  1610612738   Celtics               BOS    Boston   

             MIN  FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  \
0  240.000000:00   37   78   0.474    18    36    0.500   14   17   0.824   
1  240.000000:00   42  109   0.385    13    38    0.342   15   16   0.938   

   OREB  DREB  REB  AST  STL  BLK  TO  PF  PTS  PLUS_MINUS  
0     5    41   46   22    2    6  23  28  106        -6.0  
1    21    35   56   21   10    8   9  20  112         6.0  
      GAME_ID     TEAM_ID TEAM_NAME TEAM_ABBREVIATION TEAM_CITY  \
0  0021900017  1610612761   Raptors               TOR   Toronto   
1  0021900017  1610612738   Celtics               BOS    Boston   

             MIN  E_OFF_RATING  OFF_RATING  E_DEF_RATING  DEF_RATING  \
0  240.000000:00         102.4       101.9         106.6       106.7   
1  240.000000:00         106.6       

In [40]:
print(complete.columns.to_list())

['mp', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%', 'ft', 'fta', 'ft%', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'ts%', 'efg%', '3par', 'ftr', 'orb%', 'drb%', 'trb%', 'ast%', 'stl%', 'blk%', 'tov%', 'usg%', 'ortg', 'drtg', 'fgmaxes', 'fgamaxes', 'fg%maxes', '3pmaxes', '3pamaxes', '3p%maxes', 'ftmaxes', 'ftamaxes', 'ft%maxes', 'orbmaxes', 'drbmaxes', 'trbmaxes', 'astmaxes', 'stlmaxes', 'blkmaxes', 'tovmaxes', 'pfmaxes', 'ptsmaxes', '+/-maxes', 'ts%maxes', 'efg%maxes', '3parmaxes', 'ftrmaxes', 'orb%maxes', 'drb%maxes', 'trb%maxes', 'ast%maxes', 'stl%maxes', 'blk%maxes', 'tov%maxes', 'usg%maxes', 'ortgmaxes', 'drtgmaxes', 'Teams', 'Total', 'Pace', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', 'ORtg', 'Wins', 'Losses', 'home', 'mp_opp', 'fg_opp', 'fga_opp', 'fg%_opp', '3p_opp', '3pa_opp', '3p%_opp', 'ft_opp', 'fta_opp', 'ft%_opp', 'orb_opp', 'drb_opp', 'trb_opp', 'ast_opp', 'stl_opp', 'blk_opp', 'tov_opp', 'pf_opp', 'pts_opp', 'ts%_opp', 'efg%_opp', '3par_opp', 'ftr_opp', 'orb%_opp', '

In [39]:
team_stats_traditional

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,21900017,1610612761,Raptors,TOR,Toronto,240.000000:00,37,78,0.474,18,36,0.5,14,17,0.824,5,41,46,22,2,6,23,28,106,-6.0
1,21900017,1610612738,Celtics,BOS,Boston,240.000000:00,42,109,0.385,13,38,0.342,15,16,0.938,21,35,56,21,10,8,9,20,112,6.0


In [41]:
team_stats_advanced

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TOV,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,E_TM_TOV_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,21900017,1610612761,Raptors,TOR,Toronto,240.000000:00,102.4,101.9,106.6,106.7,-4.2,-4.7,0.595,0.96,16.9,0.14,0.647,0.45,22.227,22.1,0.59,0.62,1.0,0.199,104.26,104.5,87.08,104,0.463
1,21900017,1610612738,Celtics,BOS,Boston,240.000000:00,106.6,106.7,102.4,101.9,4.2,4.7,0.5,2.1,14.3,0.353,0.86,0.55,9.52,9.5,0.445,0.483,1.0,0.198,104.26,104.5,87.08,105,0.537


In [42]:
from nba_api.stats.endpoints import leaguegamefinder
from datetime import datetime

# Choose the team and season
team_id = '1610612747' # Example: Los Angeles Lakers
season = '2022-23'

# Use LeagueGameFinder to search for games
game_finder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable=season)
games = game_finder.get_data_frames()[0]

# Format and print a subset of information: Game ID, Matchup, and Game Date
games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
games_subset = games[['GAME_ID', 'MATCHUP', 'GAME_DATE']]
print(games_subset.head())


      GAME_ID      MATCHUP  GAME_DATE
0  0042200314  LAL vs. DEN 2023-05-22
1  0042200313  LAL vs. DEN 2023-05-20
2  0042200312    LAL @ DEN 2023-05-18
3  0042200311    LAL @ DEN 2023-05-16
4  0042200236  LAL vs. GSW 2023-05-12


In [86]:
from nba_api.stats.endpoints import scoreboardv2
from datetime import datetime, timedelta

# Calculate yesterday's date
yesterday = datetime.now() - timedelta(days=3)
yesterday_str = yesterday.strftime("%Y-%m-%d")

# Get the scoreboard for yesterday's games
scoreboard = scoreboardv2.ScoreboardV2(game_date=yesterday_str)
games_info = scoreboard.game_header.get_data_frame()

# Placeholder lists for storing box score data
traditional_boxscores = []
advanced_boxscores = []

# Loop through the game IDs to get the box score for each game
for game_id in games_info['GAME_ID']:
    # Traditional Box Score
    boxscore_traditional = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
    team_stats_traditional = boxscore_traditional.team_stats.get_data_frame()
    traditional_boxscores.append(team_stats_traditional)

    # Advanced Box Score
    boxscore_advanced = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
    team_stats_advanced = boxscore_advanced.team_stats.get_data_frame()
    advanced_boxscores.append(team_stats_advanced)

# Do something with the boxscores, like printing them or combining into a single DataFrame


In [87]:
advanced_boxscores[0]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CITY,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TOV,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,E_TM_TOV_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,22301094,1610612761,Raptors,TOR,Toronto,240.000000:00,100.7,105.7,118.2,120.8,-17.5,-15.0,0.581,1.47,17.8,0.224,0.704,0.476,15.421,16.2,0.539,0.565,1.0,0.2,109.26,105.5,87.92,105,0.434
1,22301094,1610612747,Lakers,LAL,Los Angeles,240.000000:00,118.2,120.8,100.7,105.7,17.5,15.0,0.731,2.53,23.9,0.296,0.776,0.524,13.853,14.2,0.604,0.602,1.0,0.195,109.26,105.5,87.92,106,0.566


In [117]:
from nba_api.stats.endpoints import boxscoretraditionalv2

game_id = '0022301094' 
boxscore = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id)
player_stats = boxscore.player_stats.get_data_frame()
team_ids = player_stats['TEAM_ID'].unique()
team_max_stats = {}

boxscore_advanced = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
player_advanced_stats = boxscore_advanced.player_stats.get_data_frame()
team_max_advanced_stats = {}

for team_id in team_ids:
    team_data = player_stats[player_stats['TEAM_ID'] == team_id]
    
    # Calculate maxes for all the statistics
    team_max_stats[team_id] = {
        'fgmaxes': team_data['FGM'].max(),
        'fgamaxes': team_data['FGA'].max(),
        'fg%maxes': team_data['FG_PCT'].max(),
        '3pmaxes': team_data['FG3M'].max(),
        '3pamaxes': team_data['FG3A'].max(),
        '3p%maxes': team_data['FG3_PCT'].max(),
        'ftmaxes': team_data['FTM'].max(),
        'ftamaxes': team_data['FTA'].max(),
        'ft%maxes': team_data['FT_PCT'].max(),
        'orbmaxes': team_data['OREB'].max(),
        'drbmaxes': team_data['DREB'].max(),
        'trbmaxes': team_data['REB'].max(),
        'astmaxes': team_data['AST'].max(),
        'stlmaxes': team_data['STL'].max(),
        'blkmaxes': team_data['BLK'].max(),
        'tovmaxes': team_data['TO'].max(),
        'pfmaxes': team_data['PF'].max(),
        'ptsmaxes': team_data['PTS'].max(),
        # Add additional calculations for other stats as needed
    }

for team_id in team_ids:
    team_data = player_advanced_stats[player_advanced_stats['TEAM_ID'] == team_id]
    
    # Calculate maxes for all the advanced statistics you're interested in
    team_max_advanced_stats[team_id] = {
        'ts%maxes': team_data['TS_PCT'].max(),
        'efg%maxes': team_data['EFG_PCT'].max(),
        # '3parmaxes': team_data['FG3A_RATE'].max(),
        # 'ftrmaxes': team_data['FTA_RATE'].max(),
        'orb%maxes': team_data['OREB_PCT'].max(),
        'drb%maxes': team_data['DREB_PCT'].max(),
        'trb%maxes': team_data['REB_PCT'].max(),
        'ast%maxes': team_data['AST_PCT'].max(),
        # 'stl%maxes': team_data['STL_PCT'].max(),
        # 'blk%maxes': team_data['BLK_PCT'].max(),
        # 'tov%maxes': team_data['TOV_PCT'].max(),
        'usg%maxes': team_data['USG_PCT'].max(),
        'ortgmaxes': team_data['OFF_RATING'].max(),
        'drtgmaxes': team_data['DEF_RATING'].max(),
        # Add additional calculations for other stats as needed
    }


In [118]:
player_advanced_stats

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,DEF_RATING,E_NET_RATING,NET_RATING,AST_PCT,AST_TOV,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,22301094,1610612747,LAL,Los Angeles,1629060,Rui Hachimura,Rui,F,,25.000000:53,122.4,128.1,110.9,117.5,11.5,10.5,0.16,0.0,30.8,0.045,0.111,0.082,0.0,0.778,0.778,0.145,0.146,111.31,105.71,88.09,57,0.139
1,22301094,1610612747,LAL,Los Angeles,2544,LeBron James,LeBron,F,,28.000000:47,124.1,129.5,107.5,108.2,16.6,21.3,0.391,9.0,39.1,0.0,0.143,0.069,4.3,0.875,0.81,0.197,0.198,104.26,101.73,84.77,61,0.221
2,22301094,1610612747,LAL,Los Angeles,203076,Anthony Davis,Anthony,C,,27.000000:31,122.1,123.0,96.9,103.4,25.3,19.6,0.05,0.2,3.7,0.071,0.323,0.203,18.5,0.5,0.483,0.388,0.398,108.47,104.66,87.22,61,0.092
3,22301094,1610612747,LAL,Los Angeles,1630559,Austin Reaves,Austin,G,,25.000000:12,125.7,128.3,112.6,119.2,13.2,9.1,0.24,6.0,35.3,0.04,0.115,0.078,5.9,0.278,0.354,0.183,0.189,103.96,100.0,83.33,53,0.063
4,22301094,1610612747,LAL,Los Angeles,1626156,D'Angelo Russell,D'Angelo,G,,32.000000:33,126.4,128.9,98.5,101.4,27.9,27.6,0.219,1.75,25.0,0.0,0.057,0.029,14.3,0.735,0.735,0.247,0.248,113.31,110.6,92.17,76,0.125
5,22301094,1610612747,LAL,Los Angeles,203915,Spencer Dinwiddie,Spencer,,,24.000000:58,120.3,119.3,80.1,85.7,40.3,33.6,0.043,0.0,14.3,0.0,0.032,0.017,0.0,0.75,0.75,0.092,0.096,111.97,108.62,90.52,57,0.07
6,22301094,1610612747,LAL,Los Angeles,1627752,Taurean Prince,Taurean,,,24.000000:11,109.1,107.3,84.1,85.2,25.0,22.1,0.25,5.0,35.7,0.061,0.12,0.086,7.1,0.5,0.5,0.136,0.138,107.98,108.17,90.14,55,0.122
7,22301094,1610612747,LAL,Los Angeles,1631108,Max Christie,Max,,,20.000000:10,109.2,106.1,85.1,88.0,24.1,18.1,0.2,0.0,23.1,0.074,0.227,0.143,0.0,0.6,0.6,0.179,0.183,118.25,117.82,98.18,49,0.188
8,22301094,1610612747,LAL,Los Angeles,1629637,Jaxson Hayes,Jaxson,,,18.000000:33,116.6,116.3,101.7,104.8,14.9,11.5,0.111,2.0,25.0,0.174,0.188,0.179,12.5,0.6,0.643,0.115,0.116,111.47,109.97,91.64,43,0.078
9,22301094,1610612747,LAL,Los Angeles,1641721,Maxwell Lewis,Maxwell,,,6.000000:06,75.0,69.2,164.6,161.5,-89.6,-92.3,0.0,0.0,0.0,0.1,0.0,0.083,0.0,0.25,0.25,0.235,0.25,97.42,102.3,85.25,13,-0.015


In [None]:
# '3par'
# 'ftr'
# 'stl%'
# 'blk%'
# 'tov%'

In [106]:
game_lal_and_tor = pd.DataFrame(team_max_stats).rename({1610612747:'LAL', 1610612761:'TOR'}, axis=1)

In [105]:
lakers = complete[complete['Teams'] == 'LAL']
cols_to_keep = ['fgmaxes', 'fgamaxes', 'fg%maxes', '3pmaxes', '3pamaxes', 
                 '3p%maxes', 'ftmaxes', 'ftamaxes', 'ft%maxes', 'orbmaxes', 
                 'drbmaxes', 'trbmaxes', 'astmaxes', 'stlmaxes', 'blkmaxes',
                 'tovmaxes', 'pfmaxes', 'ptsmaxes']
lakers[lakers['date'] == '2024-04-02 00:00:00'][cols_to_keep]

Unnamed: 0,fgmaxes,fgamaxes,fg%maxes,3pmaxes,3pamaxes,3p%maxes,ftmaxes,ftamaxes,ft%maxes,orbmaxes,drbmaxes,trbmaxes,astmaxes,stlmaxes,blkmaxes,tovmaxes,pfmaxes,ptsmaxes
4,10.0,20.0,0.833,7.0,14.0,1.0,2.0,5.0,1.0,4.0,10.0,12.0,9.0,2.0,3.0,5.0,4.0,25.0


In [110]:
game_lal_and_tor[['LAL']].T

Unnamed: 0,fgmaxes,fgamaxes,fg%maxes,3pmaxes,3pamaxes,3p%maxes,ftmaxes,ftamaxes,ft%maxes,orbmaxes,drbmaxes,trbmaxes,astmaxes,stlmaxes,blkmaxes,tovmaxes,pfmaxes,ptsmaxes
LAL,10.0,20.0,0.833,7.0,14.0,1.0,2.0,5.0,1.0,4.0,10.0,12.0,9.0,2.0,3.0,5.0,4.0,25.0


In [101]:
cols_to_keep = ['fgmaxes', 'fgamaxes', 'fg%maxes', '3pmaxes', '3pamaxes', 
                 '3p%maxes', 'ftmaxes', 'ftamaxes', 'ft%maxes', 'orbmaxes', 
                 'drbmaxes', 'trbmaxes', 'astmaxes', 'stlmaxes', 'blkmaxes',
                 'tovmaxes', 'pfmaxes', 'ptsmaxes']




['mp',
 'fg',
 'fga',
 'fg%',
 '3p',
 '3pa',
 '3p%',
 'ft',
 'fta',
 'ft%',
 'orb',
 'drb',
 'trb',
 'ast',
 'stl',
 'blk',
 'tov',
 'pf',
 'pts',
 'ts%',
 'efg%',
 '3par',
 'ftr',
 'orb%',
 'drb%',
 'trb%',
 'ast%',
 'stl%',
 'blk%',
 'tov%',
 'usg%',
 'ortg',
 'drtg',
 'fgmaxes',
 'fgamaxes',
 'fg%maxes',
 '3pmaxes',
 '3pamaxes',
 '3p%maxes',
 'ftmaxes',
 'ftamaxes',
 'ft%maxes',
 'orbmaxes',
 'drbmaxes',
 'trbmaxes',
 'astmaxes',
 'stlmaxes',
 'blkmaxes',
 'tovmaxes',
 'pfmaxes',
 'ptsmaxes',
 '+/-maxes',
 'ts%maxes',
 'efg%maxes',
 '3parmaxes',
 'ftrmaxes',
 'orb%maxes',
 'drb%maxes',
 'trb%maxes',
 'ast%maxes',
 'stl%maxes',
 'blk%maxes',
 'tov%maxes',
 'usg%maxes',
 'ortgmaxes',
 'drtgmaxes',
 'Teams',
 'Total',
 'Pace',
 'eFG%',
 'TOV%',
 'ORB%',
 'FT/FGA',
 'ORtg',
 'Wins',
 'Losses',
 'home',
 'mp_opp',
 'fg_opp',
 'fga_opp',
 'fg%_opp',
 '3p_opp',
 '3pa_opp',
 '3p%_opp',
 'ft_opp',
 'fta_opp',
 'ft%_opp',
 'orb_opp',
 'drb_opp',
 'trb_opp',
 'ast_opp',
 'stl_opp',
 'blk_opp',

In [119]:
pip install basketball_reference_web_scraper

Collecting basketball_reference_web_scraper
  Downloading basketball_reference_web_scraper-4.14.0-py3-none-any.whl.metadata (3.4 kB)
Collecting lxml<6.0.0,>=5.1.0 (from basketball_reference_web_scraper)
  Downloading lxml-5.2.1-cp311-cp311-macosx_10_9_x86_64.whl.metadata (3.4 kB)
Collecting pytz<2025.0,>=2024.1 (from basketball_reference_web_scraper)
  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading basketball_reference_web_scraper-4.14.0-py3-none-any.whl (24 kB)
Downloading lxml-5.2.1-cp311-cp311-macosx_10_9_x86_64.whl (4.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m246.1 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading pytz-2024.1-py2.py3-none-any.whl (505 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m505.5/505.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pytz, lxml, basketball_reference_web_scraper
  Attempting uninstall:

In [120]:
from basketball_reference_web_scraper import client

client.team_box_scores(day=2, month=4, year=2024)

[{'team': <Team.SAN_ANTONIO_SPURS: 'SAN ANTONIO SPURS'>,
  'outcome': <Outcome.LOSS: 'LOSS'>,
  'minutes_played': 240,
  'made_field_goals': 42,
  'attempted_field_goals': 105,
  'made_three_point_field_goals': 13,
  'attempted_three_point_field_goals': 44,
  'made_free_throws': 8,
  'attempted_free_throws': 16,
  'offensive_rebounds': 15,
  'defensive_rebounds': 44,
  'assists': 33,
  'steals': 4,
  'blocks': 12,
  'turnovers': 7,
  'personal_fouls': 19,
  'points': 105},
 {'team': <Team.DENVER_NUGGETS: 'DENVER NUGGETS'>,
  'outcome': <Outcome.WIN: 'WIN'>,
  'minutes_played': 240,
  'made_field_goals': 43,
  'attempted_field_goals': 98,
  'made_three_point_field_goals': 11,
  'attempted_three_point_field_goals': 34,
  'made_free_throws': 13,
  'attempted_free_throws': 16,
  'offensive_rebounds': 11,
  'defensive_rebounds': 41,
  'assists': 28,
  'steals': 5,
  'blocks': 10,
  'turnovers': 8,
  'personal_fouls': 15,
  'points': 110},
 {'team': <Team.DALLAS_MAVERICKS: 'DALLAS MAVERICKS'

In [122]:
import requests

url = "https://api-nba-v1.p.rapidapi.com/seasons"

headers = {
	"X-RapidAPI-Key": "6053b49e4dmsh3e00844e40ca99ap1aed88jsnbfa1eb53ffe8",
	"X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com"
}

response = requests.get(url, headers=headers)

print(response.json())

{'get': 'seasons/', 'parameters': [], 'errors': [], 'results': 9, 'response': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]}


In [123]:
import requests

url = "https://api-nba-v1.p.rapidapi.com/players/statistics"

querystring = {"game":"8133"}

headers = {
	"X-RapidAPI-Key": "6053b49e4dmsh3e00844e40ca99ap1aed88jsnbfa1eb53ffe8",
	"X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

print(response.json())

{'get': 'players/statistics', 'parameters': {'game': '8133'}, 'errors': [], 'results': 37, 'response': [{'player': {'id': 734, 'firstname': 'Dwayne', 'lastname': 'Bacon'}, 'team': {'id': 26, 'name': 'Orlando Magic', 'nickname': 'Magic', 'code': 'ORL', 'logo': 'https://upload.wikimedia.org/wikipedia/fr/b/bd/Orlando_Magic_logo_2010.png'}, 'game': {'id': 8133}, 'points': 14, 'pos': 'SF', 'min': '21:56', 'fgm': 6, 'fga': 9, 'fgp': '66.7', 'ftm': 1, 'fta': 1, 'ftp': '100', 'tpm': 1, 'tpa': 3, 'tpp': '33.3', 'offReb': 0, 'defReb': 2, 'totReb': 2, 'assists': 1, 'pFouls': 1, 'steals': 2, 'turnovers': 1, 'blocks': 0, 'plusMinus': '6', 'comment': None}, {'player': {'id': 195, 'firstname': 'Aaron', 'lastname': 'Gordon'}, 'team': {'id': 26, 'name': 'Orlando Magic', 'nickname': 'Magic', 'code': 'ORL', 'logo': 'https://upload.wikimedia.org/wikipedia/fr/b/bd/Orlando_Magic_logo_2010.png'}, 'game': {'id': 8133}, 'points': 12, 'pos': 'PF', 'min': '16:19', 'fgm': 4, 'fga': 7, 'fgp': '57.1', 'ftm': 3, 'ft

In [124]:
pd.DataFrame(response.json())

ValueError: All arrays must be of the same length

In [125]:
import requests

url = "https://api-nba-v1.p.rapidapi.com/games/statistics"

querystring = {"id":"10403"}

headers = {
	"X-RapidAPI-Key": "6053b49e4dmsh3e00844e40ca99ap1aed88jsnbfa1eb53ffe8",
	"X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com"
}

response = requests.get(url, headers=headers, params=querystring)

print(response.json())

{'get': 'games/statistics', 'parameters': {'id': '10403'}, 'errors': [], 'results': 2, 'response': [{'team': {'id': 5, 'name': 'Charlotte Hornets', 'nickname': 'Hornets', 'code': 'CHA', 'logo': 'https://upload.wikimedia.org/wikipedia/fr/thumb/f/f3/Hornets_de_Charlotte_logo.svg/1200px-Hornets_de_Charlotte_logo.svg.png'}, 'statistics': [{'fastBreakPoints': 15, 'pointsInPaint': 70, 'biggestLead': 28, 'secondChancePoints': 18, 'pointsOffTurnovers': 24, 'longestRun': 12, 'points': 141, 'fgm': 54, 'fga': 97, 'fgp': '55.7', 'ftm': 15, 'fta': 23, 'ftp': '65.2', 'tpm': 18, 'tpa': 42, 'tpp': '42.9', 'offReb': 15, 'defReb': 36, 'totReb': 51, 'assists': 36, 'pFouls': 22, 'steals': 13, 'turnovers': 18, 'blocks': 2, 'plusMinus': '22', 'min': '240:00'}]}, {'team': {'id': 10, 'name': 'Detroit Pistons', 'nickname': 'Pistons', 'code': 'DET', 'logo': 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/39/Logo_of_the_Detroit_Pistons.png/300px-Logo_of_the_Detroit_Pistons.png'}, 'statistics': [{'fastBre