In [21]:
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Address of the box-score page that the cells below download and parse.
url = 'https://www.basketball-reference.com/boxscores/202310240DEN.html'


def get_page_data(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built from the response bytes with the
        ``'html.parser'`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Failing here is clearer than parsing an error page and crashing
            later when the expected tables are missing.
        requests.RequestException: On connection problems or timeout.
    """
    # A browser-like User-Agent is sent with the request (kept from the original).
    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


# Fetch and parse the page once; the cells below all work from this `page` object.
page = get_page_data(url)

# Returns a tuple: (basic_home_table, advanced_home_table, basic_away_table, advanced_away_table)
def get_tables(page):
    """Select the four box-score tables from the parsed page.

    Every ``<table>`` element in ``page`` is collected and four of them are
    picked by position.

    Args:
        page: Parsed document exposing ``find_all``.

    Returns:
        A 4-tuple holding the tables at positions 8, 15, 0 and 7, in that
        order: basic home, advanced home, basic away, advanced away.
        NOTE(review): the positions are hard-coded and presumably tied to the
        page layout at the time of writing -- confirm if the site changes.

    Raises:
        IndexError: If the page contains fewer tables than the positions need.
    """
    all_tables = page.find_all('table')
    wanted_positions = (8, 15, 0, 7)
    return tuple(all_tables[position] for position in wanted_positions)

# %%
def get_game_details(page):
    """Build a compact label for the game from the page heading.

    The text of the first ``<h1>`` inside the element with ``id='content'``
    is taken, every space is removed and every comma becomes an underscore.

    Args:
        page: Parsed document exposing ``find``.

    Returns:
        The transformed heading text as a string.
    """
    heading = page.find(id='content').find('h1')
    heading_text = heading.get_text()
    # One pass: spaces are dropped, commas are mapped to underscores.
    return heading_text.translate({ord(' '): None, ord(','): '_'})


# Label built from the page's <h1> text (spaces removed, commas -> underscores).
game_details = get_game_details(page)
# Bare expression so the notebook displays the value as the cell output.
game_details


# %%
def get_basic_table_headers(table):
    """Read the column labels of a basic box-score table.

    The second ``<tr>`` of the table (only the first five rows are looked up)
    is treated as the header row. Its text is split on newlines and the first
    and last pieces are dropped.

    Args:
        table: Table element exposing ``find_all``.

    Returns:
        List of header strings.

    Raises:
        IndexError: If the table has fewer than two ``<tr>`` rows.
    """
    leading_rows = table.find_all('tr', limit=5)
    header_row = leading_rows[1]
    pieces = header_row.getText().split('\n')
    # The outermost pieces are discarded; only the inner labels are returned.
    return pieces[1:-1]


# %%
def get_advanced_table_headers(table):
    """Read the column labels of an advanced box-score table.

    Iterates over the children of the table's second ``<tr>``, splits each
    child's text on newlines, skips children whose split text is exactly
    ``['', '']`` (a lone newline), and flattens what is left.

    Args:
        table: Table element exposing ``find_all``.

    Returns:
        Flat list of header strings, in document order.

    Raises:
        IndexError: If the table has fewer than two ``<tr>`` rows.
    """
    header_row = table.find_all('tr')[1]
    labels = []
    for child in header_row:
        pieces = child.getText().split('\n')
        if pieces != ['', '']:
            labels.extend(pieces)
    return labels


# %%
def get_table_rows(table):
    """Return every ``<tr>`` found inside the table's ``<tbody>`` as a list.

    Args:
        table: Table element exposing ``find``.

    Returns:
        A plain list of the row elements, in document order.
    """
    body = table.find('tbody')
    return list(body.find_all('tr'))


# %% md

# %%

def get_table_data(rows):
    """Extract the cell texts and link texts from each table row.

    Args:
        rows: Sequence of row elements exposing ``find_all``.

    Returns:
        A pair ``(player_names, player_stats)``. ``player_names[i]`` holds the
        text of every ``<a>`` in row ``i``; ``player_stats[i]`` holds the text
        of every ``<td>`` in row ``i``. A row without matching elements
        contributes an empty list.
    """
    def texts_of(row, tag_name):
        # Text of every descendant of `row` matching `tag_name`.
        return [element.getText() for element in row.find_all(tag_name)]

    stats_by_row = [texts_of(row, 'td') for row in rows]
    names_by_row = [texts_of(row, 'a') for row in rows]
    return names_by_row, stats_by_row


# %%
def make_table_rows(player_names, player_stats, placeholder='player'):
    """Prefix every stats row with its player name, modifying the lists in place.

    For each row ``i`` the first entry of ``player_names[i]`` is inserted at
    the front of ``player_stats[i]``. Rows with no name at all receive
    ``placeholder`` instead (presumably the in-table header row that
    separates starters from reserves, which contains no player link).

    Earlier this only worked when the nameless row sat at index 5; it raised
    ``IndexError`` for shorter tables or for any other nameless row. Any
    nameless row is now handled wherever it appears, and the result for the
    index-5 layout is unchanged.

    Args:
        player_names: List of per-row name lists. An empty entry gets
            ``placeholder`` appended to it.
        player_stats: List of per-row stat lists, same length as
            ``player_names``; each gets the row's name inserted at index 0.
        placeholder: Label used for rows without a name.

    Returns:
        None. Both input lists are mutated.

    Raises:
        IndexError: If ``player_stats`` has fewer rows than ``player_names``.
    """
    for row_index, names in enumerate(player_names):
        if not names:
            # Keep the side effect callers may rely on: the name list itself
            # ends up holding the placeholder.
            names.append(placeholder)
        player_stats[row_index].insert(0, names[0])


def make_basic_table_pandas(basic_table):
    """Turn a basic box-score table element into a pandas DataFrame.

    The body rows are read with ``get_table_rows`` / ``get_table_data``, the
    column labels come from ``get_basic_table_headers``, and
    ``make_table_rows`` puts each player's name at the front of the stats.

    Args:
        basic_table: Table element for a basic box score.

    Returns:
        ``pd.DataFrame`` with one row per table row and the header labels as
        columns.
    """
    body_rows = get_table_rows(basic_table)
    names_by_row, stats_by_row = get_table_data(rows=body_rows)
    column_labels = get_basic_table_headers(basic_table)
    # Mutates stats_by_row in place so each row starts with the player name.
    make_table_rows(names_by_row, stats_by_row)
    return pd.DataFrame(stats_by_row, columns=column_labels)



# %%
def make_advanced_table_pandas(adv_table):
    """Turn an advanced box-score table element into a pandas DataFrame.

    The body rows are read with ``get_table_rows`` / ``get_table_data``, the
    column labels come from ``get_advanced_table_headers``, and
    ``make_table_rows`` puts each player's name at the front of the stats.

    Args:
        adv_table: Table element for an advanced box score.

    Returns:
        ``pd.DataFrame`` with one row per table row and the header labels as
        columns.
    """
    body_rows = get_table_rows(adv_table)
    names_by_row, stats_by_row = get_table_data(rows=body_rows)
    column_labels = get_advanced_table_headers(adv_table)
    # Mutates stats_by_row in place so each row starts with the player name.
    make_table_rows(names_by_row, stats_by_row)
    return pd.DataFrame(stats_by_row, columns=column_labels)

# %%
def write_basic_tables_to_file(details, basic_table_home, basic_table_away,
                               path='./box_score_tables_csv'):
    """Save the home and away basic box-score tables as CSV files.

    Two files are written into ``path``:
    ``basic_table_H_{details}.csv`` for the home table and
    ``basic_table_A_{details}.csv`` for the away table. The directory is
    created when it does not exist yet; previously a missing directory made
    the write fail.

    Args:
        details: Text embedded in both file names.
        basic_table_home: DataFrame for the home team.
        basic_table_away: DataFrame for the away team.
        path: Output directory. Defaults to the location that was previously
            hard-coded.

    Returns:
        None.
    """
    os.makedirs(path, exist_ok=True)
    home_path = f'{path}/basic_table_H_{details}.csv'
    away_path = f'{path}/basic_table_A_{details}.csv'
    basic_table_home.to_csv(home_path)
    basic_table_away.to_csv(away_path)


# %%
def write_advanced_tables_to_file(details, adv_table_home, adv_table_away,
                                  path='./box_score_tables_csv'):
    """Save the home and away advanced box-score tables as CSV files.

    Two files are written into ``path``:
    ``advanced_table_H_{details}.csv`` for the home table and
    ``advanced_table_A_{details}.csv`` for the away table. The directory is
    created when it does not exist yet; previously a missing directory made
    the write fail.

    Args:
        details: Text embedded in both file names.
        adv_table_home: DataFrame for the home team.
        adv_table_away: DataFrame for the away team.
        path: Output directory. Defaults to the location that was previously
            hard-coded.

    Returns:
        None.
    """
    os.makedirs(path, exist_ok=True)
    home_path = f'{path}/advanced_table_H_{details}.csv'
    away_path = f'{path}/advanced_table_A_{details}.csv'
    adv_table_home.to_csv(home_path)
    adv_table_away.to_csv(away_path)



In [22]:
# Basic box score of the home team: get_tables returns it first (index 0).
make_basic_table_pandas(get_tables(page)[0])


Unnamed: 0,Starters,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-
0,Nikola Jokić,36:16,12.0,22.0,0.545,3.0,5.0,0.6,2.0,4.0,...,3.0,10.0,13.0,11.0,1.0,1.0,2.0,2.0,29.0,15.0
1,Kentavious Caldwell-Pope,36:15,8.0,12.0,0.667,2.0,3.0,0.667,2.0,2.0,...,1.0,1.0,2.0,1.0,3.0,1.0,3.0,5.0,20.0,10.0
2,Aaron Gordon,34:59,7.0,11.0,0.636,1.0,2.0,0.5,0.0,0.0,...,2.0,5.0,7.0,5.0,2.0,1.0,0.0,0.0,15.0,6.0
3,Jamal Murray,34:15,8.0,13.0,0.615,3.0,5.0,0.6,2.0,2.0,...,0.0,2.0,2.0,6.0,0.0,1.0,1.0,3.0,21.0,3.0
4,Michael Porter Jr.,30:08,5.0,13.0,0.385,2.0,9.0,0.222,0.0,0.0,...,2.0,10.0,12.0,2.0,2.0,0.0,0.0,1.0,12.0,12.0
5,player,,,,,,,,,,...,,,,,,,,,,
6,Reggie Jackson,24:04,3.0,8.0,0.375,2.0,5.0,0.4,0.0,0.0,...,0.0,3.0,3.0,1.0,1.0,0.0,2.0,0.0,8.0,11.0
7,Christian Braun,19:20,2.0,5.0,0.4,0.0,1.0,0.0,1.0,2.0,...,1.0,2.0,3.0,2.0,0.0,1.0,1.0,1.0,5.0,5.0
8,Zeke Nnaji,11:44,1.0,3.0,0.333,0.0,1.0,0.0,2.0,2.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,4.0,-3.0
9,Peyton Watson,10:50,1.0,3.0,0.333,1.0,3.0,0.333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,3.0,1.0


In [23]:
# Advanced box score of the home team: second element (index 1) of get_tables.
make_advanced_table_pandas(get_tables(page)[1])

Unnamed: 0,Starters,MP,TS%,eFG%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,BPM
0,Nikola Jokić,36:16,0.61,0.614,0.227,0.182,9.9,28.8,20.0,45.3,1.4,2.2,7.8,31.8,133.0,109.0,15.0
1,Kentavious Caldwell-Pope,36:15,0.776,0.75,0.25,0.167,3.3,2.9,3.1,3.5,4.2,2.2,18.9,19.6,122.0,111.0,1.9
2,Aaron Gordon,34:59,0.682,0.682,0.182,0.0,6.9,14.9,11.2,17.9,2.9,2.2,0.0,14.1,155.0,110.0,8.7
3,Jamal Murray,34:15,0.756,0.731,0.385,0.154,0.0,6.1,3.3,22.9,0.0,2.3,6.7,19.4,150.0,119.0,5.0
4,Michael Porter Jr.,30:08,0.462,0.462,0.692,0.0,8.0,34.6,22.2,8.0,3.3,0.0,0.0,19.3,111.0,104.0,-0.2
5,player,,,,,,,,,,,,,,,,
6,Reggie Jackson,24:04,0.5,0.5,0.625,0.0,0.0,13.0,7.0,4.7,2.1,0.0,20.0,18.6,82.0,114.0,-8.2
7,Christian Braun,19:20,0.425,0.4,0.2,0.4,6.2,10.8,8.7,11.5,0.0,4.1,14.5,15.9,97.0,117.0,-6.2
8,Zeke Nnaji,11:44,0.515,0.333,0.333,0.667,0.0,0.0,0.0,9.3,0.0,0.0,20.5,18.6,96.0,123.0,-11.9
9,Peyton Watson,10:50,0.5,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,7.3,25.0,16.5,71.0,118.0,-9.3


In [24]:
# Basic box score of the away team: third element (index 2) of get_tables.
make_basic_table_pandas(get_tables(page)[2])

Unnamed: 0,Starters,MP,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-
0,D'Angelo Russell,36:11,4.0,12.0,0.333,2.0,5.0,0.4,1.0,2.0,...,0.0,4.0,4.0,7.0,1.0,0.0,3.0,3.0,11.0,1.0
1,Anthony Davis,34:09,6.0,17.0,0.353,1.0,2.0,0.5,4.0,4.0,...,1.0,7.0,8.0,4.0,0.0,2.0,2.0,3.0,17.0,-17.0
2,Austin Reaves,31:20,4.0,11.0,0.364,1.0,2.0,0.5,5.0,7.0,...,4.0,4.0,8.0,4.0,2.0,0.0,2.0,2.0,14.0,-14.0
3,Taurean Prince,29:53,6.0,8.0,0.75,4.0,6.0,0.667,2.0,2.0,...,1.0,2.0,3.0,1.0,0.0,1.0,1.0,0.0,18.0,-14.0
4,LeBron James,29:00,10.0,16.0,0.625,1.0,4.0,0.25,0.0,1.0,...,1.0,7.0,8.0,5.0,1.0,0.0,0.0,1.0,21.0,7.0
5,player,,,,,,,,,,...,,,,,,,,,,
6,Gabe Vincent,22:18,3.0,8.0,0.375,0.0,4.0,0.0,0.0,0.0,...,1.0,0.0,1.0,2.0,1.0,0.0,2.0,3.0,6.0,-17.0
7,Cam Reddish,17:38,2.0,4.0,0.5,1.0,2.0,0.5,2.0,2.0,...,2.0,2.0,4.0,0.0,0.0,1.0,0.0,2.0,7.0,7.0
8,Christian Wood,15:28,3.0,4.0,0.75,0.0,1.0,0.0,1.0,2.0,...,1.0,3.0,4.0,0.0,0.0,0.0,1.0,1.0,7.0,2.0
9,Rui Hachimura,14:39,3.0,10.0,0.3,0.0,3.0,0.0,0.0,0.0,...,2.0,1.0,3.0,0.0,0.0,0.0,0.0,2.0,6.0,-8.0


In [25]:
# Advanced box score of the away team: fourth element (index 3) of get_tables.
make_advanced_table_pandas(get_tables(page)[3])

Unnamed: 0,Starters,MP,TS%,eFG%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,BPM
0,D'Angelo Russell,36:11,0.427,0.417,0.417,0.167,0.0,13.3,6.2,26.0,1.4,0.0,18.9,19.2,91.0,125.0,-6.5
1,Anthony Davis,34:09,0.453,0.382,0.118,0.235,3.1,24.6,13.1,17.3,0.0,4.9,9.6,26.6,100.0,122.0,-2.1
2,Austin Reaves,31:20,0.497,0.409,0.182,0.636,13.3,15.3,14.3,17.6,3.2,0.0,12.4,22.4,113.0,121.0,1.4
3,Taurean Prince,29:53,1.014,1.0,0.75,0.25,3.5,8.0,5.6,5.1,0.0,2.8,10.1,14.5,175.0,128.0,11.2
4,LeBron James,29:00,0.639,0.656,0.25,0.063,3.6,29.0,15.4,33.9,1.7,0.0,0.0,24.8,141.0,122.0,11.9
5,player,,,,,,,,,,,,,,,,
6,Gabe Vincent,22:18,0.375,0.375,0.5,0.0,4.7,0.0,2.5,12.5,2.3,0.0,20.0,19.6,77.0,126.0,-11.3
7,Cam Reddish,17:38,0.717,0.625,0.5,0.5,11.8,13.6,12.7,0.0,0.0,4.8,0.0,12.1,163.0,125.0,3.6
8,Christian Wood,15:28,0.717,0.75,0.25,0.5,6.7,23.3,14.4,0.0,0.0,0.0,17.0,16.6,118.0,127.0,-3.3
9,Rui Hachimura,14:39,0.3,0.3,0.3,0.0,14.2,8.2,11.4,0.0,0.0,0.0,0.0,29.8,79.0,130.0,-17.1
