In [487]:
from selenium import webdriver
from datetime import datetime
import pandas as pd
import numpy as np
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException


# Set up the Selenium WebDriver (make sure to have the appropriate webdriver installed)
driver = webdriver.Edge(executable_path=r"C:\Program Files (x86)\msedgedriver.exe")


# URL of the webpage containing the dropdown menu
url = f"https://www.basketball-reference.com/leagues/NBA_2024_per_game.html"
driver.get(url)

# Player Ratings

In [492]:
# Have the table only show accurate data
hide_partial_row_button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="per_game_stats_toggle_partial_table"]'))
)
hide_partial_row_button.click()
print('Hide rows with partial data')

Hide rows with partial data


In [493]:
# Get table with data
table = driver.find_element(By.XPATH,'//*[@id="per_game_stats"]')

# Get table header
headers = table.find_element(By.XPATH, '//*[@id="per_game_stats"]/thead').text.split(' ')

# Get table data
data = [row.text.split(' ') for row in table.find_elements(By.TAG_NAME, 'tr')][1:]

In [494]:
# Remove 'Player' column from list
headers.pop(1)

# Add 'First Name' to list
headers.insert(1, 'First Name')

# Add 'Last Name' to list
headers.insert(2, 'Last Name')

In [495]:
# Create a pandas Data Frame
player_df = pd.DataFrame(data=data)

# Drop last column
player_df = player_df.drop(player_df.columns[31],axis=1)

# Add headers
player_df.columns = headers

In [496]:
player_df.head()

Unnamed: 0,Rk,First Name,Last Name,Pos,Age,Tm,G,GS,MP,FG,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1.0,Precious,Achiuwa,C-PF,24.0,TOT,27.0,0.0,16.9,3.0,...,0.571,2.0,3.3,5.3,1.7,0.6,0.5,1.1,1.6,7.3
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,2.0,Bam,Adebayo,C,26.0,MIA,24.0,24.0,34.3,7.8,...,0.778,2.2,8.1,10.3,4.0,1.2,1.0,2.6,2.5,21.7
4,3.0,Ochai,Agbaji,SG,23.0,UTA,35.0,10.0,20.8,2.5,...,0.714,0.8,1.7,2.5,1.0,0.7,0.5,0.7,1.5,6.5


In [497]:
player_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 567 entries, 0 to 566
Data columns (total 31 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Rk          567 non-null    object
 1   First Name  545 non-null    object
 2   Last Name   545 non-null    object
 3   Pos         545 non-null    object
 4   Age         545 non-null    object
 5   Tm          545 non-null    object
 6   G           545 non-null    object
 7   GS          545 non-null    object
 8   MP          545 non-null    object
 9   FG          545 non-null    object
 10  FGA         545 non-null    object
 11  FG%         545 non-null    object
 12  3P          545 non-null    object
 13  3PA         545 non-null    object
 14  3P%         545 non-null    object
 15  2P          545 non-null    object
 16  2PA         545 non-null    object
 17  2P%         545 non-null    object
 18  eFG%        545 non-null    object
 19  FT          545 non-null    object
 20  FTA       

In [498]:
# Remove rows with any null values'
player_df = player_df.dropna(axis=0, how='any')

In [499]:
player_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 438 entries, 0 to 565
Data columns (total 31 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Rk          438 non-null    object
 1   First Name  438 non-null    object
 2   Last Name   438 non-null    object
 3   Pos         438 non-null    object
 4   Age         438 non-null    object
 5   Tm          438 non-null    object
 6   G           438 non-null    object
 7   GS          438 non-null    object
 8   MP          438 non-null    object
 9   FG          438 non-null    object
 10  FGA         438 non-null    object
 11  FG%         438 non-null    object
 12  3P          438 non-null    object
 13  3PA         438 non-null    object
 14  3P%         438 non-null    object
 15  2P          438 non-null    object
 16  2PA         438 non-null    object
 17  2P%         438 non-null    object
 18  eFG%        438 non-null    object
 19  FT          438 non-null    object
 20  FTA       

In [500]:
# Convert columns with data type object to float
player_df = player_df.apply(pd.to_numeric, errors='ignore', downcast='float')
player_df.dtypes

Rk            float32
First Name     object
Last Name      object
Pos            object
Age            object
Tm             object
G              object
GS            float32
MP            float32
FG            float32
FGA           float32
FG%           float32
3P            float32
3PA           float32
3P%           float32
2P            float32
2PA           float32
2P%           float32
eFG%          float32
FT            float32
FTA           float32
FT%           float32
ORB           float32
DRB           float32
TRB           float32
AST           float32
STL           float32
BLK           float32
TOV           float32
PF            float32
PTS           float32
dtype: object

In [501]:
player_df.head()

Unnamed: 0,Rk,First Name,Last Name,Pos,Age,Tm,G,GS,MP,FG,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1.0,Precious,Achiuwa,C-PF,24,TOT,27,0.0,16.9,3.0,...,0.571,2.0,3.3,5.3,1.7,0.6,0.5,1.1,1.6,7.3
3,2.0,Bam,Adebayo,C,26,MIA,24,24.0,34.299999,7.8,...,0.778,2.2,8.1,10.3,4.0,1.2,1.0,2.6,2.5,21.700001
4,3.0,Ochai,Agbaji,SG,23,UTA,35,10.0,20.799999,2.5,...,0.714,0.8,1.7,2.5,1.0,0.7,0.5,0.7,1.5,6.5
5,4.0,Santi,Aldama,PF,23,MEM,27,5.0,24.0,4.1,...,0.605,1.3,4.3,5.7,1.8,0.6,0.7,1.0,1.6,10.9
6,5.0,Nickeil,Alexander-Walker,SG,25,MIN,33,11.0,22.5,2.4,...,0.684,0.4,1.3,1.7,2.4,0.9,0.6,1.0,1.9,6.5


# Team Ratings

In [502]:
from selenium import webdriver
from datetime import datetime
import pandas as pd
import numpy as np
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException


# Set up the Selenium WebDriver (make sure to have the appropriate webdriver installed)
driver = webdriver.Edge(executable_path=r"C:\Program Files (x86)\msedgedriver.exe")


# URL of the webpage containing the dropdown menu
url = f"https://www.basketball-reference.com/leagues/NBA_2024_ratings.html"
driver.get(url)

In [506]:
# Extract table
table = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="ratings"]'))
)

# Get headers used in table
thead = table.find_element(By.TAG_NAME, 'thead')
headers = [header.text.split() for header in thead.find_elements(By.TAG_NAME,'tr')][1]


# Get data from table
tbody = table.find_element(By.XPATH, '//*[@id="ratings"]/tbody')
rows = [row.text.split() for row in tbody.find_elements(By.TAG_NAME, 'tr')]

In [566]:
# Some rows have shifted due to splitting by empty spaces
for row in rows:
    print(row)

['1', 'Boston', 'Celtics', 'E', 'A', '26', '7', '.788', '10.06', '123.06', '112.68', '10.38', '10.56', '123.46', '112.57', '10.88']
['2', 'Philadelphia', '76ers', 'E', 'A', '23', '10', '.697', '10.15', '121.88', '111.70', '10.18', '9.48', '121.44', '111.91', '9.53']
['3', 'Oklahoma', 'City', 'Thunder', 'W', 'NW', '23', '10', '.697', '8.06', '121.86', '113.74', '8.12', '8.89', '122.36', '113.40', '8.96']
['4', 'Minnesota', 'Timberwolves', 'W', 'NW', '24', '9', '.727', '5.00', '115.47', '110.38', '5.09', '5.72', '115.51', '109.69', '5.82']
['5', 'Denver', 'Nuggets', 'W', 'NW', '25', '11', '.694', '5.31', '119.89', '114.37', '5.52', '5.54', '120.36', '114.65', '5.72']
['6', 'Los', 'Angeles', 'Clippers', 'W', 'P', '21', '12', '.636', '4.94', '120.12', '115.20', '4.92', '4.27', '119.78', '115.53', '4.25']
['7', 'Milwaukee', 'Bucks', 'E', 'C', '25', '10', '.714', '5.03', '122.16', '117.22', '4.94', '3.77', '120.90', '117.19', '3.71']
['8', 'Houston', 'Rockets', 'W', 'SW', '17', '15', '.531',

In [567]:
# Extract the team names from table
team_name = []

for i in range(len(rows)):
    if rows[i][1:4][-1] == 'E':
        rows[i][1:4].pop(-1)
        team_name.append(rows[i][1] + ' ' + rows[i][2])
    elif rows[i][1:4][-1] == 'W':
        team_name.append(rows[i][1] + ' ' + rows[i][2])
    elif len(rows[i][1:4]) == 3:
        team_name.append(rows[i][1:4][0] + ' ' + rows[i][1:4][1] + ' ' + rows[i][1:4][2])
                         
team_name

['Boston Celtics',
 'Philadelphia 76ers',
 'Oklahoma City Thunder',
 'Minnesota Timberwolves',
 'Denver Nuggets',
 'Los Angeles Clippers',
 'Milwaukee Bucks',
 'Houston Rockets',
 'New Orleans Pelicans',
 'New York Knicks',
 'Golden State Warriors',
 'Indiana Pacers',
 'Orlando Magic',
 'Cleveland Cavaliers',
 'Sacramento Kings',
 'Team Conf Div',
 'Miami Heat',
 'Dallas Mavericks',
 'Phoenix Suns',
 'Atlanta Hawks',
 'Los Angeles Lakers',
 'Toronto Raptors',
 'Brooklyn Nets',
 'Chicago Bulls',
 'Utah Jazz',
 'Memphis Grizzlies',
 'Portland Trail Blazers',
 'Washington Wizards',
 'Charlotte Hornets',
 'San Antonio Spurs',
 'Detroit Pistons']

In [571]:
for i in team_name:
    if 'Team' in team_name:
        print('found')
    else:
        print('not found')

not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found
not found


In [568]:
team_ratings = pd.DataFrame(data=rows)
team_ratings

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1,Boston,Celtics,E,A,26,7,.788,10.06,123.06,112.68,10.38,10.56,123.46,112.57,10.88,
1,2,Philadelphia,76ers,E,A,23,10,.697,10.15,121.88,111.70,10.18,9.48,121.44,111.91,9.53,
2,3,Oklahoma,City,Thunder,W,NW,23,10,.697,8.06,121.86,113.74,8.12,8.89,122.36,113.4,8.96
3,4,Minnesota,Timberwolves,W,NW,24,9,.727,5.00,115.47,110.38,5.09,5.72,115.51,109.69,5.82,
4,5,Denver,Nuggets,W,NW,25,11,.694,5.31,119.89,114.37,5.52,5.54,120.36,114.65,5.72,
5,6,Los,Angeles,Clippers,W,P,21,12,.636,4.94,120.12,115.20,4.92,4.27,119.78,115.53,4.25
6,7,Milwaukee,Bucks,E,C,25,10,.714,5.03,122.16,117.22,4.94,3.77,120.90,117.19,3.71,
7,8,Houston,Rockets,W,SW,17,15,.531,3.47,115.46,111.96,3.50,3.54,115.54,111.97,3.57,
8,9,New,Orleans,Pelicans,W,SW,21,14,.600,3.91,117.45,113.45,4.00,3.41,117.91,114.43,3.48
9,10,New,York,Knicks,E,A,19,15,.559,2.00,118.96,116.77,2.19,2.37,118.65,116.09,2.56
