In [429]:
from selenium import webdriver
from datetime import datetime
import pandas as pd
import numpy as np
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException


# Set up the Selenium WebDriver (make sure to have the appropriate webdriver installed)
driver = webdriver.Edge(executable_path=r"C:\Program Files (x86)\msedgedriver.exe")


# URL of the webpage containing the dropdown menu
url = f"https://www.basketball-reference.com/leagues/NBA_2024_per_game.html"
driver.get(url)

# Player Ratings

In [431]:
# Have the table only show accurate data
hide_partial_row_button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="per_game_stats_toggle_partial_table"]'))
)
hide_partial_row_button.click()
print('Hide rows with partial data')

ElementClickInterceptedException: Message: element click intercepted: Element is not clickable at point (640, 904)
  (Session info: MicrosoftEdge=120.0.2210.91)


In [None]:
# Get table with data
table = driver.find_element(By.XPATH,'//*[@id="per_game_stats"]')

# Get table header
headers = table.find_element(By.XPATH, '//*[@id="per_game_stats"]/thead').text.split(' ')

# Get table data
data = [row.text.split(' ') for row in table.find_elements(By.TAG_NAME, 'tr')][1:]

In [None]:
# Remove 'Player' column from list
headers.pop(1)

# Add 'First Name' to list
headers.insert(1, 'First Name')

# Add 'Last Name' to list
headers.insert(2, 'Last Name')

In [None]:
# Create a pandas Data Frame
player_df = pd.DataFrame(data=data)

# Drop last column
player_df = player_df.drop(player_df.columns[31],axis=1)

# Add headers
player_df.columns = headers

In [None]:
player_df.head()

In [None]:
player_df.info()

In [None]:
# Remove rows with any null values'
player_df = player_df.dropna(axis=0, how='any')

In [None]:
player_df.info()

In [None]:
# Convert columns with data type object to float
player_df = player_df.apply(pd.to_numeric, errors='ignore', downcast='float')
player_df.dtypes

In [None]:
player_df.head()

# Team Ratings

In [499]:
from selenium import webdriver
from datetime import datetime
import pandas as pd
import numpy as np
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException

# Set up the Selenium WebDriver (make sure to have the appropriate webdriver installed)
driver = webdriver.Edge(executable_path=r"C:\Program Files (x86)\msedgedriver.exe")

# URL of the webpage containing the dropdown menu
url = f"https://www.basketball-reference.com/leagues/NBA_2024_ratings.html"
driver.get(url)

In [514]:
# Extract table
table = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="ratings"]'))
)

# Get headers used in table
thead = table.find_element(By.TAG_NAME, 'thead')
headers = [header.text.split() for header in thead.find_elements(By.TAG_NAME,'tr')][1]

# Get data from table
tbody = table.find_element(By.XPATH, '//*[@id="ratings"]/tbody')
rows = [row.text.split() for row in tbody.find_elements(By.TAG_NAME, 'tr')]

In [515]:
team_names = []

# Extract team names from table
names = tbody.find_elements(By.CLASS_NAME, 'left')

# Extract team names and add to array
for name in names:
    if name.text == 'Team':
        continue
    else:
        team_names.append(name.text)

team_names

['Boston Celtics',
 'Oklahoma City Thunder',
 'Philadelphia 76ers',
 'Minnesota Timberwolves',
 'Los Angeles Clippers',
 'New Orleans Pelicans',
 'Denver Nuggets',
 'New York Knicks',
 'Milwaukee Bucks',
 'Indiana Pacers',
 'Houston Rockets',
 'Cleveland Cavaliers',
 'Orlando Magic',
 'Sacramento Kings',
 'Dallas Mavericks',
 'Miami Heat',
 'Golden State Warriors',
 'Phoenix Suns',
 'Los Angeles Lakers',
 'Atlanta Hawks',
 'Toronto Raptors',
 'Brooklyn Nets',
 'Chicago Bulls',
 'Utah Jazz',
 'Memphis Grizzlies',
 'Portland Trail Blazers',
 'San Antonio Spurs',
 'Washington Wizards',
 'Charlotte Hornets',
 'Detroit Pistons']

In [516]:
data = []

for row in rows:
    # Find rows where the team name has two parts 'Boston Celtics'
    if len(row) == 16:
        row.pop(1)
        row.pop(1)
    # Find rows where the team name has three parts 'Oklahoma City Thunder'
    elif len(row) == 17:
        row.pop(1)
        row.pop(1)
        row.pop(1)
    elif row[0] == 'Unadjusted' or row[0] == 'Rk':
        continue  # Skip headers and other irrelevant rows
    data.append(row)
    
data

[['1',
  'E',
  'A',
  '29',
  '8',
  '.784',
  '10.35',
  '122.93',
  '112.36',
  '10.58',
  '10.96',
  '123.10',
  '111.92',
  '11.18'],
 ['2',
  'W',
  'NW',
  '25',
  '11',
  '.694',
  '7.58',
  '122.24',
  '114.61',
  '7.63',
  '7.86',
  '122.44',
  '114.52',
  '7.92'],
 ['3',
  'E',
  'A',
  '23',
  '13',
  '.639',
  '7.81',
  '120.61',
  '112.78',
  '7.83',
  '7.10',
  '120.03',
  '112.89',
  '7.14'],
 ['4',
  'W',
  'NW',
  '26',
  '11',
  '.703',
  '5.38',
  '115.60',
  '110.14',
  '5.46',
  '6.64',
  '116.01',
  '109.28',
  '6.73'],
 ['5',
  'W',
  'P',
  '24',
  '13',
  '.649',
  '5.70',
  '120.79',
  '115.03',
  '5.76',
  '5.30',
  '120.56',
  '115.20',
  '5.36'],
 ['6',
  'W',
  'SW',
  '23',
  '15',
  '.605',
  '5.00',
  '118.35',
  '113.23',
  '5.12',
  '4.92',
  '118.83',
  '113.80',
  '5.02'],
 ['7',
  'W',
  'NW',
  '26',
  '13',
  '.667',
  '4.95',
  '120.04',
  '114.88',
  '5.15',
  '4.78',
  '120.31',
  '115.34',
  '4.97'],
 ['8',
  'E',
  'A',
  '22',
  '15',
  '.

In [517]:
# Add respective team names to corresponding table data rows
for i in range(len(data)):
    data[i].insert(1, team_names[i])

In [518]:
data

[['1',
  'Boston Celtics',
  'E',
  'A',
  '29',
  '8',
  '.784',
  '10.35',
  '122.93',
  '112.36',
  '10.58',
  '10.96',
  '123.10',
  '111.92',
  '11.18'],
 ['2',
  'Oklahoma City Thunder',
  'W',
  'NW',
  '25',
  '11',
  '.694',
  '7.58',
  '122.24',
  '114.61',
  '7.63',
  '7.86',
  '122.44',
  '114.52',
  '7.92'],
 ['3',
  'Philadelphia 76ers',
  'E',
  'A',
  '23',
  '13',
  '.639',
  '7.81',
  '120.61',
  '112.78',
  '7.83',
  '7.10',
  '120.03',
  '112.89',
  '7.14'],
 ['4',
  'Minnesota Timberwolves',
  'W',
  'NW',
  '26',
  '11',
  '.703',
  '5.38',
  '115.60',
  '110.14',
  '5.46',
  '6.64',
  '116.01',
  '109.28',
  '6.73'],
 ['5',
  'Los Angeles Clippers',
  'W',
  'P',
  '24',
  '13',
  '.649',
  '5.70',
  '120.79',
  '115.03',
  '5.76',
  '5.30',
  '120.56',
  '115.20',
  '5.36'],
 ['6',
  'New Orleans Pelicans',
  'W',
  'SW',
  '23',
  '15',
  '.605',
  '5.00',
  '118.35',
  '113.23',
  '5.12',
  '4.92',
  '118.83',
  '113.80',
  '5.02'],
 ['7',
  'Denver Nuggets',


In [520]:
team_ratings = pd.DataFrame(data=data, columns=thead)
team_ratings

TypeError: 'WebElement' object is not iterable