<a href="https://colab.research.google.com/github/btoneil2021/basketball-sql/blob/main/BasketballProspectCondensed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%shell
pip install chromedriver_autoinstaller
pip install selenium

Collecting chromedriver_autoinstaller
  Downloading chromedriver_autoinstaller-0.6.4-py3-none-any.whl.metadata (2.1 kB)
Downloading chromedriver_autoinstaller-0.6.4-py3-none-any.whl (7.6 kB)
Installing collected packages: chromedriver_autoinstaller
Successfully installed chromedriver_autoinstaller-0.6.4
Collecting selenium
  Downloading selenium-4.30.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.30.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m36.6 MB/s[0



In [None]:
import sys
import chromedriver_autoinstaller
from selenium import webdriver
from bs4 import BeautifulSoup
import re
import threading
import unicodedata

In [None]:
def get_soup(url):
  """Load ``url`` in headless Chrome and return the page parsed by BeautifulSoup.

  A new browser is started for every call and is always shut down again,
  even when navigation or parsing raises.

  Args:
    url: Address of the page to load.

  Returns:
    A ``BeautifulSoup`` tree built from the driver's ``page_source`` using the
    ``'html.parser'`` backend.

  Raises:
    Whatever ``webdriver.Chrome`` / ``driver.get`` raise (e.g. when Chrome
    cannot be started or the page cannot be loaded).
  """
  chrome_options = webdriver.ChromeOptions()
  # Flags needed to run Chrome without a display inside a container/Colab VM.
  for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    chrome_options.add_argument(flag)

  driver = webdriver.Chrome(options=chrome_options)
  try:
    driver.get(url)

    # NOTE(review): implicitly_wait only sets the timeout used by later
    # element lookups; it does not pause here. If the intent is to wait for
    # client-side rendering, an explicit wait is needed — confirm.
    driver.implicitly_wait(1)

    return BeautifulSoup(driver.page_source, 'html.parser')
  finally:
    # Always release the browser process, otherwise a failed load leaks it.
    driver.quit()

In [None]:
# Collect the prospect names shown on the mock draft board
soup = get_soup('https://www.tankathon.com/mock_draft')
mock_rows = soup.find('div', {'class': 'mock-rows'})

player_names_unstripped = mock_rows.find_all('div', {'class': 'mock-row-name'})
print(player_names_unstripped)

player_names = [name_div.text for name_div in player_names_unstripped]
print(player_names)


def _profile_url(name):
  """Build the player-page URL that corresponds to a displayed player name."""
  # Decompose accented characters, then drop everything that is not ASCII
  ascii_name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('utf-8')
  # Lowercase and collapse each run of non-word characters into one hyphen
  hyphenated = re.sub(r'\W+', '-', ascii_name.strip().lower())
  # No hyphen at either end of the slug
  final_name = hyphenated.strip('-')
  return f'https://www.tankathon.com/players/{final_name}'


# One link per scraped name, in the same order as player_names
player_links = list(map(_profile_url, player_names))
print(player_links)

[<div class="mock-row-name">Cooper Flagg</div>, <div class="mock-row-name">Dylan Harper</div>, <div class="mock-row-name">Ace Bailey</div>, <div class="mock-row-name">V.J. Edgecombe</div>, <div class="mock-row-name">Kasparas Jakucionis</div>, <div class="mock-row-name">Khaman Maluach</div>, <div class="mock-row-name">Tre Johnson</div>, <div class="mock-row-name">Kon Knueppel</div>, <div class="mock-row-name">Asa Newell</div>, <div class="mock-row-name">Derik Queen</div>, <div class="mock-row-name">Jeremiah Fears</div>, <div class="mock-row-name">Egor Demin</div>, <div class="mock-row-name">Collin Murray-Boyles</div>, <div class="mock-row-name">Jase Richardson</div>, <div class="mock-row-name">Liam McNeeley</div>, <div class="mock-row-name">Nolan Traore</div>, <div class="mock-row-name">Noa Essengue</div>, <div class="mock-row-name">Ben Saraf</div>, <div class="mock-row-name">Thomas Sorber</div>, <div class="mock-row-name">Hugo González</div>, <div class="mock-row-name">Will Riley</div>

In [None]:
class PlayerInfo:
  """Profile fields and statistics extracted from one parsed player page.

  On construction the page is scanned once and the results are stored in
  ``self.player_info``, a flat dict mapping a field name to the text found on
  the page. A field that is not present on the page is stored as ``None``.
  """

  def __init__(self, soup):
    """Keep the parsed page and build ``self.player_info`` from it.

    Args:
      soup: Parsed document offering BeautifulSoup-style ``find`` and
        ``select`` methods.
    """
    self.soup = soup
    # Called on the instance (not the class) because every helper reads
    # self.soup.
    self.player_info = self.get_player_info()

  def grab_specific_info(self, block_type='div', class_name='None'):
    """Return the stripped text of the first ``block_type`` tag with ``class_name``.

    Args:
      block_type: Tag name to look for.
      class_name: CSS class the tag must carry.

    Returns:
      The tag's text, or ``None`` when no matching tag exists.
    """
    tag = self.soup.find(block_type, {'class': class_name})
    if tag is None:
      return None
    return tag.get_text(strip=True)

  def grab_data_block(self, label_name):
    """Return the ``.data`` text of the data block whose label equals ``label_name``.

    The comparison is exact (case-sensitive) on the stripped label text.
    Returns ``None`` when no block matches.
    """
    for block in self.soup.select('.data-section .data-block'):
      label = block.select_one('.label')
      data = block.select_one('.data')
      if label and data and label.get_text(strip=True) == label_name:
        return data.get_text(strip=True)
    return None

  def grab_data_block_alt(self, label_name):
    """Like ``grab_data_block`` but case-insensitive and ``.desktop``-aware.

    When the matched ``.data`` element contains a ``.desktop`` child, only
    that child's text is returned; otherwise the whole ``.data`` text is.
    Returns ``None`` when no block matches.
    """
    wanted = label_name.lower()
    for block in self.soup.select('.data-section .data-block'):
      label = block.select_one('.label')
      data = block.select_one('.data')
      if label and data and label.get_text(strip=True).lower() == wanted:
        # Prefer the .desktop span if present
        desktop = data.select_one('.desktop')
        if desktop:
          return desktop.get_text(strip=True)
        return data.get_text(strip=True)
    return None

  def grab_stat_block(self, label_name):
    """Return the ``.stat-data`` text of the stat whose label equals ``label_name``.

    The comparison is exact (case-sensitive) on the stripped label text.
    Returns ``None`` when no stat matches.
    """
    for container in self.soup.select('.stat-row .stat-container'):
      label = container.select_one('.stat-label')
      data = container.select_one('.stat-data')
      if label and data and label.get_text(strip=True) == label_name:
        return data.get_text(strip=True)
    return None

  def grab_stat_block_alt(self, label_name):
    """Return the ``.stat-data`` text of the first stat whose label contains ``label_name``.

    The label text is read with a single space between its pieces and the
    containment check ignores case. Returns ``None`` when no stat matches.
    """
    wanted = label_name.lower()
    for container in self.soup.select('.stat-row .stat-container'):
      label = container.select_one('.stat-label')
      data = container.select_one('.stat-data')
      if label and data:
        label_text = label.get_text(separator=' ', strip=True)
        if wanted in label_text.lower():
          return data.get_text(strip=True)
    return None

  def grab_mock_draft_info(self):
    """Return the mock-draft position and projected team, if the page has them.

    Looks for the first ``.data-block`` whose label link reads ``Mock Draft``
    and takes the first and second links inside its ``.data`` element.

    Returns:
      ``{'mock_draft_position': ..., 'mock_draft_projected_team': ...}``, or
      ``None`` when there is no such block or either link is missing.
    """
    for block in self.soup.select('.data-block'):
      label = block.select_one('.label a')
      if not label or label.text.strip() != 'Mock Draft':
        continue
      draft_pos = block.select_one('.data a:nth-of-type(1)')
      projected_team = block.select_one('.data a:nth-of-type(2)')
      if draft_pos is None or projected_team is None:
        # Block is present but incomplete: treat as "no mock draft info".
        return None
      return {
          'mock_draft_position': draft_pos.text.strip(),
          'mock_draft_projected_team': projected_team.text.strip()
      }
    return None

  def get_player_info(self):
    """Collect every supported field from ``self.soup`` into one dict.

    Returns:
      A dict with a fixed set of keys (general info, per-game averages,
      advanced stats). Each value is the text found on the page, or ``None``
      when the page has no such field.
    """
    info = {}

    # General Info
    info['Name'] = self.grab_specific_info('h1', 'page-title')
    info['School Year'] = self.grab_data_block('Year')
    info['Position'] = self.grab_data_block('Position')
    info['Height'] = self.grab_data_block('Height')
    info['Weight'] = self.grab_data_block('Weight')
    # Look the mock-draft block up once; an absent block yields None for both.
    mock_draft = self.grab_mock_draft_info() or {}
    info['Mock Draft Position'] = mock_draft.get('mock_draft_position')
    info['Mock Draft Projected Team'] = mock_draft.get('mock_draft_projected_team')
    info['Age At Draft'] = self.grab_data_block('Age at Draft')
    info['Birthday'] = self.grab_data_block('Birthdate')
    info['Nation'] = self.grab_data_block('Nation')
    info['Hometown'] = self.grab_data_block('Hometown')
    info['High School'] = self.grab_data_block_alt('High School') # alt to avoid doubling
    info['ESPN 100'] = self.grab_data_block('ESPN 100')

    # 2024-2025 Per Game Averages
    info['Games Played'] = self.grab_stat_block('G')
    info['Minutes Per Game'] = self.grab_stat_block('MP')
    info['FGM to FGA'] = self.grab_stat_block('FGM-FGA')
    info['FG Percentage'] = self.grab_stat_block('FG%')
    info['3PM to 3PA'] = self.grab_stat_block('3PM-3PA')
    info['3P Percentage'] = self.grab_stat_block('3P%')
    # NOTE(review): key says "FT to FGA" but the value is FTM-FTA; the key is
    # kept unchanged so existing lookups keep working.
    info['FT to FGA'] = self.grab_stat_block('FTM-FTA')
    info['FT Percentage'] = self.grab_stat_block('FT%')
    info['Rebounds'] = self.grab_stat_block('REB')
    info['Assists'] = self.grab_stat_block('AST')
    info['Blocks'] = self.grab_stat_block('BLK')
    info['Steals'] = self.grab_stat_block('STL')
    info['Turnovers'] = self.grab_stat_block('TO')
    info['Fouls'] = self.grab_stat_block('PF')
    info['Points Per Game'] = self.grab_stat_block('PTS')

    # Advanced Stats
    info['True Shooting Percentage'] = self.grab_stat_block_alt('True Shooting %')
    info['Effective FG%'] = self.grab_stat_block_alt('Effective FG%')
    info['3PA Rate'] = self.grab_stat_block_alt('3PA Rate')
    info['Free Throw Rate'] = self.grab_stat_block_alt('FTA Rate')
    info['Projected NBA 3P%'] = self.grab_stat_block_alt('Proj NBA 3P%')
    info['Usage %'] = self.grab_stat_block('USG%')
    # NOTE(review): "Assits" is a misspelling; key kept unchanged for
    # compatibility with existing lookups.
    info['Assits to Usage'] = self.grab_stat_block('AST/USG')
    info['Assists to Turnovers'] = self.grab_stat_block('AST/TO')

    return info

In [None]:
# Smoke test: scrape the page behind the first link and show the parsed name
first_player_soup = get_soup(player_links[0])
player_info = PlayerInfo(first_player_soup)
print(player_info.player_info['Name'])

AttributeError: 'str' object has no attribute 'soup'

In [None]:
# Spot-check a second extracted field on the PlayerInfo built in the previous cell
print(player_info.player_info['Age At Draft'])

None
