In [25]:
import pandas as pd
import time
import tqdm
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

# Player Data

In [None]:
# Start an Edge WebDriver session backed by the local msedgedriver binary.
# The path is a raw string so the backslashes are never interpreted as escape
# sequences ("\W", "\S" and "\m" are invalid escapes and trigger a
# SyntaxWarning on recent Python versions).
driverService = webdriver.EdgeService(r"C:\Windows\System32\msedgedriver.exe")
driver = webdriver.Edge(service=driverService)

In [95]:
# Load the player "traditional" stats page; the query string requests per-game
# values sorted on PTS (dir=-1 presumably means descending -- confirm on site).
driver.get("https://www.nba.com/stats/players/traditional?PerMode=PerGame&sort=PTS&dir=-1")

In [112]:
# Click the button inside the first "Crom_base" container to reveal the glossary.
glossary_container = driver.find_elements(By.CLASS_NAME, "Crom_base__f0niE")[0]
glossaryButton = glossary_container.find_element(By.TAG_NAME, "button")
glossaryButton.click()

# Glossary terms (<dt>-styled elements) and their descriptions (<dd>-styled
# elements) are collected separately and paired by position below.
acronyms = [term.text for term in driver.find_elements(By.CLASS_NAME, "StatsTableGlossary_dt__GPooh")]
definitions = [desc.text for desc in driver.find_elements(By.CLASS_NAME, "StatsTableGlossary_dd__zcr38")]

# One row per glossary entry; "Type" is the constant "Player" for every row.
glossary = pd.DataFrame(
    {
        "Type": "Player",
        "Variable": acronyms,
        "Definition": definitions,
    }
)

In [482]:
def readTable(tableObj):
    """Convert a Selenium table element into a pandas DataFrame.

    The first ``<tr>`` supplies the column names (``<th>`` cells whose text is
    blank after stripping are ignored). Every following ``<tr>`` becomes one
    data row: its first ``<td>`` is skipped and cells with empty text are
    dropped.

    Parameters
    ----------
    tableObj : WebElement
        Element exposing ``find_elements``; expected to contain ``<tr>`` rows.

    Returns
    -------
    pandas.DataFrame
        One row per body ``<tr>``. An empty DataFrame is returned when the
        table has no rows at all.

    Raises
    ------
    ValueError
        Raised by pandas when a row's cell count does not match the header.
    """
    rows = tableObj.find_elements(By.TAG_NAME, "tr")
    if not rows:
        # Nothing rendered (yet): avoid an IndexError on rows[0].
        return pd.DataFrame()

    header_row, body_rows = rows[0], rows[1:]

    header_texts = [th.text for th in header_row.find_elements(By.TAG_NAME, "th")]
    colNames = [text for text in header_texts if len(text.strip()) > 0]

    tableData = []
    for row in tqdm.tqdm(body_rows):
        # Read each cell's text exactly once; every `.text` access is a
        # separate query to the browser, so reading it twice doubles the cost.
        cell_texts = [cell.text for cell in row.find_elements(By.TAG_NAME, "td")[1:]]
        tableData.append([text for text in cell_texts if len(text) > 0])

    return pd.DataFrame(tableData, columns=colNames)

In [None]:
# Read the list of available seasons from the first dropdown on the page.
season_select_dd = Select(driver.find_elements(By.CLASS_NAME, "DropDown_select__4pIg9")[0])
seasons = [option.text for option in season_select_dd.options]

# One DataFrame per season, in the same order as `seasons`.
season_tables = []
for season in seasons:
    # Re-locate the season dropdown on every iteration (as the team scraping
    # loop does): the page re-renders after each selection, so a reference
    # taken before the loop can go stale.
    season_select_dd = Select(driver.find_elements(By.CLASS_NAME, "DropDown_select__4pIg9")[0])
    season_select_dd.select_by_visible_text(season)
    time.sleep(10)  # fixed wait for the season's data to load

    # The last dropdown controls pagination; index 0 is presumably the
    # "All" option so every player is shown on one page -- confirm on site.
    page_select_dd = Select(driver.find_elements(By.CLASS_NAME, "DropDown_select__4pIg9")[-1])
    page_select_dd.select_by_index(0)
    time.sleep(10)  # fixed wait for the full table to render

    # Locate the table only AFTER the wait, so the element that is read is the
    # re-rendered table rather than one captured before the page changed.
    table_element = driver.find_element(By.CLASS_NAME, "Crom_table__p1iZz")
    player_table = readTable(table_element)
    season_tables.append(player_table)

In [372]:
def create_season_column(df, season):
    """Return a copy of ``df`` with a ``Season`` column set to ``season``.

    The input frame is left untouched, so re-running a cell that calls this
    function never changes the scraped tables it was given.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to label.
    season : object
        Value written to every row of the ``Season`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with ``Season`` added (or overwritten if already present).
    """
    return df.assign(Season=season)

In [93]:
# Starting years of the seasons to export (2016 through 2023 inclusive).
season_start_years = range(2016, 2024)

# Label each table with the starting year taken from the season text it was
# scraped for (e.g. "2016-17" -> 2016). Pairing the tables positionally with a
# hard-coded list of years mislabels them whenever the dropdown order or
# length differs from that list, and silently drops the surplus tables.
labelled_tables = []
for df, season in zip(season_tables, seasons):
    start_year = int(str(season).split("-")[0])
    if start_year in season_start_years:
        # iloc[:, 1:] drops the first column of the scraped table.
        labelled_tables.append(create_season_column(df, start_year).iloc[:, 1:])

playerdf = pd.concat(labelled_tables)
playerdf.to_csv("./data/playerTradStats.csv", index=False)

In [126]:
# quit() ends the whole WebDriver session and stops the driver process;
# close() would only close the current window and leave the process running.
driver.quit()

# Team Data

In [368]:
# Start a fresh Edge WebDriver session for the team pages.
# The path is a raw string so the backslashes are never interpreted as escape
# sequences ("\W", "\S" and "\m" are invalid escapes and trigger a
# SyntaxWarning on recent Python versions).
driverService = webdriver.EdgeService(r"C:\Windows\System32\msedgedriver.exe")
driver = webdriver.Edge(service=driverService)

# Load the team "traditional" stats page with PerMode=Totals.
driver.get(url="https://www.nba.com/stats/teams/traditional?PerMode=Totals")

In [457]:
def typeButtonClick():
    """Click the button of the third quick-nav selector, then pause one second.

    Uses the module-level ``driver``. Callers click once to open the menu and
    once more to close it again.
    """
    selectors = driver.find_elements(By.CLASS_NAME, "StatsQuickNavSelector_nav__JzoME")
    selectors[2].find_element(By.TAG_NAME, "button").click()
    time.sleep(1)

def retTypeList(closeMenu = True):
    """Open the third quick-nav menu and return its ``<li>`` entries.

    Parameters
    ----------
    closeMenu : bool
        When true, the menu button is clicked a second time before returning.

    Returns
    -------
    list
        The ``<li>`` elements found inside the third quick-nav list.
    """
    typeButtonClick()
    menus = driver.find_elements(By.CLASS_NAME, "StatsQuickNavSelector_list__nb3l1")
    entries = menus[2].find_elements(By.TAG_NAME, "li")
    if not closeMenu:
        return entries
    typeButtonClick()
    return entries

def pickType(typeIndex):
    """Click the menu entry at position ``typeIndex``, then wait four seconds.

    The menu is opened (and left open) via ``retTypeList`` so that the entry
    can be clicked.
    """
    retTypeList(closeMenu=False)[typeIndex].click()
    time.sleep(4)

In [446]:
# Open the menu and keep it open while the entry labels are read.
typeList = retTypeList(closeMenu=False)
typeNames = [entry.text for entry in typeList]
# Click the menu button once more to close the menu again.
typeButtonClick()

In [483]:
# Read the available season labels from the first dropdown on the page.
season_select_dd = Select(driver.find_elements(By.CLASS_NAME, "DropDown_select__4pIg9")[0])
seasons = [option.text for option in season_select_dd.options]

# allTables maps stat-type name -> {season label -> DataFrame}.
allTables = {}
# The first two menu entries are skipped; the menu index therefore starts at 2.
for typeIndex, typeName in enumerate(typeNames[2:], start=2):
    pickType(typeIndex)

    season_tables = {}
    for season in seasons:
        # Look the dropdown up again on every pass before selecting the season.
        dropdowns = driver.find_elements(By.CLASS_NAME, "DropDown_select__4pIg9")
        season_select_dd = Select(dropdowns[0])
        season_select_dd.select_by_visible_text(season)

        time.sleep(10)  # fixed wait for the selected season to load

        table_element = driver.find_element(By.CLASS_NAME, "Crom_table__p1iZz")
        season_tables[season] = readTable(table_element)

    allTables[typeName] = season_tables

100%|██████████| 30/30 [00:09<00:00,  3.10it/s]
100%|██████████| 30/30 [00:11<00:00,  2.63it/s]
100%|██████████| 30/30 [00:12<00:00,  2.39it/s]
100%|██████████| 30/30 [00:09<00:00,  3.20it/s]
100%|██████████| 30/30 [00:10<00:00,  2.99it/s]
100%|██████████| 30/30 [00:09<00:00,  3.08it/s]
100%|██████████| 30/30 [00:09<00:00,  3.20it/s]
100%|██████████| 30/30 [00:09<00:00,  3.15it/s]
100%|██████████| 30/30 [00:09<00:00,  3.11it/s]
100%|██████████| 30/30 [00:09<00:00,  3.11it/s]
100%|██████████| 30/30 [00:09<00:00,  3.11it/s]
100%|██████████| 30/30 [00:09<00:00,  3.19it/s]
100%|██████████| 30/30 [00:09<00:00,  3.18it/s]
100%|██████████| 30/30 [00:13<00:00,  2.26it/s]
100%|██████████| 30/30 [00:08<00:00,  3.41it/s]
100%|██████████| 30/30 [00:09<00:00,  3.11it/s]
100%|██████████| 30/30 [00:08<00:00,  3.33it/s]
100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
100%|██████████| 30/30 [00:10<00:00,  2.89it/s]
100%|██████████| 30/30 [00:09<00:00,  3.06it/s]
100%|██████████| 29/29 [00:08<00:00,  3.

In [514]:
def create_season_column(df, season):
    """Return a copy of ``df`` with ``Season`` set to the season's first part.

    ``season`` is split on ``"-"`` and the text before the first hyphen is
    used, e.g. ``"2016-17"`` becomes ``"2016"``. The input frame is left
    untouched so the scraped tables can be re-exported without side effects.

    Note: this redefines the ``create_season_column`` declared earlier in the
    notebook, which stored ``season`` unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to label.
    season : str
        Season label containing the starting year before the first ``"-"``.

    Returns
    -------
    pandas.DataFrame
        New frame with ``Season`` added (or overwritten if already present).
    """
    return df.assign(Season=season.split("-")[0])

# Stack the "Advanced" tables of every season into one frame; each table gets
# its Season label and loses its first column before concatenation.
advanced_by_season = allTables["Advanced"]
advanced_frames = [
    create_season_column(df, season).iloc[:, 1:]
    for season, df in advanced_by_season.items()
]
advanceddf = pd.concat(advanced_frames)
advanceddf.to_csv("./data/teamAdvStats.csv", index = False)

In [516]:
# Write one CSV per scraped stat type, with all seasons stacked and labelled.
for typeName in typeNames[2:]:
    teamdf = pd.concat([create_season_column(df, season) for season, df in allTables[typeName].items()])
    teamdf.to_csv(f"./data/team{typeName}Stats.csv", index = False)

# quit() ends the whole WebDriver session and stops the driver process;
# close() would only close the current window and leave the process running.
driver.quit()

# All-NBA Team awards

In [1]:
import requests
import bs4
import re

In [2]:
# Download the All-NBA Teams history page. The timeout stops the cell from
# hanging indefinitely, and raise_for_status() fails loudly on an HTTP error
# instead of parsing an error page.
awards_response = requests.get("https://www.nba.com/news/history-all-nba-teams", timeout=30)
awards_response.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
nbaAwardsPage = bs4.BeautifulSoup(awards_response.text, "html.parser")

In [19]:
def isSeason(strObj):
    """Return True when ``strObj`` starts with a season label such as ``2016-17``.

    The check is anchored at the start of the string only (``re.match``), so
    trailing text after the ``YYYY-YY`` prefix is allowed.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    return re.match(r"\d{4}-\d{2}", strObj) is not None

def isAwardTitle(strObj):
    """Tell whether ``strObj`` begins with uppercase letters followed by `` TEAM``.

    Matching is anchored at the start of the string; anything after the
    matched prefix (for example a trailing colon) is ignored.
    """
    found = re.match("[A-Z]+ TEAM", strObj)
    return bool(found)

def isPlayerTeam(htmlObj):
    """Tell whether ``htmlObj`` looks like a plain paragraph holding a comma.

    Two conditions must both hold:
    * the element's text contains a comma, and
    * its markup contains exactly two bare paragraph tags (``<p>`` / ``</p>``),
      i.e. one attribute-free paragraph with no nested paragraphs.
    """
    has_comma = "," in htmlObj.text
    bare_p_tags = re.findall("</*p>", htmlObj.decode())
    return len(bare_p_tags) == 2 and has_comma

In [4]:
# Keep only the <h3> headings whose text starts with a season label.
seasons = list(filter(lambda heading: isSeason(heading.text), nbaAwardsPage.find_all("h3")))

In [35]:
# dataDict maps season heading text -> {award title -> [player/team strings]}.
dataDict = {}
for season in seasons:
    seasonDict = {}

    awardTitle = None   # title of the award block currently being collected
    playerTeam = []     # "player, team" paragraphs seen for that award so far

    # Walk the elements that follow this season heading until the next season
    # heading (or the end of the siblings) is reached.
    for child in season.find_next_siblings():

        if isAwardTitle(child.text):
            # A new award block starts: store the one collected so far.
            if awardTitle is not None:
                seasonDict[awardTitle] = playerTeam
                playerTeam = []
            awardTitle = child.text.replace(":", "")

        if isPlayerTeam(child):
            playerTeam.append(child.text)

        if isSeason(child.text):
            # Reached the next season's heading; this season is complete.
            break

    # Store the last award block whether the loop ended on the next season
    # heading or simply ran out of siblings. This replaces a `child = None`
    # sentinel that failed when a heading had no following siblings. A season
    # without any award title no longer produces a `None` key.
    if awardTitle is not None:
        seasonDict[awardTitle] = playerTeam

    dataDict[season.text] = seasonDict

In [80]:
# One row per (season, award, player): season rows -> award columns, stacked
# into a Series of lists, exploded to one entry per list element, then split
# on commas into positional columns (0 = player part, 1 = team part).
awardsdf = pd.DataFrame.from_dict(dataDict, orient = "index").stack().explode().str.split(",", expand = True)

# The player part may start with a one-letter position prefix such as "G:".
# Extracted group 1 is that letter (NaN when absent), group 2 the remainder.
position_and_player = awardsdf[0].str.extract("(([A-Z]):){0,1}(.*)")
awardsdf["Position"] = position_and_player[1]
awardsdf[0] = position_and_player[2]

awardsdf = awardsdf.rename({0: "Player", 1: "Team"}, axis = 1)
awardsdf["Player"] = awardsdf.Player.str.strip()
# Splitting on "," leaves the space that followed the comma at the start of
# the team name, so strip it just like the player name.
awardsdf["Team"] = awardsdf["Team"].str.strip()
awardsdf.to_csv("./data/awards.csv")