## Getting the Website Source Code

In [1]:
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to point somewhere else without editing the notebook; the original
# hard-coded path remains the default.
PATH = os.environ.get('CHROMEDRIVER_PATH', '/Users/andrewlee/chromedriver')
service = Service(PATH)
driver = webdriver.Chrome(service=service)

# XPath of the page-size dropdown.
# NOTE(review): the "Drown Down" spelling presumably mirrors the title attribute
# on the live page - confirm before "correcting" it.
DROPDOWN_XPATH = "//select[@title='Page Number Selection Drown Down List']"

try:
    driver.get("https://nba.com/players")

    wait = WebDriverWait(driver, 60)

    # Wait for the title element to appear before continuing
    title_element = wait.until(EC.presence_of_element_located((By.TAG_NAME, "title")))

    # Wait for the dropdown itself before touching it: the <title> tag being
    # present does not guarantee the <select> has been rendered yet.
    dropdown_element = wait.until(EC.presence_of_element_located((By.XPATH, DROPDOWN_XPATH)))

    # Select "All" option from dropdown
    dropdown = Select(dropdown_element)
    dropdown.select_by_visible_text("All")

    # Wait for the dropdown element to be visible again after the selection.
    # NOTE(review): the dropdown is already visible at this point, so this wait
    # may return before the player table has finished re-rendering - confirm
    # that page_source really contains every player.
    updated_dropdown_element = wait.until(EC.visibility_of_element_located((By.XPATH, DROPDOWN_XPATH)))

    # Get the page source after the dropdown has been updated
    page_source = driver.page_source
finally:
    # Always close the browser, even when a wait times out or a lookup fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

## Converting the Source Code Into a BeautifulSoup Object and Grabbing Links/Player Names

In [2]:
from bs4 import BeautifulSoup as bs
import re

# Parse the page source captured by Selenium
soup = bs(page_source, 'html.parser')

# Keep only the elements that carry a player name or a player link
player_names = soup.find_all(class_="RosterRow_playerName__G28lg")
player_links = soup.find_all(class_="Anchor_anchor__cSc3P RosterRow_playerLink__qw1vG")

# Raw href of every player link, in page order
current_player = [anchor.get("href") for anchor in player_links]

# Filled further down with one hyphenated name per player
names = []

def fix_link(string):
    """Turn a player href into a URL under https://nba.com/stats.

    The href is cut back to its last path segment that is followed by a '/'
    (anything after that slash is dropped), trailing slashes are removed, and
    the result is appended to the stats base URL. An href that does not match
    the pattern is appended unchanged (minus trailing slashes).
    """
    base_url = "https://nba.com/stats"
    trimmed_path = re.sub(r'(/.*/[^/]+/).*', r'\1', string).rstrip('/')
    return base_url + trimmed_path

# Apply fix_link to every collected href
player_links_fixed = list(map(fix_link, current_player))

# One name per player: join the element's text fragments with '-' and replace
# any remaining spaces with '-' as well
names.extend(
    "-".join(name_tag.stripped_strings).replace(" ", "-")
    for name_tag in player_names
)

## Grabbing Data from Player Stats Page

In [3]:
import requests
import json
from bs4 import BeautifulSoup as bs
import re

# Rows of the final dataset (one list per player-season) and the names of the
# players that were skipped for having no stats.
player_figs, no_stat_players = [], []

# Keys read from each season row, in the order they are written to the dataset.
stat_names = [
    '+/-', '3P%', '3PA', '3PM',
    'APG', 'AST', 'BLK', 'BPG',
    'FG%', 'FGA', 'FGM',
    'FT%', 'FTA', 'FTM',
    'GP', 'MIN', 'MPG', 'PF',
    'PPG', 'PTS', 'REB', 'RPG',
    'SEASON', 'SPG', 'STL',
    'TOV', 'TPG', 'TM',
]

def extract_text_within_parentheses(string):
    """Return the text inside the first '(...)' pair of ``string``.

    The match is non-greedy, so it stops at the first closing parenthesis.
    Returns None when ``string`` contains no parenthesised text.
    """
    found = re.search(r'\((.*?)\)', string)
    return found.group(1) if found else None

# One StatMuse career-stats URL per player name, in the same order as names
muse_links = [
    f"https://www.statmuse.com/nba/ask/{player}-career-stats"
    for player in names
]

def get_player_figs(player_links_fixed, names, timeout=None):
    """Collect bio details and per-season stats for every player.

    For each URL in ``player_links_fixed`` the player page is fetched and its
    first eight ``PlayerSummary_playerInfoValue__JS8_v`` values are read. The
    entry of the module-level ``muse_links`` list at the same index is then
    fetched, the JSON held in the ``answer`` attribute of its
    ``<visual-answer>`` tag is parsed, and one row per season is appended to
    the module-level ``player_figs`` list. Each row is the eight info values,
    the player name, then one value per entry of ``stat_names``.

    Players are skipped and recorded in the module-level ``no_stat_players``
    list when their page has fewer than four
    ``PlayerSummary_playerStatValue___EDg_`` values, fewer than eight info
    values, or when the StatMuse answer is missing or malformed. Players with
    an empty name, or whose first season row has no "SEASON" key, are skipped
    without being recorded.

    Args:
        player_links_fixed: player page URLs.
        names: player names, index-aligned with ``player_links_fixed`` and
            with ``muse_links``.
        timeout: forwarded to ``requests.get``; the default ``None`` keeps the
            original behaviour of waiting indefinitely.

    Returns:
        None. Results are accumulated in ``player_figs`` / ``no_stat_players``.

    Raises:
        KeyError: if a season row lacks one of the keys in ``stat_names``.
        Any exception raised by ``requests.get`` propagates to the caller.
    """
    for index, profile_url in enumerate(player_links_fixed):
        name = names[index]

        # Send get request to get basic player information
        profile_page = requests.get(profile_url, timeout=timeout)
        # Name the parser explicitly, like the other BeautifulSoup calls in this
        # notebook, so the result does not depend on which parsers are installed.
        profile = bs(profile_page.content, 'html.parser')

        # Ensuring that players with no stats are not added to the dataset:
        # their page has fewer than four summary stat values.
        stat_values = profile.find_all(class_="PlayerSummary_playerStatValue___EDg_")
        if len(stat_values) < 4:
            no_stat_players.append(name)
            continue

        # Grabbing the elements that contain the basic player info. All eight
        # are required; an incomplete page is recorded instead of raising
        # IndexError and aborting the whole scrape.
        info_values = profile.find_all(class_="PlayerSummary_playerInfoValue__JS8_v")
        if len(info_values) < 8:
            no_stat_players.append(name)
            continue
        if not name:
            continue

        temp_figs = [cell.text for cell in info_values[:8]]

        # The first two values keep only the text inside parentheses, minus a
        # trailing suffix of 1 and 2 characters respectively (presumably the
        # "m" and "kg" unit markers - confirm against the page). A value with
        # no parentheses becomes None instead of raising TypeError.
        for position, suffix_length in ((0, 1), (1, 2)):
            inner = extract_text_within_parentheses(temp_figs[position])
            temp_figs[position] = None if inner is None else inner[:-suffix_length]

        temp_figs.append(name)

        # Sending request to get actual player stats
        stat_muse = requests.get(muse_links[index], timeout=timeout)
        soup2 = bs(stat_muse.content, 'html.parser')
        answer_tag = soup2.find("visual-answer")

        try:
            # answer_tag is None when the page has no <visual-answer> element
            # (TypeError); the attribute may be absent (KeyError) or hold
            # invalid JSON (ValueError); the payload may lack the expected
            # nesting (KeyError / IndexError / TypeError).
            stats_json = json.loads(answer_tag['answer'])
            season_stats = stats_json["visual"]["detail"][0]["grids"][0]["rows"]
        except (TypeError, KeyError, IndexError, ValueError):
            no_stat_players.append(name)
            continue

        # Only grids whose rows carry a "SEASON" key are season tables
        if not season_stats or "SEASON" not in season_stats[0]:
            continue

        # Adding the complete player info to the final dataset
        for season in season_stats:
            season_values = [season[stat_type]["display"] for stat_type in stat_names]
            player_figs.append(temp_figs + season_values)

# Run the scrape for every player; rows accumulate in the module-level
# player_figs list and skipped players in no_stat_players.
get_player_figs(player_links_fixed, names)




## Saving the Dataset as CSV

In [4]:
import pandas as pd

# Column order must mirror how each row is assembled in get_player_figs: the
# eight info values, the player name, then one value per entry of stat_names.
# Deriving the stat columns from stat_names keeps the labels from drifting out
# of step with the data if that list is ever edited.
player_figs_header = ['HEIGHT', 'WEIGHT', 'COUNTRY', 'LAST ATTENDED', 'AGE', 'BIRTHDATE', 'DRAFT', 'EXPERIENCE', 'NAME'] + list(stat_names)

# create DataFrame
df = pd.DataFrame(player_figs, columns=player_figs_header)

# save the dataset as CSV, without the DataFrame index column
df.to_csv('player_stats_updated2.csv', index=False)