In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
from datetime import datetime, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium_stealth import stealth
import undetected_chromedriver as uc

In [14]:
def initDriver(url, driver):
    """Load ``url`` in a browser session, starting one first if needed.

    Parameters
    ----------
    url : str
        Address to navigate to.
    driver : object or None
        Existing driver to reuse. When ``None``, a new
        ``undetected_chromedriver`` Chrome instance is created with the
        ``--blink-settings=imagesEnabled=false`` argument.

    Returns
    -------
    object
        The driver (reused or newly created) after ``get(url)`` was called.
    """
    # Reuse path: an open session only needs to navigate.
    if driver is not None:
        driver.get(url)
        return driver

    chrome_options = Options()
    # The scraping functions only read element text, so image loading is switched off.
    chrome_options.add_argument('--blink-settings=imagesEnabled=false')
    driver = uc.Chrome(options=chrome_options)
    driver.get(url)
    return driver

In [3]:
def findPlayer(driver, player, team, league):
    """Click the first search result whose league label equals ``league``.

    Result items and league labels are collected as two separate element
    lists and paired by position.
    NOTE(review): this assumes every result item has exactly one league
    label and that both lists come back in the same order -- confirm
    against the live page markup.

    Parameters
    ----------
    driver : WebDriver
        Driver currently showing the player search results page.
    player : str
        Accepted for a uniform signature with the other steps; not used here.
    team : str
        Accepted for a uniform signature with the other steps; not used here.
    league : str
        League label to match exactly against each result's category text.

    Returns
    -------
    None
        Nothing is returned. If no label matches, no element is clicked.
    """
    # Fixed pause before querying the page for results.
    time.sleep(1)
    items = driver.find_elements(By.XPATH, '//li[@class="player__Results__Item"]')
    leagues = driver.find_elements(By.XPATH, '//div[@class="LogoTile__Meta LogoTile__Meta--category"]')

    # zip() stops at the shorter list, so a result without a league label
    # can no longer trigger an IndexError the way leagues[i] could.
    for item, league_label in zip(items, leagues):
        if league_label.text == league:
            item.click()
            break

    return None

In [4]:
def findStats(driver, player, team, league):
    """Click the "Game Log" element on the page the driver is showing.

    ``player``, ``team`` and ``league`` are accepted but not used.
    Always returns ``None``.
    """
    # Fixed pause before looking up the element.
    time.sleep(1)
    game_log_tab = driver.find_element(By.XPATH, '//span[text()="Game Log"]')
    game_log_tab.click()

In [5]:
def scrapeStats(driver, player, team, league, date):
    """Read the first game-log row and return its stats if it is for ``date``.

    Only the row with ``data-idx="0"`` is inspected. Its first cell is
    expected to look like ``"Tue 6/6"``, a weekday prefix followed by
    month/day with no year. The year is taken from ``date`` so that queries
    for any season can match, not only 2023.

    Parameters
    ----------
    driver : WebDriver
        Driver currently showing the game-log table.
    player, team, league : str
        Copied into the result under ``'Name'``, ``'Team'`` and ``'League'``.
    date : datetime.datetime
        Game date to look for; must provide ``.year`` and ``.date()``.

    Returns
    -------
    dict
        Always contains ``'Name'``, ``'Team'``, ``'League'`` and ``'Date'``.
        When the first row's date equals ``date``, each header text is added
        as a key mapped to the corresponding cell text.

    Raises
    ------
    IndexError
        If the first row has no cells.
    ValueError
        If the first cell's text cannot be parsed as month/day.
    """
    # Fixed pause before reading the table.
    time.sleep(1)
    header_row = driver.find_element(By.CLASS_NAME, "Table__sub-header")
    categories = header_row.find_elements(By.CSS_SELECTOR, "th.Table__TH")
    first_row = driver.find_element(By.XPATH, './/tr[@data-idx="0"]')
    stats = first_row.find_elements(By.CSS_SELECTOR, "td.Table__TD")
    players = {'Name': player, 'Team': team, 'League': league, 'Date': date}

    # [4:] drops the 3-letter weekday and the space; the requested year is
    # appended because the page text carries none.
    game_day = datetime.strptime(f"{stats[0].text[4:]}/{date.year}", '%m/%d/%Y').date()

    if date.date() == game_day:
        # zip() tolerates a row with fewer cells than there are headers.
        for category, stat in zip(categories, stats):
            players[category.text] = stat.text

    return players

In [6]:
def run(df_query):
    """Scrape one stats record per row of ``df_query`` and collect them.

    For every row, the player search page is opened, the matching result is
    clicked, the game log is opened and the stats are scraped. A single
    browser session is reused across rows and closed at the end, including
    when a step raises.

    Parameters
    ----------
    df_query : pandas.DataFrame
        Must have the columns ``'Name'``, ``'Team'``, ``'League'`` and
        ``'Date'``.

    Returns
    -------
    pandas.DataFrame
        One row per query row, built from the dicts returned by
        ``scrapeStats``. Empty when ``df_query`` has no rows.
    """
    # Function-scope import keeps this cell self-contained.
    from urllib.parse import quote

    statLst = []
    driver = None

    try:
        for _, row in df_query.iterrows():
            player, team, league, date = row["Name"], row["Team"], row["League"], row["Date"]
            # Percent-encode the whole name (spaces, apostrophes, accents),
            # not just spaces.
            url = 'https://www.espn.com/search/_/type/players/q/' + quote(player, safe='')
            driver = initDriver(url, driver)
            findPlayer(driver, player, team, league)
            findStats(driver, player, team, league)
            statLst.append(scrapeStats(driver, player, team, league, date))
    finally:
        # driver is still None when there were no rows or the first
        # initDriver call failed; calling quit() on it would raise
        # AttributeError and hide the real error.
        if driver is not None:
            driver.quit()

    return pd.DataFrame(statLst)

In [7]:
# One query per entry: [player name, team, league, game date].
data = [
    ['Tyler Anderson', 'LAA', 'MLB', datetime(2023, 6, 6)],
    ['Thairo Estrada', 'SFG', 'MLB', datetime(2023, 6, 10)],
]

# Build the query frame with the column names that run() reads.
df_query = pd.DataFrame(data, columns=['Name', 'Team', 'League', 'Date'])

# Scrape the stats for every query row (this opens a browser session).
df = run(df_query)

In [8]:
# Keep an independent copy of the scraped frame so df can be reset later
# without running the scrape again.
dfCopy = df.copy()

In [9]:
# Reset df to a fresh copy of the saved frame (dfCopy).
df = dfCopy.copy()

In [10]:
# Show the frame's dimensions as (rows, columns).
df.shape

(2, 34)

In [11]:
# List the column labels: the four query fields plus every scraped stat header.
df.columns

Index(['Name', 'Team', 'League', 'Date', 'DATE', 'OPP', 'RESULT', 'IP', 'H',
       'R', 'ER', 'HR', 'BB', 'K', 'GB', 'FB', 'P', 'TBF', 'GSC', 'DEC', 'REL',
       'ERA', 'AB', '2B', '3B', 'RBI', 'HBP', 'SO', 'SB', 'CS', 'AVG', 'OBP',
       'SLG', 'OPS'],
      dtype='object')

In [12]:
# Preview the first rows of df (up to five).
df.head()

Unnamed: 0,Name,Team,League,Date,DATE,OPP,RESULT,IP,H,R,...,3B,RBI,HBP,SO,SB,CS,AVG,OBP,SLG,OPS
0,Tyler Anderson,LAA,MLB,2023-06-06,Tue 6/6,vs\nCHC,W\n7-4,5.0,5,4,...,,,,,,,,,,
1,Thairo Estrada,SFG,MLB,2023-06-10,Sat 6/10,vs\nCHC,L\n4-0,,0,0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.293,0.339,0.47,0.809


In [13]:
# Preview the last rows of df (up to five).
df.tail()

Unnamed: 0,Name,Team,League,Date,DATE,OPP,RESULT,IP,H,R,...,3B,RBI,HBP,SO,SB,CS,AVG,OBP,SLG,OPS
0,Tyler Anderson,LAA,MLB,2023-06-06,Tue 6/6,vs\nCHC,W\n7-4,5.0,5,4,...,,,,,,,,,,
1,Thairo Estrada,SFG,MLB,2023-06-10,Sat 6/10,vs\nCHC,L\n4-0,,0,0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.293,0.339,0.47,0.809
