In [1]:
# Importing libraries

import numpy as np
import pandas as pd
from datetime import datetime, date
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys

In [2]:
def _append_row(player_data, row, missing='-'):
    """Append ``row`` to the column-oriented ``player_data``, keeping all columns equally long."""
    rows_before = max((len(column) for column in player_data.values()), default=0)

    for key, value in row.items():
        column = player_data.setdefault(key, [])
        # A stat seen for the first time (or one that fell behind) is
        # back-filled so its new value lands on the same row as the others.
        column.extend([missing] * (rows_before - len(column)))
        column.append(value)

    # Columns that received no value for this row still need a cell.
    for column in player_data.values():
        column.extend([missing] * (rows_before + 1 - len(column)))


def scrape_player_data(player_name, player_data = None):
    """Scrape one player's per-tournament stats from gol.gg into ``player_data``.

    The player is looked up through the site's search box, then every option
    of the ``cbtournament`` dropdown is selected in turn and the stat rows of
    the AGGRESSION, VISION and EARLY GAME tables are collected.

    Args:
        player_name: Name typed into the gol.gg search box.
        player_data: Optional column-oriented dict (column name -> list of
            values) from previous calls. It is extended in place; a new dict
            with ``'Player'`` and ``'Event'`` columns is created when omitted.

    Returns:
        The dict with one appended row per dropdown option, or ``None`` if a
        wait timed out. Cells for stats that a given tournament does not list
        are filled with ``'-'``, the same marker the site uses for missing
        values, so every column always has the same length.
    """
    # Only tables whose first header cell is one of these are scraped.
    stat_tables = ('AGGRESSION', 'VISION', 'EARLY GAME')

    driver = webdriver.Chrome()
    url = 'https://gol.gg/esports/home/'

    try:
        driver.get(url)

        # The search box is an <input> nested inside the selectize wrapper div.
        parent_div = driver.find_element(By.CSS_SELECTOR, 'div.selectize-input.items.not-full.has-options')
        search_input = parent_div.find_element(By.TAG_NAME, 'input')

        search_input.send_keys(player_name)
        search_input.send_keys(Keys.ENTER)

        # The tournament dropdown only exists on the player page, so its
        # presence signals that the search navigated successfully.
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

        if player_data is None:
            player_data = {'Player': [], 'Event': []}

        seasons_dropdown = driver.find_element(By.ID, 'cbtournament')
        seasons = [option.get_attribute('value') for option in seasons_dropdown.find_elements(By.TAG_NAME, 'option')]

        for season in seasons:
            # Re-locate the dropdown on every iteration before typing the
            # option value into it.
            seasons_dropdown = driver.find_element(By.ID, 'cbtournament')
            seasons_dropdown.send_keys(season)
            # NOTE(review): the dropdown is already present at this point, so
            # this wait may return before the new season's tables have
            # loaded - confirm against the live site.
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

            table_elements = driver.find_elements(By.CLASS_NAME, 'table_list')

            # Build the complete row first so player_data is only touched
            # once the whole season has been read.
            row = {'Player': player_name, 'Event': season}
            for table in table_elements:
                headers = table.find_elements(By.TAG_NAME, 'th')
                if not headers or headers[0].text not in stat_tables:
                    continue

                for table_row in table.find_elements(By.TAG_NAME, 'tr'):
                    cells = [cell.text.strip() for cell in table_row.find_elements(By.TAG_NAME, 'td')]
                    # A stat row needs a non-empty label and a value cell.
                    if len(cells) >= 2 and cells[0] != '':
                        row.setdefault(cells[0], cells[1])

            _append_row(player_data, row)

        return player_data

    except TimeoutException:
        print(f"Timed out while searching data for {player_name}")
        return None

    finally:
        # Always close the browser, even when scraping fails.
        driver.quit()

if __name__ == '__main__':
    players = ['113', 'Elyoya', "Caps", "Razork", "Yike", "Exakick", "Upset", "Humanoid", "Hylissang", "Nisqy"]

    # Every player is accumulated into the same column-oriented dict.
    # Starting from None lets the first call create it.
    player_data = None
    for player in players:
        scraped = scrape_player_data(player, player_data)
        if scraped is None:
            # The scraper returns None on a timeout. Keep what was collected
            # so far instead of passing None on and starting from scratch.
            print(f"{player} could not be scraped; keeping previously collected data.")
            continue

        player_data = scraped
        print(f"{player} scrapped correctly!")



113 scrapped correctly!
Elyoya scrapped correctly!
Caps scrapped correctly!
Razork scrapped correctly!
Yike scrapped correctly!
Exakick scrapped correctly!
Upset scrapped correctly!
Humanoid scrapped correctly!
Hylissang scrapped correctly!
Nisqy scrapped correctly!


In [10]:
# Turn the column-oriented dict produced by the scraping loop into a table.
df = pd.DataFrame(data=player_data)

In [11]:
# Show the scraped table as built, before any formatting is applied.
df

Unnamed: 0,Player,Event,Ahead in CS at 15 min:,CS Differential at 15 min:,Gold Differential at 15 min:,XP Differential at 15 min:,First Blood Participation:,First Blood Victim:,Damage Per Minute:,Damage%:,K+A Per Minute:,Solo kills:,Pentakills:,Vision score Per Minute:,Ward Per Minute:,Vision Ward Per Minute:,Ward Cleared Per Minute:
0,113,ALL,22.2%,-5.4,-155,-467,33.3%,0%,302.1,16.2%,0.24,1,0,1.15,0.28,0.28,0.23
1,113,LEC Summer 2023,22.2%,-5.4,-155,-467,33.3%,0%,302.1,16.2%,0.24,1,0,1.15,0.28,0.28,0.23
2,113,LEC Spring Groups 2023,0%,-23,-1010,-953,42.9%,42.9%,336.9,15.1%,0.3,-,0,1.11,0.24,0.23,0.21
3,113,LEC Spring Season 2023,11.1%,-16.4,-139,-495,44.4%,11.1%,339,16.2%,0.37,-,0,1.64,0.5,0.35,0.28
4,113,LEC Winter Groups 2023,25%,-8.9,-248,-151,12.5%,25%,280.4,13%,0.3,1,0,1.6,0.22,0.24,0.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316,Nisqy,EU LCS Spring 2018,26.3%,-6.7,-210,-259,36.8%,5.3%,429.2,25.2%,0.17,2,0,1.13,0.57,0.21,0.27
317,Nisqy,NA LCS Summer Playoffs 2017,0%,-10.8,-109,-173,40%,0%,518.8,27.2%,0.25,1,0,-,0.5,0.14,0.28
318,Nisqy,NA LCS Summer 2017,57.7%,-1.7,-66,-32,26.9%,15.4%,523.3,28.4%,0.21,8,0,-,0.44,0.16,0.19
319,Nisqy,EU LCS Summer Promotion 2017,75%,+4.8,-9,+127,50%,8.3%,474.4,27.6%,0.27,4,0,-,0.43,0.12,0.19


In [12]:
# Data formatting

def percentage_to_integer(percentage_str):
    """Convert a percentage cell such as ``'22.2%'`` to the nearest whole number.

    Args:
        percentage_str: Usually a string like ``'42.9%'``. The placeholder
            ``'-'``, an empty string, ``None`` and NaN are treated as missing.
            Plain numbers (for example when this is re-applied to values that
            were already converted) are rounded the same way.

    Returns:
        The percentage rounded half away from zero as an ``int``, or
        ``float('nan')`` for a missing value.
    """
    if percentage_str is None:
        return float('nan')

    if isinstance(percentage_str, str):
        # Strip whitespace before the '%' sign, otherwise ' 57.7% ' keeps
        # its '%' and float() rejects it.
        text = percentage_str.strip().rstrip('%').strip()
        if text in ('', '-'):
            return float('nan')
        value = float(text)
    else:
        value = float(percentage_str)

    if value != value:  # NaN is the only value that differs from itself
        return float('nan')

    # int() truncates toward zero, so the +0.5 trick only rounds correctly
    # for non-negative numbers; mirror it for negative ones.
    if value >= 0:
        return int(value + 0.5)
    return -int(-value + 0.5)


# The site shows '-' in the solo-kills column for some events; the notebook
# records those as 0. Assign the result back to the column: an in-place
# replace on a column selection is not guaranteed to write through to df.
df['Solo kills:'] = df['Solo kills:'].replace('-', 0)

# Percentage strings such as '22.2%' become rounded whole numbers, with '-'
# turned into NaN. Each column is converted with Series.map rather than the
# deprecated DataFrame.applymap, and stored as float64 so the four columns
# share one numeric dtype whether or not they contain NaN.
percentage_columns = ["Ahead in CS at 15 min:", "First Blood Participation:", "First Blood Victim:", "Damage%:"]
for column in percentage_columns:
    df[column] = df[column].map(percentage_to_integer).astype('float64')

# Short headers for the stat columns, listed as (scraped label, new name).
new_column_names = dict([
    ('Ahead in CS at 15 min:', 'AheadCS@15%'),
    ('First Blood Participation:', 'FB_Participation%'),
    ('First Blood Victim:', 'FB_Victim%'),
    ('CS Differential at 15 min:', 'CSD@15'),
    ('Gold Differential at 15 min:', 'GD@15'),
    ('XP Differential at 15 min:', 'XPD@15'),
    ('Damage Per Minute:', 'DPM'),
    ('Damage%:', 'DMG%'),
    ('K+A Per Minute:', 'KA_PM'),
    ('Solo kills:', 'SoloKills'),
    ('Pentakills:', 'Pentakills'),
    ('Vision score Per Minute:', 'VSPM'),
    ('Ward Per Minute:', 'WPM'),
    ('Vision Ward Per Minute:', 'VWPM'),
    ('Ward Cleared Per Minute:', 'WCPM'),
])
# Relabel the columns of df itself; columns not listed keep their name.
df.rename(new_column_names, axis='columns', inplace=True)

In [28]:
# Any cell that is still exactly '-' has no value; store it as NaN.
df = df.replace('-', np.nan)

# Abbreviate the international event names while keeping the year.
# regex=True is required: the patterns use a capture group and a \1
# back-reference, and pandas >= 2.0 treats the pattern as a literal string
# by default, which would silently leave these names unchanged.
df['Event'] = df['Event'].str.replace(r'Mid-Season Invitational (\d+)', r'MSI \1', regex=True)
df['Event'] = df['Event'].str.replace(r'World Championship (\d+)', r'Worlds \1', regex=True)

  df['Event'] = df['Event'].str.replace(r'World Championship (\d+)', r'Worlds \1')


In [31]:
# Abbreviate 'EU Masters' in event names. The pattern has no regex
# metacharacters, so a literal match is requested explicitly.
event_names = df['Event']
df['Event'] = event_names.str.replace('EU Masters', 'EM', regex=False)

In [32]:
# Show the table after the formatting and renaming cells have run.
df

Unnamed: 0,Player,Event,AheadCS@15%,CSD@15,GD@15,XPD@15,FB_Participation%,FB_Victim%,DPM,DMG%,KA_PM,SoloKills,Pentakills,VSPM,WPM,VWPM,WCPM
0,113,ALL,22.0,-5.4,-155,-467,33.0,0.0,302.1,16.0,0.24,1,0,1.15,0.28,0.28,0.23
1,113,LEC Summer 2023,22.0,-5.4,-155,-467,33.0,0.0,302.1,16.0,0.24,1,0,1.15,0.28,0.28,0.23
2,113,LEC Spring Groups 2023,0.0,-23,-1010,-953,43.0,43.0,336.9,15.0,0.3,0,0,1.11,0.24,0.23,0.21
3,113,LEC Spring Season 2023,11.0,-16.4,-139,-495,44.0,11.0,339,16.0,0.37,0,0,1.64,0.5,0.35,0.28
4,113,LEC Winter Groups 2023,25.0,-8.9,-248,-151,13.0,25.0,280.4,13.0,0.3,1,0,1.6,0.22,0.24,0.45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316,Nisqy,EU LCS Spring 2018,26.0,-6.7,-210,-259,37.0,5.0,429.2,25.0,0.17,2,0,1.13,0.57,0.21,0.27
317,Nisqy,NA LCS Summer Playoffs 2017,0.0,-10.8,-109,-173,40.0,0.0,518.8,27.0,0.25,1,0,,0.5,0.14,0.28
318,Nisqy,NA LCS Summer 2017,58.0,-1.7,-66,-32,27.0,15.0,523.3,28.0,0.21,8,0,,0.44,0.16,0.19
319,Nisqy,EU LCS Summer Promotion 2017,75.0,+4.8,-9,+127,50.0,8.0,474.4,28.0,0.27,4,0,,0.43,0.12,0.19


In [33]:
# Write the formatted table to disk without the row index.
# NOTE(review): the destination is an absolute path on one specific machine;
# adjust it before running elsewhere.
df.to_csv(
    path_or_buf='C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/PlayersGolggStats.csv',
    index=False,
)