In [9]:
# Importing libraries

import numpy as np
import pandas as pd
from datetime import datetime, date
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys

In [85]:
def scrape_player_data(player_name, player_data = None):
    driver = webdriver.Chrome()  # Replace with your preferred web driver (e.g., Firefox)

    url = 'https://gol.gg/esports/home/'  # Replace with the website URL

    try:
        driver.get(url)

        # Find the search bar and input the player name
        parent_div = driver.find_element(By.CSS_SELECTOR, 'div.selectize-input.items.not-full.has-options')  # Find the input inside the div with class 'selectize-input'
        search_input = parent_div.find_element(By.TAG_NAME, 'input')

        search_input.send_keys(player_name)
        search_input.send_keys(Keys.ENTER)
        # Wait for the player page to load
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))  # Replace 'player-info' with an appropriate identifier for the player info

        if player_data == None:
            player_data = {'Player': [], 'Event': []}

        # Find the dropdown selector for seasons
        seasons_dropdown = driver.find_element(By.ID, 'cbtournament')  # Replace 'seasons' with the actual name of the dropdown
        seasons = [option.get_attribute('value') for option in seasons_dropdown.find_elements(By.TAG_NAME, 'option')]

        for season in seasons:
            # Select the season from the dropdown
            seasons_dropdown = driver.find_element(By.ID, 'cbtournament')  # Replace 'seasons' with the actual name of the dropdown
            seasons_dropdown.send_keys(season)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

            table_elements = driver.find_elements(By.CLASS_NAME, 'table_list')
            
            player_data['Player'].append(player_name)
            player_data['Event'].append(season)

            for i, table in enumerate(table_elements):
                # Check if the table contains the desired th element with specific text
                th_element = table.find_element(By.TAG_NAME, 'th')
                if th_element.text == 'AGGRESSION' or th_element.text == "VISION" or th_element.text == "EARLY GAME": 
                    # Extract data from the table and add to the season_data dictionary
                    table_data = {}
                    rows = table.find_elements(By.TAG_NAME, 'tr')
                    for row in rows:
                        columns = row.find_elements(By.TAG_NAME, 'td')
                        row_data = [column.text.strip() for column in columns]
                        if len(row_data) and row_data[0] != '':
                            try:
                                player_data[row_data[0]].append(row_data[1])
                            except:
                                player_data[row_data[0]] = [row_data[1]]

                            table_data[row_data[0]] = row_data[1]

        return player_data

    except TimeoutException:
        print(f"Timed out while searching data for {player_name}")
        return None

    finally:
        driver.quit()

if __name__ == '__main__':
    players = ['Alvaro', '113']  # Replace this with your list of player names

    all_players_data = []
    for i,player in enumerate(players):
        if i == 0:
            player_data = scrape_player_data(player)
            print(f"{player} scrapped correctly!")
        else:
            player_data = scrape_player_data(player, player_data)
            print(f"{player} scrapped correctly!")



Alvaro scrapped correctly!
113 scrapped correctly!


In [101]:
df = pd.DataFrame.from_dict(player_data)

In [102]:
# Data formatting

def percentage_to_integer(percentage_str):
    return int(float(percentage_str.strip('%')) + 0.5)

df['Solo kills:'].replace('-', 0, inplace = True)
df.loc[:, ["Ahead in CS at 15 min:", "First Blood Participation:", "First Blood Victim:", "Damage%:"]] = df.loc[:, ["Ahead in CS at 15 min:", "First Blood Participation:", "First Blood Victim:", "Damage%:"]].applymap(percentage_to_integer)

new_column_names = {
    'Ahead in CS at 15 min:': 'AheadCS@15%',
    'First Blood Participation:': 'FB_Participation%',
    'First Blood Victim:': 'FB_Victim%'
}
df.rename(columns=new_column_names, inplace=True)

In [103]:
df

Unnamed: 0,Player,Event,AheadCS@15%,CS Differential at 15 min:,Gold Differential at 15 min:,XP Differential at 15 min:,FB_Participation%,FB_Victim%,Damage Per Minute:,Damage%:,K+A Per Minute:,Solo kills:,Pentakills:,Vision score Per Minute:,Ward Per Minute:,Vision Ward Per Minute:,Ward Cleared Per Minute:
0,Alvaro,ALL,44,-1.5,380,-108,28,6,215.3,9,0.45,0,0,2.92,1.56,0.43,0.29
1,Alvaro,SuperLiga Summer 2023,44,-1.5,380,-108,28,6,215.3,9,0.45,0,0,2.92,1.56,0.43,0.29
2,Alvaro,EMEA Masters Spring 2023,46,-0.4,255,152,36,9,216.2,8,0.59,2,0,2.69,1.39,0.4,0.28
3,Alvaro,SuperLiga Spring Playoffs 2023,57,0.3,404,98,36,29,212.5,9,0.5,1,0,2.92,1.56,0.43,0.33
4,Alvaro,SuperLiga Spring 2023,56,1.6,259,200,39,17,214.7,10,0.42,0,0,3.19,1.22,0.42,0.43
5,Alvaro,LVP SL Summer Playoffs 2022,0,-12.8,38,101,40,20,241.9,12,0.18,1,0,2.68,1.68,0.43,0.34
6,Alvaro,LVP SL Summer 2022,6,-8.1,183,-135,39,22,260.1,10,0.35,0,0,2.67,1.68,0.48,0.32
7,113,ALL,22,-5.4,-155,-467,33,0,302.1,16,0.24,1,0,1.15,0.28,0.28,0.23
8,113,LEC Summer 2023,22,-5.4,-155,-467,33,0,302.1,16,0.24,1,0,1.15,0.28,0.28,0.23
9,113,LEC Spring Groups 2023,0,-23.0,-1010,-953,43,43,336.9,15,0.3,0,0,1.11,0.24,0.23,0.21


In [92]:
df.loc[:, ["Ahead in CS at 15 min:", "First Blood Participation:", "First Blood Victim:", "Damage%:"]].replace("%", "")

Unnamed: 0,Ahead in CS at 15 min:,First Blood Participation:,First Blood Victim:,Damage%:
0,44.4%,27.8%,5.6%,9.2%
1,44.4%,27.8%,5.6%,9.2%
2,45.5%,36.4%,9.1%,8.4%
3,57.1%,35.7%,28.6%,8.5%
4,55.6%,38.9%,16.7%,9.7%
5,0%,40%,20%,11.8%
6,5.6%,38.9%,22.2%,10.4%
7,22.2%,33.3%,0%,16.2%
8,22.2%,33.3%,0%,16.2%
9,0%,42.9%,42.9%,15.1%


In [64]:
all_players_data[0].keys()

dict_keys(['Player', 'Event', 'Ahead in CS at 15 min:', 'CS Differential at 15 min:', 'Gold Differential at 15 min:', 'XP Differential at 15 min:', 'First Blood Participation:', 'First Blood Victim:', 'Damage Per Minute:', 'Damage%:', 'K+A Per Minute:', 'Solo kills:', 'Pentakills:', 'Vision score Per Minute:', 'Ward Per Minute:', 'Vision Ward Per Minute:', 'Ward Cleared Per Minute:'])

In [66]:
all_players_data[0]['Ahead in CS at 15 min:']

['44.4%', '44.4%', '45.5%', '57.1%', '55.6%', '0%', '5.6%']