In [1]:
# Importing libraries

import numpy as np
import pandas as pd
from datetime import datetime, date
import re

from selenium import webdriver
from selenium.webdriver.common.by import By

In [3]:
# Scrape players data (all history)

players_name = ['113', 'Elyoya', "Caps", "Razork", "Yike", "Exakick", "Upset", "Humanoid", "Hylissang", "Nisqy"]

# Set up the Chrome WebDriver
driver = webdriver.Chrome()

# One entry per scraped row: [name used in the URL, name text shown on the page, *row fields]
data_table = []

# try/finally guarantees the browser is closed even when a page fails to load
# or an expected element is missing (find_element raises in that case).
try:
    for player_name in players_name:
        url = f"https://oracleselixir.com/player/{player_name}/statsBySplit"
        driver.get(url)

        # Elements holding the stats rows.
        # NOTE(review): these "sc-..." class names look auto-generated and may
        # change when the site is rebuilt - verify the selectors before re-running.
        div_element = driver.find_elements(By.CSS_SELECTOR, "div.sc-eFRcpv.gDBTwW")

        # The locator combines two classes, so it is written as an explicit CSS
        # selector; By.CLASS_NAME is meant for a single class name.
        player_original_name = driver.find_element(By.CSS_SELECTOR, ".sc-dovdUy.gBqzXj").text

        for item in div_element:
            # Each matched element's text is split into one field per line
            data_list = item.text.split("\n")
            data_table.append([player_name, player_original_name] + data_list)
finally:
    # Close the browser
    driver.quit()

In [4]:
# Column labels for the scraped rows: the two name fields added during
# scraping come first, followed by the fields split from each row's text.
columns_name = [
    "Player", "Player original name", "Event", "Team", "Position",
    "Games Played", "Win percentage", "KDA", "KP", "DTH%",
    "GD@10", "XPD@10", "CSPM", "DPM", "DMG%", "GOLD%", "WPM", "WCPM",
]
df_players = pd.DataFrame(data=data_table, columns=columns_name)

In [5]:
def extract_year(text):
    """Return the first standalone four-digit year (1900-2099) found in ``text``.

    Parameters
    ----------
    text : str
        Free text such as an event name, e.g. "LEC 2023 Winter Season".

    Returns
    -------
    str or None
        The matched year as a string, or None when ``text`` contains no such
        year or is not a string (e.g. None or NaN).
    """
    # A missing value (None/NaN) is not a string; re.search would raise
    # TypeError on it, so treat it as "no year found".
    if not isinstance(text, str):
        return None
    # \b on both sides keeps longer digit runs (e.g. "12023") from matching.
    year_pattern = r'\b(?:19|20)\d{2}\b'
    matches = re.search(year_pattern, text)
    return matches.group(0) if matches else None

# Derive a Year column by running extract_year on every event name
df_players['Year'] = df_players['Event'].map(extract_year)

In [8]:
# Sanity check: dimensions of the frame as (rows, columns)
df_players.shape

(335, 19)

In [9]:
# Remove the percent signs: strip the trailing '%' and convert to numbers
columnes_percentatges = ["Win percentage", "DTH%", "KP", "DMG%", "GOLD%"]
for col in columnes_percentatges:
    # Skip columns that are already numeric so this cell can be re-run safely:
    # the .str accessor below raises on a non-string column.
    if pd.api.types.is_numeric_dtype(df_players[col]):
        continue
    # '-' is turned into NaN before the remaining text is parsed
    df_players[col] = pd.to_numeric(df_players[col].replace('-', np.nan).str.rstrip('%'))

In [10]:
# Stat columns that must end up as floats
columnes_change_dtype = [
    "Games Played", "Win percentage", "KDA", "KP", "DTH%",
    "GD@10", "XPD@10", "CSPM", "DPM", "DMG%", "GOLD%", "WPM", "WCPM",
]
# Turn the '-' placeholder into NaN first, then cast the whole selection to float
stats_without_dashes = df_players[columnes_change_dtype].replace(to_replace='-', value=np.nan)
df_players[columnes_change_dtype] = stats_without_dashes.astype(float)

In [12]:
# Write the cleaned per-event player stats to CSV, leaving out the row index.
# NOTE(review): the destination is an absolute, machine-specific path.
player_stats_csv = 'C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/PlayerStats.csv'
df_players.to_csv(player_stats_csv, index=False)

In [13]:
# Show the last rows of the frame to spot-check the cleaned values
df_players.tail()

Unnamed: 0,Player,Player original name,Event,Team,Position,Games Played,Win percentage,KDA,KP,DTH%,GD@10,XPD@10,CSPM,DPM,DMG%,GOLD%,WPM,WCPM,Year
330,Nisqy,Yasin Dinçer,EUCS 2017 Spring Playoffs,Fnatic Academy,Middle,5.0,60.0,1.7,53.3,23.3,-343.0,3.0,8.3,352.0,24.6,22.0,0.42,0.15,2017
331,Nisqy,Yasin Dinçer,EUCS 2017 Spring,Fnatic Academy,Middle,10.0,50.0,2.6,56.3,18.9,-141.0,-301.0,8.4,366.0,22.1,24.0,0.46,0.17,2017
332,Nisqy,Yasin Dinçer,EUCS 2017 Spring Qualifiers,Fnatic Academy,Middle,7.0,86.0,7.9,71.7,16.4,118.0,155.0,7.8,518.0,24.8,22.8,0.44,0.2,2017
333,Nisqy,Yasin Dinçer,TPL 2016 Summer Finals,Team Orora,Middle,8.0,88.0,6.0,72.6,17.9,512.0,539.0,8.0,743.0,30.2,23.2,0.48,0.12,2016
334,Nisqy,Yasin Dinçer,EUCS 2016 Summer Qualifiers,Melty eSport Club,Middle,5.0,40.0,3.5,70.0,19.2,43.0,-343.0,7.5,675.0,27.4,21.2,0.59,0.21,2016


### GET GENERAL/OVERALL DATA

In [14]:
# Aggregation spec for the groupby: games played are summed,
# every other stat column is averaged.
mean_columns = [
    'Win percentage', 'KDA', 'KP', 'DTH%', 'GD@10', 'XPD@10',
    'CSPM', 'DPM', 'DMG%', 'GOLD%', 'WPM', 'WCPM',
]
aggregations = {'Games Played': 'sum'}
aggregations.update({column: 'mean' for column in mean_columns})

In [15]:
# One output row per distinct (Player, Position, Year, Event) combination;
# reset_index turns the group keys back into ordinary columns.
group_keys = ["Player", "Position", "Year", "Event"]
grouped_players = df_players.groupby(group_keys)
df_general = grouped_players.agg(aggregations).reset_index()

In [16]:
# Display the aggregated frame (the rendered output below is abbreviated)
df_general

Unnamed: 0,Player,Position,Year,Event,Games Played,Win percentage,KDA,KP,DTH%,GD@10,XPD@10,CSPM,DPM,DMG%,GOLD%,WPM,WCPM
0,113,Jungle,2020,Turkey Academy 2020 Summer,14.0,43.0,2.6,75.8,24.8,92.0,107.0,5.1,273.0,15.0,19.1,0.37,0.32
1,113,Jungle,2020,Turkey Academy 2020 Winter,15.0,53.0,4.0,66.4,20.9,102.0,-15.0,4.9,343.0,19.3,18.0,0.60,0.33
2,113,Jungle,2020,Turkey Academy 2020 Winter Playoffs,3.0,0.0,1.7,63.2,25.0,6.0,-378.0,3.6,210.0,11.1,13.6,0.57,0.40
3,113,Jungle,2021,EM 2021 Summer Main Event,10.0,50.0,2.7,72.7,20.7,-282.0,-128.0,5.0,429.0,20.4,18.8,0.37,0.26
4,113,Jungle,2021,LVP SL 2021 Summer,18.0,44.0,2.0,68.6,28.6,-13.0,-51.0,5.1,361.0,17.4,17.0,0.52,0.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,Yike,Jungle,2023,LEC 2023 Summer Season,9.0,89.0,5.4,75.0,22.9,203.0,101.0,5.7,426.0,18.6,20.0,0.28,0.23
323,Yike,Jungle,2023,LEC 2023 Winter Groups,5.0,80.0,5.8,77.3,22.0,345.0,141.0,6.7,687.0,24.4,19.7,0.23,0.28
324,Yike,Jungle,2023,LEC 2023 Winter Playoffs,7.0,86.0,6.4,69.6,19.7,111.0,79.0,6.2,584.0,23.2,21.2,0.24,0.29
325,Yike,Jungle,2023,LEC 2023 Winter Season,9.0,67.0,5.5,70.2,17.5,631.0,581.0,7.2,537.0,22.7,22.8,0.42,0.43


In [17]:
# Save the aggregated stats as csv.
# This must write df_general: writing df_players here would just duplicate
# PlayerStats.csv under the "General" file name.
df_general.to_csv('C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/PlayerStatsGeneral.csv', index = False)