In [1]:
# Importing libraries

import os
import random
import re
import time
from collections import Counter
from datetime import datetime, date
from io import StringIO

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

To build the graph, the main step is to have a functional recommender system. For the recommendations we will use 3 main datasets:
- The champions dataset with their stats (HP, AD, AP, range, etc.)
- The champions dataset with their nature (lane, resource used, type of champ, etc.)
- The players dataset (similar to the iTero method)

In [232]:
### First dataset (stats dataset)

# Wiki page that holds the base-statistics table of every champion
url = 'https://leagueoflegends.fandom.com/wiki/List_of_champions/Base_statistics'

# The table is identified by its full class list, written as a CSS selector
stats_table_locator = (By.CSS_SELECTOR, ".sortable.wikitable.sticky-header.jquery-tablesorter")

# Set up the Chrome WebDriver
driver = webdriver.Chrome()
try:
    # Navigate to the website
    driver.get(url)

    # Wait until the table is loaded; until() returns the located element,
    # so no second lookup is needed
    table_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located(stats_table_locator))
    table_html = table_element.get_attribute('outerHTML')
finally:
    # Always close the browser, even when the wait times out
    driver.quit()

# Convert HTML to DataFrame (wrapped in StringIO: passing literal HTML to
# read_html is deprecated in recent pandas versions)
df_stats = pd.read_html(StringIO(table_html))[0]


In [233]:
# Parse the percentage strings of 'AS+' into floats, store them in a new
# 'AS+(%)' column and remove the original text column from the frame
as_growth_pct = df_stats['AS+'].str.replace('%', '').astype(float)
df_stats['AS+(%)'] = as_growth_pct
del df_stats['AS+']

In [234]:
# Display the stats table to check the result of the 'AS+' -> 'AS+(%)' conversion
df_stats

Unnamed: 0,Champions,HP,HP+,HP5,HP5+,MP,MP+,MP5,MP5+,AD,AD+,AS,AR,AR+,MR,MR+,MS,Range,AS+(%)
0,Aatrox,650,114.0,3.00,1.00,0,0.0,0.00,0.00,60,5.00,0.651,38.0,4.45,32.0,2.05,345,175,2.500
1,Ahri,590,96.0,2.50,0.60,418,25.0,8.00,0.80,53,3.00,0.668,21.0,4.70,30.0,1.30,330,550,2.000
2,Akali,570,119.0,9.00,0.90,200,0.0,50.00,0.00,62,3.30,0.625,23.0,4.70,37.0,2.05,345,125,3.200
3,Akshan,630,104.0,3.75,0.65,350,40.0,8.20,0.70,52,3.50,0.638,26.0,4.20,30.0,1.30,330,500,4.000
4,Alistar,685,120.0,8.50,0.85,350,40.0,8.50,0.80,62,3.75,0.625,47.0,4.70,32.0,2.05,330,125,2.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,Zeri,630,110.0,3.25,0.70,250,45.0,6.00,0.80,53,1.30,0.658,24.0,4.20,30.0,1.30,335,500,2.000
162,Ziggs,606,106.0,6.50,0.60,480,23.5,8.00,0.80,54,3.10,0.656,22.0,4.50,30.0,1.30,325,550,2.000
163,Zilean,574,96.0,5.50,0.50,452,50.0,11.35,0.80,52,3.00,0.625,24.0,5.00,30.0,1.30,335,550,2.130
164,Zoe,630,106.0,6.50,0.60,425,25.0,8.00,0.65,58,3.30,0.625,21.0,4.70,30.0,1.30,340,550,2.500


In [235]:
### Second dataset (nature dataset)

# Champions to scrape: one wiki page is visited per name of the stats table
champions = list(df_stats['Champions'])

# List where we will store the data: one [name, [...], [...], ...] entry per champion
data = []

# Set up the Chrome WebDriver
driver = webdriver.Chrome()

try:
    # Iterate through the champs
    for i, champ in enumerate(champions):

        # Go to the url
        url = f"https://leagueoflegends.fandom.com/wiki/{champ}/LoL"
        driver.get(url)

        # Remove the cookies pop-up, only attempted on the first page.
        # Best effort: a missing banner must not abort the whole scrape.
        if i == 0:
            try:
                WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, 'div._2O--J403t2VqCuF8XJAZLK'))
                ).click()
            except TimeoutException:
                print("Cookies pop-up not found, continuing without dismissing it")

        # Wait until data loads; until() returns the infobox element itself
        infobox_div = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "infobox-champion-container"))
        )

        # Get the information divs that contain the data and keep rows 4..8 only
        # NOTE(review): positional slice, presumably relies on every infobox
        # listing its rows in the same order. Confirm.
        information_divs = infobox_div.find_elements(
            By.CSS_SELECTOR, '.pi-item.pi-data.pi-item-spacing.pi-border-color'
        )[4:9]

        # Store champ data: the name first, then one list of link texts per kept row
        aux = [champ]
        for div_element in information_divs:
            links = div_element.find_elements(By.CSS_SELECTOR, 'div span a')
            link_texts = [link.text for link in links]
            # Links without visible text are dropped
            aux.append([text for text in link_texts if text])

        # Add champ data to the final data list
        data.append(aux)
finally:
    # Close the browser even if a page fails to load
    driver.quit()


In [310]:
# Store positional data
# Column 0 of the scraped data is the champion name and column 2 the list that
# is saved here as 'Position'. copy() makes the slice an independent frame so
# the column assignment below does not act on a view.
df_pos = pd.DataFrame(data).iloc[:, [0, 2]].copy()
df_pos.columns = ['label', 'Position']

# Keep only the first listed value; an empty or missing list becomes None
# instead of raising IndexError/TypeError
df_pos['Position'] = df_pos['Position'].apply(lambda x: x[0] if x else None)
df_pos.to_csv('C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/champPosition.csv', index = False)

In [236]:
# Pass the data to dataframe: one row per champion, where column 0 holds the
# champion name and the remaining columns hold the lists of scraped link texts
df_nature = pd.DataFrame(data)

In [256]:
# Explode and create one-hot table
df_nature.rename(columns={0: 'champ_name'}, inplace=True)

# Long format: one (champ_name, Category) row per scraped attribute.
# explode() repeats index labels, so the index is rebuilt afterwards.
exploded_df = (
    df_nature
    .melt(id_vars=['champ_name'], value_name='Category')
    .drop(columns='variable')
    .explode('Category')
    .reset_index(drop=True)
)

# Count how often each category appears per champion, then cap every count at 1.
# clip() also handles a category listed three or more times, which the former
# replace(2, 1) left untouched. Rows whose Category is missing are ignored by groupby.
encoded_df = (
    exploded_df
    .groupby(['champ_name', 'Category'])
    .size()
    .unstack(fill_value=0)
    .clip(upper=1)
    .reset_index()
)

In [260]:
# Display the one-hot table: one row per champion, one 0/1 column per category
encoded_df

Category,champ_name,Assassin,Blood Well,Bloodthirst,Bottom,Courage,Energy,Ferocity,Fighter,Flow,...,Melee,Middle,Physical,Rage,Ranged,Shield,Soul Unbound,Support,Tank,Top
0,Aatrox,0,1,0,0,0,0,0,1,0,...,1,1,1,0,0,0,0,0,1,1
1,Ahri,1,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
2,Akali,1,0,0,0,0,1,0,0,0,...,1,1,1,0,0,0,0,0,0,1
3,Akshan,1,0,0,0,0,0,0,0,0,...,0,1,1,0,1,0,0,0,0,0
4,Alistar,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,Zeri,0,0,0,1,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
162,Ziggs,0,0,0,1,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
163,Zilean,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,1,0,0
164,Zoe,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,1,0,0


In [265]:
# Combine both champion tables: the stats rows are matched with the one-hot
# rows through the champion name ('Champions' on the left, 'champ_name' on the
# right), and the joined table is written to disk without the index
statsNature_df = df_stats.merge(
    encoded_df,
    left_on = "Champions",
    right_on = "champ_name",
)
statsNature_df.to_csv(
    'C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/statsNature.csv',
    index = False,
)

In [266]:
### Third dataset

summonerIds = []
summonerRegion = []

# The Riot API key is a secret: it is read from the RIOT_API_KEY environment
# variable instead of being committed inside the notebook
api_key = os.environ.get('RIOT_API_KEY', '')
if not api_key:
    print("RIOT_API_KEY is not set: requests to the Riot API will be rejected")

fullRegionList = ['euw1', 'na1', 'kr']

# Sampling weights for the first (tier) and second (division) value of each tuple
first_value_ratios = {'CHALLENGER': 5, 'GRANDMASTER': 3, 'MASTER': 1, 'DIAMOND': 1}
second_value_ratios = {'I': 1, 'II': 3, 'III': 1, 'IV': 5}

tier_names, tier_weights = list(first_value_ratios), list(first_value_ratios.values())
division_names, division_weights = list(second_value_ratios), list(second_value_ratios.values())

# Create a list to store the tuples
tierList = []

# Generate 100 (tier, division) tuples
for _ in range(100):
    first_value = random.choices(tier_names, weights=tier_weights)[0]
    if first_value == 'DIAMOND':
        # Only DIAMOND gets a sampled division
        second_value = random.choices(division_names, weights=division_weights)[0]
    else:
        # Every other tier always uses division 'I'
        second_value = 'I'
    tierList.append((first_value, second_value))

In [267]:
# Get the summoner Ids
# NOTE(review): tierList repeats the same (tier, division) pairs and no page
# number is sent, so a repeated pair presumably returns the same entries again.
# Confirm the duplicates are intended.
for y in fullRegionList:
    for tier, division in tierList:
        URL_ids = ('https://'+y+'.api.riotgames.com/lol/league-exp/v4/entries/RANKED_SOLO_5x5/'+tier+'/'+division+'/?api_key='+api_key)
        try:
            response = requests.get(URL_ids, timeout=10)
        except requests.RequestException as error:
            # The URL is never printed because it embeds the API key
            print(f"Request failed for {y} {tier} {division}: {type(error).__name__}")
            continue

        # A non-200 body is not the list of entries this loop reads, so skip it
        # instead of failing on a missing key
        if response.status_code != 200:
            print(f"Skipping {y} {tier} {division}: HTTP {response.status_code}")
            continue

        # Parse the body once instead of once per entry
        for entry in response.json():
            summonerIds.append(entry['summonerId'])
            summonerRegion.append(y)

In [269]:
# Count the collected summoner IDs; the [:10000] slice caps the reported number at 10000
len(summonerIds[:10000])

10000

In [271]:
final_list = []

# Only the first 2500 collected summoners are queried, each paired with its region
targets = list(zip(summonerIds, summonerRegion))[:2500]

for ID, region in tqdm(targets):
    URL_maestries = 'https://'+ region +'.api.riotgames.com/lol/champion-mastery/v4/champion-masteries/by-summoner/'+ID+'/?api_key='+api_key

    champList = None
    try:
        response = requests.get(URL_maestries, timeout=10)
    except requests.RequestException as error:
        # The URL is never printed because it embeds the API key
        tqdm.write(f"Request failed for a {region} summoner: {type(error).__name__}")
    else:
        if response.status_code == 200:
            # Parse the body once; look at the first five entries of the response
            # and keep the champions with more than 30000 mastery points
            masteries = response.json()
            champList = [
                mastery['championId']
                for mastery in masteries[:5]
                if mastery['championPoints'] > 30000
            ]
        else:
            tqdm.write(f"Skipping a {region} summoner: HTTP {response.status_code}")

    # Players whose request failed are left out of the table
    if champList is not None:
        final_list.append(champList)

    # Pause between requests
    time.sleep(1.2)

# Always produce the five champion columns: reindex() adds the missing ones when
# no player has five qualifying champions (passing five column names directly
# to the constructor raises in that case)
champ_columns = ['Champ1', 'Champ2', 'Champ3', 'Champ4', 'Champ5']
mostPlayed = pd.DataFrame(final_list).reindex(columns=range(len(champ_columns)))
mostPlayed.columns = champ_columns

100%|██████████| 2500/2500 [1:04:17<00:00,  1.54s/it]


In [272]:
# Turn the table into plain Python data: one inner list per player (row),
# holding that row's five champion cells in column order
champion_lists = mostPlayed.to_numpy().tolist()

In [273]:
# Create a dictionary to store the relationships:
# champion id -> Counter of the champions played by the same players
champion_relationships = {}

# Iterate through each player's list and update the relationships
for champion_list in champion_lists:
    # Players with fewer than five champions have their row padded with missing
    # values (NaN), not zeros. Both are removed here so that padding never
    # becomes a dictionary key nor a counted "related champion".
    played = [
        champion
        for champion in champion_list
        if not pd.isna(champion) and champion != 0
    ]
    for champion in played:
        # The champion itself is counted too; it is removed in a later step
        champion_relationships.setdefault(champion, Counter()).update(played)

In [274]:
# Remove each champion from its own list of related champions
for champion, related_champions in champion_relationships.items():
    related_champions.pop(champion, None)

# Create a dictionary to store the top 5 related champions for each champion
top_related_champions = {
    champion: [related_champ for related_champ, _ in related_champions.most_common(5)]
    for champion, related_champions in champion_relationships.items()
}

# Create a list of dictionaries for the DataFrame
relationship_columns = ['Champion', 'Rel1', 'Rel2', 'Rel3', 'Rel4', 'Rel5']
relationship_data = []
for champion, related_champions in top_related_champions.items():
    # Pad with None up to five entries, so a champion with no related champions
    # at all yields an all-None row instead of raising IndexError on index 0
    padded = related_champions + [None] * (5 - len(related_champions))
    relationship_data.append(dict(zip(relationship_columns, [champion] + padded)))

# Create a new DataFrame; the explicit column list keeps the expected columns
# even when there is no row at all
relationship_df = pd.DataFrame(relationship_data, columns=relationship_columns)

In [293]:
# Discard rows without a champion id, mark the remaining empty relation slots
# with 0 and store every id as an integer
relationship_df = (
    relationship_df
    .dropna(subset = ['Champion'])
    .fillna(0)
    .astype(int)
)

In [298]:
# Save the champion relationship table as CSV (the index is not written)
relationship_df.to_csv('C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/masteryRel.csv', index = False)

In [299]:
# Get data to map id to champ name

# Page that lists every champion with its numeric id
url = 'https://darkintaqt.com/blog/champ-ids'

# Set up the Chrome WebDriver
driver = webdriver.Chrome()
try:
    # Navigate to the website
    driver.get(url)

    # Accept the cookies banner. Best effort: a missing banner must not abort the scrape.
    try:
        WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'button._cc_m_accept'))
        ).click()
    except TimeoutException:
        print("Cookies banner not found, continuing without accepting it")

    # Wait until the <table> inside the element with class "table" is present;
    # until() returns the located element
    table_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, ".table table"))
    )

    # Get table html
    table_html = table_element.get_attribute('outerHTML')
finally:
    # Close the WebDriver even when a wait times out
    driver.quit()

# Convert HTML to DataFrame and keep only the first and last columns.
# StringIO: passing literal HTML to read_html is deprecated in recent pandas versions.
df_mapIdName = pd.read_html(StringIO(table_html))[0].iloc[:, [0, -1]]

In [300]:
# Display the id -> name mapping table
df_mapIdName

Unnamed: 0,ID,Name
0,266,Aatrox
1,103,Ahri
2,84,Akali
3,166,Akshan
4,12,Alistar
...,...,...
159,221,Zeri
160,115,Ziggs
161,26,Zilean
162,142,Zoe


In [301]:
# Save the id -> name mapping table as CSV (the index is not written)
df_mapIdName.to_csv('C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/nameIdMap.csv', index = False)

In [53]:
# For each player get the champs he played

def _parse_champ_table(table_html):
    """Turn the HTML of a champion table into a cleaned DataFrame.

    Rows with missing values are dropped and every '-' cell is replaced by the
    mean of the table's 'KDA' column. Non-numeric KDA values (such as '-'
    itself) are coerced to NaN before averaging.
    """
    # Parsed once and reused. StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas versions.
    raw_table = pd.read_html(StringIO(table_html))[0]
    mean_kda = pd.to_numeric(raw_table['KDA'].dropna(), errors = 'coerce').mean()
    return raw_table.dropna().replace('-', mean_kda)


def _store_current_table(driver, player_data, player_name, event):
    """Parse the champion table currently displayed and append it to player_data."""
    table_element = driver.find_element(By.CLASS_NAME, 'table_list.footable.toggle-square-filled.footable-loaded.tablet')
    champ_table = _parse_champ_table(table_element.get_attribute('outerHTML'))

    # The three lists are extended together, and only after the table was parsed,
    # so a failure cannot leave them with different lengths
    player_data['Player'].append(player_name)
    player_data['Event'].append(event)
    player_data['Champ_data'].append(champ_table)


def scrape_player_data(player_name, player_data = None):
    """Scrape the champions a player used in every listed event on gol.gg.

    Args:
        player_name: Name typed into the site's search bar.
        player_data: Optional accumulator with the keys 'Player', 'Event' and
            'Champ_data' (three parallel lists). It is extended in place; a new
            one is created when omitted.

    Returns:
        The accumulator, with one entry per event plus a final "ALL" entry for
        this player, or None when a wait times out. Entries appended before
        the timeout stay in the accumulator that was passed in.
    """
    if player_data is None:
        player_data = {'Player': [], 'Event': [], 'Champ_data': []}

    driver = webdriver.Chrome()

    url = 'https://gol.gg/esports/home/'

    try:
        driver.get(url)
        driver.maximize_window()

        # Find the search bar (the input inside the selectize div) and submit the player name
        parent_div = driver.find_element(By.CSS_SELECTOR, 'div.selectize-input.items.not-full.has-options')
        search_input = parent_div.find_element(By.TAG_NAME, 'input')
        search_input.send_keys(player_name)
        search_input.send_keys(Keys.ENTER)

        # Wait for the player page to load; until() returns the tournament dropdown
        seasons_dropdown = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

        # Every option of the dropdown except the first one is an event to scrape
        seasons = [option.get_attribute('value') for option in seasons_dropdown.find_elements(By.TAG_NAME, 'option')][1:]
        print(seasons)

        for season in seasons:
            # The dropdown is looked up again on every iteration before typing the event name
            # NOTE(review): the wait below targets an element that is already present, so it
            # may not actually wait for the table to refresh. Confirm.
            driver.find_element(By.ID, 'cbtournament').send_keys(season)
            WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

            _store_current_table(driver, player_data, player_name, season)

        # Get ALL seasons data: swap the last four path segments of the current URL
        current_url = driver.current_url
        base_url = "/".join(current_url.split("/")[:-4])
        new_url = f"{base_url}/season-ALL/split-ALL/tournament-ALL/"

        print(new_url)
        driver.get(new_url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'cbtournament')))

        _store_current_table(driver, player_data, player_name, "ALL")

        return player_data

    except TimeoutException:
        print(f"Timed out while searching data for {player_name}")
        return None

    finally:
        # The browser is closed on success, on timeout and on any other error
        driver.quit()

if __name__ == '__main__':
    players = ['113', 'Elyoya', "Caps", "Razork", "Yike", "Exakick", "Upset", "Humanoid", "Hylissang", "Nisqy"]

    all_players_data = []  # not used by the visible code, kept as is

    # Accumulator shared by all the calls; None lets the first successful call create it
    player_data = None
    for player in players:
        result = scrape_player_data(player, player_data)
        if result is None:
            # A timed-out player returns None: keep what was gathered so far
            # instead of overwriting the accumulator, and do not report success
            print(f"{player} could not be scrapped")
        else:
            player_data = result
            print(f"{player} scrapped correctly!")

['LEC Summer 2023', 'LEC Spring Groups 2023', 'LEC Spring Season 2023', 'LEC Winter Groups 2023', 'LEC Winter 2023', 'LFL Summer Playoffs 2022', 'LFL Summer 2022', 'EU Masters Spring 2022', 'EU Masters Spring Play-In 2022', 'LFL Spring Playoffs 2022', 'LFL Spring 2022', 'EU Masters Summer 2021', 'LVP Summer Playoffs 2021', 'LVP SL Summer 2021']
https://gol.gg/players/player-stats/3845/season-ALL/split-ALL/tournament-ALL/
113 scrapped correctly!
['LEC Summer Groups 2023', 'LEC Summer 2023', 'MSI 2023', 'LEC Spring Playoffs 2023', 'LEC Spring Groups 2023', 'LEC Spring Season 2023', 'LEC Winter Playoffs 2023', 'LEC Winter Groups 2023', 'LEC Winter 2023', 'World Championship Play-In 2022', 'LEC Summer Playoffs 2022', 'LEC Summer 2022', 'LEC Spring 2022', 'World Championship 2021', 'LEC Summer Playoffs 2021', 'LEC Summer 2021', 'MSI 2021', 'LEC Spring Playoffs 2021', 'LEC Spring 2021', 'Iberian Cup 2020', 'LVP SLO Summer Playoffs 2020', 'EU Masters Summer 2020', 'LVP SLO Summer 2020', 'EU M

In [54]:
# One row per scraped (player, event) pair; each 'Champ_data' cell holds a whole DataFrame
df = pd.DataFrame.from_dict(player_data)

In [55]:
# Champion tables tagged with the player and event they belong to
exploded_dataframes = []

# Walk the three columns in parallel: every champion table receives its row's
# player and event as two new constant columns before being collected
for champ_df, player, event in zip(df['Champ_data'], df['Player'], df['Event']):
    champ_df['Player'] = player
    champ_df['Event'] = event
    exploded_dataframes.append(champ_df)

# Stack all the tagged tables into one frame with a fresh 0..n-1 index
exploded_df = pd.concat(exploded_dataframes, ignore_index=True)

In [66]:
# Give every column its final type: 'KDA' as float, 'Nb games' as int, and the
# 'Win Rate' percentage strings as floats stored under 'Win Rate(%)', which
# replaces the original text column
win_rate_pct = exploded_df['Win Rate'].str.replace("%", "").astype(float)
exploded_df = (
    exploded_df
    .astype({'KDA': float, 'Nb games': int})
    .assign(**{'Win Rate(%)': win_rate_pct})
    .drop(columns = ['Win Rate'])
)

In [74]:
# Display the final table: one row per (player, event, champion)
exploded_df

Unnamed: 0,Champion,Nb games,KDA,Player,Event,Win Rate(%)
0,Viego,3,3.60,113,LEC Summer 2023,33.3
1,Vi,2,4.80,113,LEC Summer 2023,50.0
2,Wukong,1,0.90,113,LEC Summer 2023,0.0
3,Jarvan IV,1,1.60,113,LEC Summer 2023,0.0
4,Sejuani,1,2.32,113,LEC Summer 2023,100.0
...,...,...,...,...,...,...
2255,Zilean,1,1.30,Nisqy,ALL,0.0
2256,Ekko,1,1.50,Nisqy,ALL,0.0
2257,Camille,1,1.20,Nisqy,ALL,0.0
2258,Sion,1,8.00,Nisqy,ALL,0.0


In [75]:
# Save the players' champion pool table as CSV (the index is not written)
exploded_df.to_csv('C:/Users/adars/OneDrive/Escritorio/ProjecteLolShiny/Data/PlayersChampPool.csv', index = False)