## Collecting Champion Meta Data from https://lolalytics.com/
Gulliver Wutz

In [10]:
import requests
import time
import pytz
import datetime
import hashlib
import pandas as pd
from bs4 import BeautifulSoup

def unix_to_cest(unix_timestamp):
    """Format a Unix timestamp as local time in the 'Europe/Berlin' zone.

    Args:
        unix_timestamp: Seconds since the Unix epoch (int or float).

    Returns:
        str: The wall-clock time in 'Europe/Berlin' formatted as
        'YYYY-MM-DD HH:MM:SS'. Despite the function name, the offset applied
        is whatever the zone uses on that date (CET in winter, CEST in summer).
    """
    berlin = pytz.timezone('Europe/Berlin')
    # Build a timezone-aware UTC datetime directly. datetime.utcfromtimestamp()
    # returns a naive value and is deprecated since Python 3.12.
    dt_utc = datetime.datetime.fromtimestamp(unix_timestamp, tz=datetime.timezone.utc)
    return dt_utc.astimezone(berlin).strftime('%Y-%m-%d %H:%M:%S')

def string_to_hexadecimal(input_string):
    """Return the SHA-256 digest of a string as lowercase hexadecimal.

    Args:
        input_string: Text to hash; it is encoded with str.encode()'s default
            encoding (UTF-8) before hashing.

    Returns:
        str: The 64-character hexadecimal SHA-256 digest.
    """
    return hashlib.sha256(input_string.encode()).hexdigest()

# Allow up to 200 characters per cell when displaying frames, so long string
# values such as the 'Link' URLs and the 64-character 'Sample ID' hashes are
# shown in full instead of being truncated.
pd.set_option('display.max_colwidth', 200)

In [172]:
# Get list of all champions
# NOTE: machine-specific absolute path to a locally saved copy of the
# LoLalytics tier-list page; adjust it when running on another machine.
file_path = 'C:/Users/wutzg/Desktop/Master Thesis/Artifacts/Webpage HTML/LoL Tier List - LoLalytics LoL Tier List for Patch 14.8.html'
with open(file_path, 'r', encoding='utf-8') as file:
    html_text = file.read()

soup = BeautifulSoup(html_text, 'lxml')
champion_containers = soup.find_all('div', class_ = 'mb-2 h-[215px] w-[90px] overflow-hidden border border-[#4e6a6c] bg-black hover:border-white')

# Collect one record per champion tile and build the frame once at the end.
# Growing a DataFrame with pd.concat inside the loop copies the whole frame on
# every iteration and warns on recent pandas when concatenating an empty frame.
champion_records = []
for champion_container in champion_containers:
    champion_link = champion_container.find('a')
    name_container = champion_link.find('div')
    champion_name = name_container.find('div')

    champion_records.append({
        'Name': champion_name.text,
        'Link': 'https://lolalytics.com' + str(champion_link['href'])
    })

# Passing the columns explicitly keeps the schema even when no tile matched.
champions_df = pd.DataFrame(champion_records, columns=['Name', 'Link'])

# Strip any query string so each link is the bare champion build URL.
champions_df['Link'] = champions_df['Link'].str.split('?').str[0]
# The same champion can appear in several tiles; keep the first occurrence.
champions_df = champions_df.drop_duplicates(subset='Name', keep='first')
champions_df = champions_df.sort_values(by='Name').reset_index(drop=True)

In [121]:
# Get champion details
#
# For every champion link, request one page per (rank, region, patch)
# combination, extract the headline statistics and checkpoint each sample to
# the CSV so an interrupted run keeps everything collected so far.

# URL query fragments per filter; an empty string means the site default.
rank_criteria = {'Gold': '?tier=gold', 'Diamond+': '?tier=diamond_plus'}
region_criteria = {'World': '', 'NorthA': '&region=na', 'EUW': '&region=euw'}
patch_criteria = {'14.8': '', '14.7': '&patch=14.7', '14.6': '&patch=14.6', '14.5': '&patch=14.5', '14.4': '&patch=14.4'}

meta_columns = ['Name', 'Patch', 'Region', 'Rank', 'Tier', 'Win Rate', 'Pick Rate',
                'Ban Rate', 'Matches', 'Collected CEST', 'Link', 'Sample ID']
output_csv = 'champion_meta_lolalytics_com.csv'
request_timeout = 30          # seconds; without a timeout a stalled connection blocks forever
stat_value_class = 'mb-1 font-bold'

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Samples are accumulated as plain dicts; the DataFrame is built once after the
# loop instead of being re-copied with pd.concat on every iteration.
meta_records = []

for link in champions_df['Link'].tolist():
    for rank_name, rank in rank_criteria.items():
        for region_name, region in region_criteria.items():
            for patch_name, patch in patch_criteria.items():
                temp_link = link + rank + region + patch
                print('Collecting ' + temp_link)
                print('Configuration: ' + rank_name + ' - ' + region_name + ' - ' + patch_name)
                print('')

                current_timestamp = int(time.time())
                response = requests.get(temp_link, headers=headers, timeout=request_timeout)
                # Fail loudly on HTTP errors instead of parsing an error page.
                response.raise_for_status()
                html_text = response.text
                soup = BeautifulSoup(html_text, 'lxml')

                # The highlighted heading holds the champion name followed by
                # one trailing word, which is dropped.
                champion_name = soup.find('div', class_ = 'border-b-[3px] border-[#22b2ff] text-[#22b2ff]').text.split(' ')
                champion_name = champion_name[:-1]        # Removing the last word
                champion_name = " ".join(champion_name)

                champion_stats_container = soup.find('div', class_ = 'mt-4')
                stats1 = champion_stats_container.find('div', class_ = 'flex justify-around border border-[#333333] p-2 text-center')
                stats2 = champion_stats_container.find('div', class_ = 'mt-2 flex justify-around border border-[#333333] p-2 text-center')

                # Look up the value cells of each stats row once and index into them.
                stats1_values = stats1.find_all('div', class_ = stat_value_class)
                stats2_values = stats2.find_all('div', class_ = stat_value_class)

                champion_tier = stats2_values[0].text
                champion_wr = stats1_values[0].text
                champion_br = stats2_values[2].text
                champion_matches = stats2_values[3].text

                # The first row comes in a short and a long layout. Index 3 is
                # only valid with at least four cells (the previous `> 2` check
                # raised IndexError for exactly three cells).
                if len(stats1_values) > 3:
                    champion_pr = stats1_values[3].text
                else:
                    champion_pr = stats1_values[1].text

                new_data = {
                    'Name': champion_name,
                    'Patch': patch_name,
                    'Region': region_name,
                    'Rank': rank_name,
                    'Tier': champion_tier,
                    'Win Rate': champion_wr,
                    'Pick Rate': champion_pr,
                    'Ban Rate': champion_br,
                    'Matches': champion_matches,
                    'Collected CEST': unix_to_cest(current_timestamp),
                    'Link': temp_link,
                    'Sample ID': string_to_hexadecimal(champion_name + rank_name + region_name + patch_name)
                }

                print('Champion: ' + champion_name)
                print('Patch: ' + patch_name)
                print('Region: ' + region_name)
                print('Rank: ' + rank_name)
                print('Tier: ' + champion_tier)
                print('Win Rate: ' + champion_wr)
                print('Pick Rate: ' + champion_pr)
                print('Ban Rate: ' + champion_br)
                print('Matches: ' + champion_matches)

                # Checkpoint: the first sample (re)creates the file with a
                # header, later samples are appended. The file content matches
                # a full rewrite without re-serialising every earlier row.
                is_first_sample = not meta_records
                new_row = pd.DataFrame([new_data], columns=meta_columns)
                new_row.to_csv(output_csv, mode='w' if is_first_sample else 'a',
                               header=is_first_sample, index=False)
                meta_records.append(new_data)

                print('')
                print('Collection complete!')
                print('\n')

                # Be polite to the server: at most one request per second.
                time.sleep(1)

champion_meta_df = pd.DataFrame(meta_records, columns=meta_columns)

Collecting https://lolalytics.com/lol/aatrox/build/?tier=gold
Configuration: Gold - World - 14.8

Champion: Aatrox
Patch: 14.8
Region: World
Rank: Gold
Tier: B+
Win Rate: 49.06%
Pick Rate: 8.14%
Ban Rate: 13.85%
Matches: 42,383

Collection complete!


Collecting https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.7
Configuration: Gold - World - 14.7

Champion: Aatrox
Patch: 14.7
Region: World
Rank: Gold
Tier: B
Win Rate: 49.12%
Pick Rate: 8.39%
Ban Rate: 14.64%
Matches: 146,541

Collection complete!


Collecting https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.6
Configuration: Gold - World - 14.6

Champion: Aatrox
Patch: 14.6
Region: World
Rank: Gold
Tier: B+
Win Rate: 48.4%
Pick Rate: 8.56%
Ban Rate: 14.96%
Matches: 155,671

Collection complete!


Collecting https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.5
Configuration: Gold - World - 14.5

Champion: Aatrox
Patch: 14.5
Region: World
Rank: Gold
Tier: B
Win Rate: 48.04%
Pick Rate: 9.22%
Ban Rate: 15.59%


## Sanitize Dataset

In [163]:
# Convert Win Rate to float
# Read 'Patch' as text: it is a version label, not a number. Letting read_csv
# infer a float would turn a label such as '14.10' into 14.1 and write that
# corrupted value back to the CSV.
champion_meta_df = pd.read_csv('champion_meta_lolalytics_com.csv', dtype={'Patch': str})
# '49.06%' -> '49.06' -> 0.4906 (fraction of 1)
win_rate_text = champion_meta_df['Win Rate'].str.replace('%', '', regex=False)
champion_meta_df['Win Rate'] = win_rate_text.astype(float) / 100

In [164]:
# Convert Pick Rate to float
# Drop the percent sign, then rescale from percent to a fraction of 1.
pick_rate_text = champion_meta_df['Pick Rate'].str.replace('%', '')
champion_meta_df['Pick Rate'] = pick_rate_text.astype(float) / 100

In [165]:
# Convert Ban Rate to float
# Drop the percent sign, then rescale from percent to a fraction of 1.
ban_rate_text = champion_meta_df['Ban Rate'].str.replace('%', '')
champion_meta_df['Ban Rate'] = ban_rate_text.astype(float) / 100

In [166]:
# Convert Matches to int
# Match counts are scraped with thousands separators (e.g. '42,383');
# remove the commas before casting.
matches_text = champion_meta_df['Matches'].str.replace(',', '')
champion_meta_df['Matches'] = matches_text.astype(int)

In [167]:
# Write to CSV
# This is the same file name the sanitization section read from, so the
# scraped string values are overwritten by the converted numeric ones.
# index=False keeps the row index out of the file.
champion_meta_df.to_csv('champion_meta_lolalytics_com.csv', index=False)

## Inspect sanitized Dataset

In [6]:
# Check if Sample ID hashes are indeed uniquely generated
# Reload the CSV, then report duplicate keys and missing values.
champion_meta_df = pd.read_csv('champion_meta_lolalytics_com.csv')

sample_id_has_duplicates = champion_meta_df['Sample ID'].duplicated().any()
link_has_duplicates = champion_meta_df['Link'].duplicated().any()
dataset_has_nans = champion_meta_df.isna().any().any()

print(f"Sample ID column contains duplicates: {sample_id_has_duplicates}")
print(f"Link column contains duplicates: {link_has_duplicates}")
print(f"Dataset contains NaNs: {dataset_has_nans}")

Sample ID column contains duplicates: False
Link column contains duplicates: False
Dataset contains NaNs: False


In [7]:
# Print out all unique values of a selected column
# ('Rank' is selected here; change the column name to inspect another column.)
champion_meta_df['Rank'].unique().tolist()

['Gold', 'Diamond+']

In [8]:
# Inspect Dataset Types
# After sanitization the rate columns should be float64 and 'Matches' int64.
# NOTE(review): 'Patch' is inferred as a number by read_csv; a label such as
# '14.10' would be read as 14.1 - consider dtype={'Patch': str} when loading.
champion_meta_df.dtypes

Name               object
Patch             float64
Region             object
Rank               object
Tier               object
Win Rate          float64
Pick Rate         float64
Ban Rate          float64
Matches             int64
Collected CEST     object
Link               object
Sample ID          object
dtype: object

In [11]:
# Inspect Dataset
# Bare last-line expression so the notebook renders the frame as a table.
champion_meta_df

Unnamed: 0,Name,Patch,Region,Rank,Tier,Win Rate,Pick Rate,Ban Rate,Matches,Collected CEST,Link,Sample ID
0,Aatrox,14.8,World,Gold,B+,0.4906,0.0814,0.1385,42383,2024-04-21 12:12:58,https://lolalytics.com/lol/aatrox/build/?tier=gold,ff6861e35edf64ba921a093c887682d82a0cbac29ff7fff58e92aeb6131ff5af
1,Aatrox,14.7,World,Gold,B,0.4912,0.0839,0.1464,146541,2024-04-21 12:12:59,https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.7,147b0829bce4e3659681bbfa81001cb90368406098ae197ab55b8d7750665134
2,Aatrox,14.6,World,Gold,B+,0.4840,0.0856,0.1496,155671,2024-04-21 12:13:00,https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.6,8a1d40c696c950a41bb93422a5a14a9b70cceabd9d050f49403a3e2506dbd020
3,Aatrox,14.5,World,Gold,B,0.4804,0.0922,0.1559,190071,2024-04-21 12:13:02,https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.5,523f490ea8088f181737fff63350d040b3d7256061a1ceb5acaa2f2233c84c43
4,Aatrox,14.4,World,Gold,B,0.4835,0.0958,0.1604,198593,2024-04-21 12:13:03,https://lolalytics.com/lol/aatrox/build/?tier=gold&patch=14.4,4b9bd7b0e3290e53a669f595f928bd3465c5fcb6254be1a3fc00ce0288017890
...,...,...,...,...,...,...,...,...,...,...,...,...
5005,Zyra,14.8,EUW,Diamond+,A-,0.5314,0.0366,0.0303,2531,2024-04-21 14:14:22,https://lolalytics.com/lol/zyra/build/?tier=diamond_plus&region=euw,c82f5f9ff6f5e19a8fa0b96f0d07d396d956f3fe61b1b6d6f292be51f7dfcad5
5006,Zyra,14.7,EUW,Diamond+,B,0.5228,0.0304,0.0315,6842,2024-04-21 14:14:23,https://lolalytics.com/lol/zyra/build/?tier=diamond_plus&region=euw&patch=14.7,92c103b4cba2452c963b6334ba23cb1bb888f0603e54e93779b2c877bf503dd7
5007,Zyra,14.6,EUW,Diamond+,B,0.5321,0.0388,0.0396,8687,2024-04-21 14:14:25,https://lolalytics.com/lol/zyra/build/?tier=diamond_plus&region=euw&patch=14.6,6875946726155d806fdddc18ea8e7267107d99661bd442aac6e9706a89d6479b
5008,Zyra,14.5,EUW,Diamond+,A+,0.5372,0.0427,0.0404,9968,2024-04-21 14:14:26,https://lolalytics.com/lol/zyra/build/?tier=diamond_plus&region=euw&patch=14.5,fa3c2673415c72394c0881a46a1bcf9bfb87dd099dab755367b7e89d986da380
