## Collecting Champion Meta Data from https://u.gg/
Gulliver Wutz

In [1]:
import requests
import time
import pytz
import datetime
import hashlib
import pandas as pd
from bs4 import BeautifulSoup

def unix_to_cest(unix_timestamp):
    """Format a Unix timestamp as wall-clock time in the Europe/Berlin zone.

    Args:
        unix_timestamp: Seconds since the Unix epoch (int or float).

    Returns:
        A 'YYYY-MM-DD HH:MM:SS' string in Europe/Berlin local time. Despite
        the function name, that zone is CEST only while daylight saving time
        is in effect and CET for the rest of the year.
    """
    # Build a timezone-aware UTC datetime in one step. The previous
    # datetime.utcfromtimestamp() call returns a naive value and is
    # deprecated since Python 3.12.
    dt_utc = datetime.datetime.fromtimestamp(unix_timestamp, tz=pytz.utc)
    dt_cest = dt_utc.astimezone(pytz.timezone('Europe/Berlin'))
    return dt_cest.strftime('%Y-%m-%d %H:%M:%S')

def string_to_hexadecimal(input_string):
    """Return the SHA-256 digest of ``input_string`` as a hex string.

    The text is encoded with ``str.encode()``'s default encoding (UTF-8)
    before hashing, so the result is always 64 lowercase hex characters.
    """
    return hashlib.sha256(input_string.encode()).hexdigest()

In [4]:
# Get list of all champions
# A browser-like User-Agent is sent with every request; the same `headers`
# dict is reused by the detail-scraping cell.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Fail fast on network stalls or HTTP errors instead of parsing an error page.
response = requests.get('https://u.gg/lol/champions', headers=headers, timeout=30)
response.raise_for_status()
html_text = response.text
soup = BeautifulSoup(html_text, 'lxml')
champions = soup.find_all('a', class_ = 'champion-link')

# Collect all rows first and build the frame once; concatenating a one-row
# frame per champion copies the whole frame on every iteration.
# The champion name is the second-to-last path segment of the href.
champion_records = [
    {
        'Name': champion['href'].split('/')[-2],
        'Link': 'https://u.gg' + str(champion['href'])
    }
    for champion in champions
]
champions_df = pd.DataFrame(champion_records, columns=['Name', 'Link'])

In [3]:
# Get champion details
# For every champion link, request one page per rank x region x patch
# combination and record the headline statistics shown on that page.

# Each mapping goes from the label stored in the dataset to the URL fragment
# appended to the champion link. An empty fragment means no parameter is added.
rank_criteria = {'Gold': '?rank=gold', 'Diamond+': '?rank=diamond_plus'}
region_criteria = {'World': '', 'NorthA': '&region=na1', 'EUW': '&region=euw1'}
patch_criteria = {'14.8': '', '14.7': '&patch=14_7', '14.6': '&patch=14_6', '14.5': '&patch=14_5', '14.4': '&patch=14_4'}

meta_columns = ['Name', 'Patch', 'Region', 'Rank', 'Tier', 'Win Rate', 'Pick Rate',
                'Ban Rate', 'Matches', 'Collected CEST', 'Link', 'Sample ID']

meta_records = []   # one dict per successfully scraped page
failed_links = []   # pages that could not be fetched or parsed
champion_meta_df = pd.DataFrame(columns=meta_columns)

for link in champions_df['Link'].tolist():
    for rank_name, rank in rank_criteria.items():
        for region_name, region in region_criteria.items():
            for patch_name, patch in patch_criteria.items():
                temp_link = link + rank + region + patch
                print('Collecting ' + temp_link)
                print('Configuration: ' + rank_name + ' - ' + region_name + ' - ' + patch_name)

                current_timestamp = int(time.time())
                try:
                    response = requests.get(temp_link, headers=headers, timeout=30)
                    response.raise_for_status()
                    html_text = response.text
                    soup = BeautifulSoup(html_text, 'lxml')
                    # soup.find() returns None when an element is missing, so
                    # the attribute access below raises AttributeError.
                    champion_name = soup.find('span', class_ = 'champion-name').text.strip()
                    champion_stats_container = soup.find('div', class_ = 'additional-stats-container')
                    champion_stats = champion_stats_container.find_all('div')

                    # Positional lookups into the flattened list of nested divs.
                    # NOTE(review): these offsets presumably mirror the page
                    # layout at collection time - re-check if the markup changes.
                    new_data = {
                        'Name': champion_name,
                        'Patch': patch_name,
                        'Region': region_name,
                        'Rank': rank_name,
                        'Tier': champion_stats[2].text,
                        'Win Rate': champion_stats[5].text,
                        'Pick Rate': champion_stats[11].text,
                        'Ban Rate': champion_stats[14].text,
                        'Matches': champion_stats[17].text,
                        'Collected CEST': unix_to_cest(current_timestamp),
                        'Link': temp_link,
                        'Sample ID': string_to_hexadecimal(champion_name + rank_name + region_name + patch_name)
                    }
                except (requests.exceptions.RequestException, AttributeError, IndexError) as error:
                    # One bad page should not abort the whole run; remember it
                    # so it can be retried or inspected afterwards.
                    failed_links.append(temp_link)
                    print('Collection failed: ' + repr(error))
                else:
                    meta_records.append(new_data)
                    print('Collection complete!')
                print('\n')

                # Pause between requests to avoid hammering the site.
                time.sleep(1)

    # Checkpoint once per champion: an interrupted run still leaves usable data
    # on disk, without rebuilding and rewriting the full CSV after every request.
    champion_meta_df = pd.DataFrame(meta_records, columns=meta_columns)
    champion_meta_df.to_csv('champion_meta_u_gg.csv', index=False)

if failed_links:
    print('Failed to collect ' + str(len(failed_links)) + ' page(s):')
    for failed_link in failed_links:
        print(failed_link)

champion_meta_df

Collecting https://u.gg/lol/champions/aatrox/build?rank=gold
Configuration: Gold - World - 14.8
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_7
Configuration: Gold - World - 14.7
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_6
Configuration: Gold - World - 14.6
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_5
Configuration: Gold - World - 14.5
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_4
Configuration: Gold - World - 14.4
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&region=na1
Configuration: Gold - NA - 14.8
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&region=na1&patch=14_7
Configuration: Gold - NA - 14.7
Collection complete!


Collecting https://u.gg/lol/champions/aatrox/build?rank=gold&region=na1&patch=14_6
Configuration

Unnamed: 0,Name,Patch,Region,Rank,Tier,Win Rate,Pick Rate,Ban Rate,Matches,Collected CEST,Link,Sample ID
0,Aatrox,14.8,World,Gold,D,48.89%,7.5%,10.5%,6196,2024-04-19 10:45:49,https://u.gg/lol/champions/aatrox/build?rank=gold,a6d0e7aae255c160fb384f853b84dbacc6a480e09e9326...
1,Aatrox,14.7,World,Gold,C,49.5%,8.2%,13.9%,121118,2024-04-19 10:45:51,https://u.gg/lol/champions/aatrox/build?rank=g...,5d1cd234fb83edcb9537545151262c3a4966d393c7e567...
2,Aatrox,14.6,World,Gold,D,48.95%,8.7%,15.4%,166115,2024-04-19 10:45:53,https://u.gg/lol/champions/aatrox/build?rank=g...,a0231de3df3c3732fcd03dee7b42d694905190774b52d1...
3,Aatrox,14.5,World,Gold,D,48.77%,9.4%,16.5%,126871,2024-04-19 10:45:55,https://u.gg/lol/champions/aatrox/build?rank=g...,4a30bd44aed76ac5ff610c8eb3b2cad8c31906e66cff28...
4,Aatrox,14.4,World,Gold,D,49.23%,10.1%,17.1%,185100,2024-04-19 10:45:56,https://u.gg/lol/champions/aatrox/build?rank=g...,72099c8f62fa506cd43e25adc5103ba6ad2b9c6ad1ad00...
...,...,...,...,...,...,...,...,...,...,...,...,...
5005,Zyra,14.8,EUW,Diamond+,A,51%,3.0%,1.7%,402,2024-04-19 15:36:15,https://u.gg/lol/champions/zyra/build?rank=dia...,26dae27f736319e9475b8fdcc6e6c8e05c9f4f7afcdab0...
5006,Zyra,14.7,EUW,Diamond+,B,49.19%,2.9%,2.1%,4802,2024-04-19 15:36:19,https://u.gg/lol/champions/zyra/build?rank=dia...,21ba503ce13761c3a0d677f94bd7bad631bf0110f9c163...
5007,Zyra,14.6,EUW,Diamond+,A,50.19%,3.8%,2.9%,7316,2024-04-19 15:36:22,https://u.gg/lol/champions/zyra/build?rank=dia...,7878882ede49d7bcb46185d4c0e6fcf0a211ba3d887a7a...
5008,Zyra,14.5,EUW,Diamond+,A,50.19%,4.1%,2.8%,5575,2024-04-19 15:36:26,https://u.gg/lol/champions/zyra/build?rank=dia...,6d189380615dfb2424d98e11f0b8bb75f4fd8a5bb07f35...


## Inspect generated Dataset

In [108]:
# Inspect Dataset
# Reload the scraped data from disk. Patch is a version label, not a number:
# left to type inference it is parsed as a float, which would turn a label
# such as '14.10' into 14.1 and persist that when the frame is written back.
# Reading it as a string keeps the label intact.
champion_meta_df = pd.read_csv('champion_meta_u_gg.csv', dtype={'Patch': str})
champion_meta_df

Unnamed: 0,Name,Patch,Region,Rank,Tier,Win Rate,Pick Rate,Ban Rate,Matches,Collected CEST,Link,Sample ID
0,Aatrox,14.8,World,Gold,D,48.89%,7.5%,10.5%,6196,2024-04-19 10:45:49,https://u.gg/lol/champions/aatrox/build?rank=gold,ff6861e35edf64ba921a093c887682d82a0cbac29ff7fff58e92aeb6131ff5af
1,Aatrox,14.7,World,Gold,C,49.5%,8.2%,13.9%,121118,2024-04-19 10:45:51,https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_7,147b0829bce4e3659681bbfa81001cb90368406098ae197ab55b8d7750665134
2,Aatrox,14.6,World,Gold,D,48.95%,8.7%,15.4%,166115,2024-04-19 10:45:53,https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_6,8a1d40c696c950a41bb93422a5a14a9b70cceabd9d050f49403a3e2506dbd020
3,Aatrox,14.5,World,Gold,D,48.77%,9.4%,16.5%,126871,2024-04-19 10:45:55,https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_5,523f490ea8088f181737fff63350d040b3d7256061a1ceb5acaa2f2233c84c43
4,Aatrox,14.4,World,Gold,D,49.23%,10.1%,17.1%,185100,2024-04-19 10:45:56,https://u.gg/lol/champions/aatrox/build?rank=gold&patch=14_4,4b9bd7b0e3290e53a669f595f928bd3465c5fcb6254be1a3fc00ce0288017890
...,...,...,...,...,...,...,...,...,...,...,...,...
5005,Zyra,14.8,EUW,Diamond+,A,51%,3.0%,1.7%,402,2024-04-19 15:36:15,https://u.gg/lol/champions/zyra/build?rank=diamond_plus&region=euw1,c82f5f9ff6f5e19a8fa0b96f0d07d396d956f3fe61b1b6d6f292be51f7dfcad5
5006,Zyra,14.7,EUW,Diamond+,B,49.19%,2.9%,2.1%,4802,2024-04-19 15:36:19,https://u.gg/lol/champions/zyra/build?rank=diamond_plus&region=euw1&patch=14_7,92c103b4cba2452c963b6334ba23cb1bb888f0603e54e93779b2c877bf503dd7
5007,Zyra,14.6,EUW,Diamond+,A,50.19%,3.8%,2.9%,7316,2024-04-19 15:36:22,https://u.gg/lol/champions/zyra/build?rank=diamond_plus&region=euw1&patch=14_6,6875946726155d806fdddc18ea8e7267107d99661bd442aac6e9706a89d6479b
5008,Zyra,14.5,EUW,Diamond+,A,50.19%,4.1%,2.8%,5575,2024-04-19 15:36:26,https://u.gg/lol/champions/zyra/build?rank=diamond_plus&region=euw1&patch=14_5,fa3c2673415c72394c0881a46a1bcf9bfb87dd099dab755367b7e89d986da380


In [109]:
# Check that the Sample ID hashes and the links are unique and that no value is missing
integrity_checks = {
    "Sample ID column contains duplicates: ": champion_meta_df['Sample ID'].duplicated().any(),
    "Link column contains duplicates: ": champion_meta_df['Link'].duplicated().any(),
    "Dataset contains NaNs: ": champion_meta_df.isna().any().any(),
}
for check_label, check_result in integrity_checks.items():
    print(check_label + str(check_result))


Sample ID column contains duplicates: False
Link column contains duplicates: False
Dataset contains NaNs: False


In [111]:
# List the distinct values of a selected column, in order of first appearance
tier_values = champion_meta_df['Tier'].unique()
tier_values.tolist()

['D', 'C', 'S+', 'B', 'A', 'S', '?']

## Sanitize Dataset

In [113]:
# Convert Win Rate to float
# Values arrive as text such as '48.89%': drop the percent sign, then rescale
# to a 0-1 fraction.
win_rate_text = champion_meta_df['Win Rate'].str.replace('%', '')
champion_meta_df['Win Rate'] = win_rate_text.astype(float) / 100

In [114]:
# Convert Pick Rate to float
# Strip the '<' marker, the 'Pick Rate' label text and the percent sign (in
# that order), trim whitespace, then rescale to a 0-1 fraction.
pick_rate_text = champion_meta_df['Pick Rate']
for unwanted in ('<', 'Pick Rate', '%'):
    pick_rate_text = pick_rate_text.str.replace(unwanted, '')
champion_meta_df['Pick Rate'] = pick_rate_text.str.strip().astype(float) / 100

In [115]:
# Convert Ban Rate to float
def _dash_to_zero(ban_rate_text):
    """Return '0' for text that starts with '-' once trimmed; otherwise return the text unchanged."""
    if ban_rate_text.strip().startswith('-'):
        return '0'
    return ban_rate_text

# Step 1: remove the 'Ban Rate' label text and the percent sign.
ban_rate_text = champion_meta_df['Ban Rate']
for unwanted in ('Ban Rate', '%'):
    ban_rate_text = ban_rate_text.str.replace(unwanted, '')
champion_meta_df['Ban Rate'] = ban_rate_text

# Step 2: treat dash placeholders as zero, then rescale to a 0-1 fraction.
champion_meta_df['Ban Rate'] = champion_meta_df['Ban Rate'].apply(_dash_to_zero).astype(float) / 100

In [116]:
# Convert Matches to int
# Remove thousands separators and the 'Matches' label text before casting.
matches_text = champion_meta_df['Matches'].str.replace(',', '')
matches_text = matches_text.str.replace('Matches', '')
champion_meta_df['Matches'] = matches_text.astype(int)

In [119]:
# Write to CSV
# Persist the sanitized frame without the index column.
# NOTE(review): this is the same path the scraping cell writes and the
# inspection cell reads, so the raw scrape is replaced by the sanitized data.
# Re-running the conversion cells on the reloaded file is then likely to fail,
# because they use .str on columns that are now numeric. Consider a separate
# output filename.
champion_meta_df.to_csv('champion_meta_u_gg.csv', index=False)

## Inspect sanitized Dataset

In [2]:
# Inspect final Dataset
# Reload the sanitized CSV and list the dtype pandas inferred for each column.
# NOTE(review): no dtype is given, so Patch is inferred as float64 (see the
# output below). A two-digit minor version such as '14.10' would load as 14.1.
# Confirm whether Patch should be read as a string instead.
champion_meta_df = pd.read_csv('champion_meta_u_gg.csv')
champion_meta_df.dtypes

Name               object
Patch             float64
Region             object
Rank               object
Tier               object
Win Rate          float64
Pick Rate         float64
Ban Rate          float64
Matches             int64
Collected CEST     object
Link               object
Sample ID          object
dtype: object

In [5]:
# Preview the sanitized dataset; the rendered output elides the middle rows.
champion_meta_df

Unnamed: 0,Name,Patch,Region,Rank,Tier,Win Rate,Pick Rate,Ban Rate,Matches,Collected CEST,Link,Sample ID
0,Aatrox,14.8,World,Gold,D,0.4889,0.075,0.105,6196,2024-04-19 10:45:49,https://u.gg/lol/champions/aatrox/build?rank=gold,ff6861e35edf64ba921a093c887682d82a0cbac29ff7ff...
1,Aatrox,14.7,World,Gold,C,0.4950,0.082,0.139,121118,2024-04-19 10:45:51,https://u.gg/lol/champions/aatrox/build?rank=g...,147b0829bce4e3659681bbfa81001cb90368406098ae19...
2,Aatrox,14.6,World,Gold,D,0.4895,0.087,0.154,166115,2024-04-19 10:45:53,https://u.gg/lol/champions/aatrox/build?rank=g...,8a1d40c696c950a41bb93422a5a14a9b70cceabd9d050f...
3,Aatrox,14.5,World,Gold,D,0.4877,0.094,0.165,126871,2024-04-19 10:45:55,https://u.gg/lol/champions/aatrox/build?rank=g...,523f490ea8088f181737fff63350d040b3d7256061a1ce...
4,Aatrox,14.4,World,Gold,D,0.4923,0.101,0.171,185100,2024-04-19 10:45:56,https://u.gg/lol/champions/aatrox/build?rank=g...,4b9bd7b0e3290e53a669f595f928bd3465c5fcb6254be1...
...,...,...,...,...,...,...,...,...,...,...,...,...
5005,Zyra,14.8,EUW,Diamond+,A,0.5100,0.030,0.017,402,2024-04-19 15:36:15,https://u.gg/lol/champions/zyra/build?rank=dia...,c82f5f9ff6f5e19a8fa0b96f0d07d396d956f3fe61b1b6...
5006,Zyra,14.7,EUW,Diamond+,B,0.4919,0.029,0.021,4802,2024-04-19 15:36:19,https://u.gg/lol/champions/zyra/build?rank=dia...,92c103b4cba2452c963b6334ba23cb1bb888f0603e54e9...
5007,Zyra,14.6,EUW,Diamond+,A,0.5019,0.038,0.029,7316,2024-04-19 15:36:22,https://u.gg/lol/champions/zyra/build?rank=dia...,6875946726155d806fdddc18ea8e7267107d99661bd442...
5008,Zyra,14.5,EUW,Diamond+,A,0.5019,0.041,0.028,5575,2024-04-19 15:36:26,https://u.gg/lol/champions/zyra/build?rank=dia...,fa3c2673415c72394c0881a46a1bcf9bfb87dd099dab75...
