## Collecting Champion Meta Data from https://www.metasrc.com/
Gulliver Wutz

In [1]:
import requests
import time
import pytz
import datetime
import hashlib
import pandas as pd
from bs4 import BeautifulSoup

def unix_to_cest(unix_timestamp):
    dt_utc = datetime.datetime.utcfromtimestamp(unix_timestamp)
    cest = pytz.timezone('Europe/Berlin')
    dt_cest = dt_utc.replace(tzinfo=pytz.utc).astimezone(cest)
    formatted_dt = dt_cest.strftime('%Y-%m-%d %H:%M:%S')
    return formatted_dt

def string_to_hexadecimal(input_string):
    hash_object = hashlib.sha256()
    hash_object.update(input_string.encode())
    hex_digest = hash_object.hexdigest()
    return hex_digest

pd.set_option('display.max_colwidth', 200)

In [2]:
# Get list of all champions
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
html_text = requests.get('https://www.metasrc.com/lol', headers=headers).text
soup = BeautifulSoup(html_text, 'lxml')
champion_container = soup.find('div', class_ = '_ate82z _rn2u1o _cn8bui')
champions = champion_container.find_all('a')

champions_df = pd.DataFrame(columns=['Name', 'Link'])

for champion in champions:

    champion_name = champion['href'].split('/')[-1]
    champion_link = champion['href']

    new_data = {
    'Name': champion_name,
    'Link': str(champion_link)
    }
    new_row = pd.DataFrame([new_data])
    champions_df = pd.concat([champions_df, new_row], ignore_index=True)

In [3]:
# Get champion details
# https://www.metasrc.com/lol/build/aatrox
# https://www.metasrc.com/lol/na/14.6/build/aatrox?ranks=gold

rank_criteria = {'Gold': '?ranks=gold', 'Diamond+': '?ranks=diamond,master,grandmaster,challenger'}
region_criteria = {'World': '', 'NorthA': 'na', 'EUW': 'euw'}
patch_criteria = {'14.8': '', '14.7': '14.7', '14.6': '14.6', '14.5': '14.5', '14.4': '14.4'}

champion_meta_df = pd.DataFrame(columns=['Name', 'Patch', 'Region', 'Rank', 'Tier', 'Win Rate', 'Pick Rate',
                                         'Ban Rate', 'Matches', 'Collected CEST', 'Link', 'Sample ID'])


for link in champions_df['Link'].tolist():
    for rank_name, rank in rank_criteria.items():
        for region_name, region in region_criteria.items():
            for patch_name, patch in patch_criteria.items():

                parts = link.split('/')
                build_index = parts.index('build')
                parts.insert(build_index, patch)
                parts.insert(build_index, region)
                temp_link = '/'.join(parts)
                temp_link = temp_link + rank

                print('Collecting ' + temp_link)
                print('Configuration: ' + rank_name + ' - ' + region_name + ' - ' + patch_name)
                print('')

                current_timestamp = int(time.time())
                html_text = requests.get(temp_link, headers=headers).text
                soup = BeautifulSoup(html_text, 'lxml')
                
                champion_name = soup.find('div', class_ = '_b0j1oy').find('span', class_ = '_6b8ssn').text.split()
                champion_name = ' '.join(champion_name[:-2])
                champion_stats_container = soup.find('div', class_ = '_fcip6v _eq293a _r14nwh')
                champion_tier = champion_stats_container.find_all('span', class_ = '_dxv0e1')[0].text.split()[1]
                champion_wr = champion_stats_container.find_all('span', class_ = '_dxv0e1')[1].text[3:]
                champion_pr = champion_stats_container.find_all('span', class_ = '_dxv0e1')[3].text[4:]
                champion_br = champion_stats_container.find_all('span', class_ = '_dxv0e1')[4].text[3:]
                champion_matches = champion_stats_container.find_all('span', class_ = '_dxv0e1')[5].text.split()[1]

                new_data = {
                'Name': champion_name,
                'Patch': patch_name,
                'Region': region_name,
                'Rank': rank_name,
                'Tier': champion_tier,
                'Win Rate': champion_wr,
                'Pick Rate': champion_pr,
                'Ban Rate': champion_br,
                'Matches': champion_matches,
                'Collected CEST': unix_to_cest(current_timestamp),
                'Link': temp_link,
                'Sample ID': string_to_hexadecimal(champion_name + rank_name + region_name + patch_name)
                }

                print('Champion: ' + champion_name)
                print('Patch: ' + patch_name)
                print('Region: ' + region_name)
                print('Rank: ' + rank_name)
                print('Tier: ' + champion_tier)
                print('Win Rate: ' + champion_wr)
                print('Pick Rate: ' + champion_pr)
                print('Ban Rate: ' + champion_br)
                print('Matches: ' + champion_matches)

                new_row = pd.DataFrame([new_data])
                champion_meta_df = pd.concat([champion_meta_df, new_row], ignore_index=True)
                champion_meta_df.to_csv('champion_meta_metasrc_com.csv', index=False)

                print('')
                print('Collection complete!')
                print('\n')

                time.sleep(1)

Collecting https://www.metasrc.com/lol///build/aatrox?ranks=gold
Configuration: Gold - World - 14.8

Champion: Aatrox
Patch: 14.8
Region: World
Rank: Gold
Tier: S+
Win Rate: 48.94%
Pick Rate: 9.09%
Ban Rate: 13.92%
Matches: 46181

Collection complete!


Collecting https://www.metasrc.com/lol//14.7/build/aatrox?ranks=gold
Configuration: Gold - World - 14.7

Champion: Aatrox
Patch: 14.7
Region: World
Rank: Gold
Tier: S+
Win Rate: 49.30%
Pick Rate: 9.46%
Ban Rate: 15.33%
Matches: 113536

Collection complete!


Collecting https://www.metasrc.com/lol//14.6/build/aatrox?ranks=gold
Configuration: Gold - World - 14.6

Champion: Aatrox
Patch: 14.6
Region: World
Rank: Gold
Tier: S+
Win Rate: 48.83%
Pick Rate: 9.96%
Ban Rate: 16.35%
Matches: 88673

Collection complete!


Collecting https://www.metasrc.com/lol//14.5/build/aatrox?ranks=gold
Configuration: Gold - World - 14.5

Champion: Aatrox
Patch: 14.5
Region: World
Rank: Gold
Tier: S+
Win Rate: 48.75%
Pick Rate: 10.68%
Ban Rate: 16.49%
Matches: 

## Sanitize Dataset

In [54]:
# Convert Win Rate to float
champion_meta_df = pd.read_csv('champion_meta_metasrc_com.csv')
champion_meta_df['Win Rate'] = champion_meta_df['Win Rate'].str.replace('%', '').astype(float) / 100

In [55]:
# Convet Pick Rate to float
champion_meta_df['Pick Rate'] = champion_meta_df['Pick Rate'].str.replace('%', '').astype(float) / 100

In [56]:
# Convert Ban Rate to float
champion_meta_df['Ban Rate'] = champion_meta_df['Ban Rate'].str.replace('%', '').astype(float) / 100

In [57]:
# Convert Matches to int
champion_meta_df['Matches'] = champion_meta_df['Matches'].astype(int)

In [58]:
# Write to CSV
champion_meta_df.to_csv('champion_meta_metasrc_com.csv', index=False)

## Inspect sanitized Dataset

In [63]:
# Check if Sample ID hashes are ideed uniquely generated
champion_meta_df = pd.read_csv('champion_meta_metasrc_com.csv')
print("Sample ID column contains duplicates: " + str(champion_meta_df['Sample ID'].duplicated().any()))
print("Link column contains duplicates: " + str(champion_meta_df['Link'].duplicated().any()))
print("Dataset contains NaNs: " + str(champion_meta_df.isna().any().any()))

Sample ID column contains duplicates: False
Link column contains duplicates: False
Dataset contains NaNs: False


In [64]:
# Print out all unique values of a selected column
champion_meta_df['Tier'].unique().tolist()

['S+', 'S', 'A', 'B', 'C', 'D', 'Off']

In [65]:
# Inspect Dataset Types
champion_meta_df.dtypes

Name               object
Patch             float64
Region             object
Rank               object
Tier               object
Win Rate          float64
Pick Rate         float64
Ban Rate          float64
Matches             int64
Collected CEST     object
Link               object
Sample ID          object
dtype: object

In [66]:
# Inspect Dataset
champion_meta_df

Unnamed: 0,Name,Patch,Region,Rank,Tier,Win Rate,Pick Rate,Ban Rate,Matches,Collected CEST,Link,Sample ID
0,Aatrox,14.8,World,Gold,S+,0.4894,0.0909,0.1392,46181,2024-04-23 10:09:02,https://www.metasrc.com/lol///build/aatrox?ranks=gold,ff6861e35edf64ba921a093c887682d82a0cbac29ff7fff58e92aeb6131ff5af
1,Aatrox,14.7,World,Gold,S+,0.4930,0.0946,0.1533,113536,2024-04-23 10:09:04,https://www.metasrc.com/lol//14.7/build/aatrox?ranks=gold,147b0829bce4e3659681bbfa81001cb90368406098ae197ab55b8d7750665134
2,Aatrox,14.6,World,Gold,S+,0.4883,0.0996,0.1635,88673,2024-04-23 10:09:05,https://www.metasrc.com/lol//14.6/build/aatrox?ranks=gold,8a1d40c696c950a41bb93422a5a14a9b70cceabd9d050f49403a3e2506dbd020
3,Aatrox,14.5,World,Gold,S+,0.4875,0.1068,0.1649,85547,2024-04-23 10:09:06,https://www.metasrc.com/lol//14.5/build/aatrox?ranks=gold,523f490ea8088f181737fff63350d040b3d7256061a1ceb5acaa2f2233c84c43
4,Aatrox,14.4,World,Gold,S+,0.4870,0.1091,0.1730,117003,2024-04-23 10:09:08,https://www.metasrc.com/lol//14.4/build/aatrox?ranks=gold,4b9bd7b0e3290e53a669f595f928bd3465c5fcb6254be1a3fc00ce0288017890
...,...,...,...,...,...,...,...,...,...,...,...,...
5005,Zyra,14.8,EUW,Diamond+,A,0.3862,0.0264,0.0187,1032,2024-04-23 13:00:56,"https://www.metasrc.com/lol/euw//build/zyra?ranks=diamond,master,grandmaster,challenger",c82f5f9ff6f5e19a8fa0b96f0d07d396d956f3fe61b1b6d6f292be51f7dfcad5
5006,Zyra,14.7,EUW,Diamond+,B,0.5082,0.0247,0.0208,2055,2024-04-23 13:00:59,"https://www.metasrc.com/lol/euw/14.7/build/zyra?ranks=diamond,master,grandmaster,challenger",92c103b4cba2452c963b6334ba23cb1bb888f0603e54e93779b2c877bf503dd7
5007,Zyra,14.6,EUW,Diamond+,B,0.5242,0.0319,0.0271,2840,2024-04-23 13:01:00,"https://www.metasrc.com/lol/euw/14.6/build/zyra?ranks=diamond,master,grandmaster,challenger",6875946726155d806fdddc18ea8e7267107d99661bd442aac6e9706a89d6479b
5008,Zyra,14.5,EUW,Diamond+,B,0.5212,0.0337,0.0227,1735,2024-04-23 13:01:02,"https://www.metasrc.com/lol/euw/14.5/build/zyra?ranks=diamond,master,grandmaster,challenger",fa3c2673415c72394c0881a46a1bcf9bfb87dd099dab755367b7e89d986da380
