In [1]:
import cassiopeia as cass
import constants
import random
import requests
from bs4 import BeautifulSoup
from bs4 import Comment
import pandas as pd
import re
import math
from urllib.parse import unquote

# Test Main Code

In [30]:
# Functions to collect player info for the top 4% of League of Legends players listed on OP.GG
def get_opgg_info(opgg_url: str, timeout: float = 10.0) -> BeautifulSoup:
    """Download an OP.GG page and parse it into a BeautifulSoup tree.

    Args:
        opgg_url (str): Full URL of the page to download.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout ``requests.get`` can block indefinitely.
    Returns:
        BeautifulSoup: The response body parsed with ``html.parser``.
    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
    """
    opgg_response = requests.get(opgg_url, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to the parsers
    # downstream, where it would surface as an unrelated AttributeError.
    opgg_response.raise_for_status()
    # Decode the body as UTF-8 rather than relying on the guessed encoding
    opgg_response.encoding = 'utf-8'

    return BeautifulSoup(opgg_response.text, "html.parser")


def opgg_page_count(
    opgg_text: BeautifulSoup,
    top_fraction: float = 0.04,
    players_per_page: int = 100,
) -> int:
    """Compute how many leaderboard pages cover the top slice of players.

    Args:
        opgg_text (BeautifulSoup): Parsed leaderboard page. It must contain a
            ``div`` with class ``css-1a7bwbq ef3mb1o0`` whose text includes
            ``Total <number>`` (thousands separated by commas).
        top_fraction (float): Share of the player base to keep (0.04 = top 4%).
        players_per_page (int): Number of players listed on one page.
    Returns:
        int: Number of pages needed to list the selected players.
    Raises:
        ValueError: If the total player count cannot be found on the page.
    """
    total_player_div = opgg_text.find_all('div', class_='css-1a7bwbq ef3mb1o0')
    # Match only digits and thousands separators so that markup directly
    # following the number (e.g. a closing tag) is not captured with it.
    count_match = re.search(r'(?<=Total )\d[\d,]*', str(total_player_div))
    if count_match is None:
        raise ValueError("Could not find the 'Total <count>' player counter on the OP.GG page")

    total_player_count = int(count_match.group().replace(',', ''))
    high_elo_count = math.floor(total_player_count * top_fraction)

    return math.ceil(high_elo_count / players_per_page)

# Not to be included in final code
def get_usernames_taglines(hyperlink_string):
    """Extract one player's username and tagline from an OP.GG profile link.

    Args:
        hyperlink_string: A summoner ``<a>`` element (or its HTML text) whose
            href contains ``kr/<url-encoded username>-<tagline>``.
    Returns:
        dict: A single-entry dictionary mapping the decoded username to the
            player's tagline.
    Raises:
        ValueError: If the link holds no ``kr/<username>-<tagline>`` path.
    """
    link_match = re.search(r'(?<=kr\/)\S+(?=">)', str(hyperlink_string))
    if link_match is None:
        raise ValueError(f"No KR player path found in hyperlink: {hyperlink_string!r}")

    # The tagline is whatever follows the LAST hyphen, so usernames that
    # contain hyphens themselves are kept in one piece.
    username, separator, tagline = unquote(link_match.group()).rpartition('-')
    if not separator:
        raise ValueError(f"Player path has no '-<tagline>' suffix: {link_match.group()!r}")

    return {username: tagline}

In [31]:
# Function to get player usernames and taglines
def create_kr_player_dict(opgg_html_link: str, max_pages=0) -> dict:
    """Collect usernames and taglines of top KR ladder players from OP.GG.

    Args:
        opgg_html_link (str): Leaderboard URL. It must contain a ``page=<n>``
            query parameter; only that value is rewritten for each page.
        max_pages (int): FOR TESTING ONLY. When greater than 0 it replaces the
            page count computed from the leaderboard.
    Returns:
        dict: Maps each username to its tagline. The username is the key, so a
            later player with the same username overwrites an earlier one.
    """
    # Calculate number of pages to parse
    opgg_text = get_opgg_info(opgg_html_link)
    num_webpages = opgg_page_count(opgg_text)
    player_search = re.compile(r'(?<=kr\/)\S+(?=">)')
    # Target the page number only; replacing every digit in the URL would
    # corrupt multi-digit page values and any other number in the link.
    page_param = re.compile(r'(?<=[?&]page=)\d+')

    # FOR TESTING ONLY
    if max_pages > 0:
        num_webpages = max_pages
    # FOR TESTING ONLY

    player_dict = {}
    for page in range(1, num_webpages + 1):
        opgg_page = get_opgg_info(page_param.sub(str(page), opgg_html_link))

        for player in opgg_page.find_all('a', class_='summoner-link'):
            player_match = player_search.search(str(player))
            if player_match is None:
                # Anchor without a KR player path: not a player entry
                continue

            # Split on the LAST hyphen so hyphenated usernames stay intact
            username, separator, tagline = unquote(player_match.group()).rpartition('-')
            if not separator:
                continue

            # Key is a player's username, value is a player's tagline
            player_dict[username] = tagline

    return player_dict

In [32]:
# Smoke test: scrape only the first two OP.GG leaderboard pages of top KR players
test_dict = create_kr_player_dict(
    'https://www.op.gg/leaderboards/tier?region=kr&type=ladder&page=1',
    max_pages=2,
)

## Web-scraping for top League of Legends players

In [33]:
# Fetch and parse the first page of the KR ladder leaderboard.
# NOTE: op.gg may need an explicit User-Agent; none is set here, so the
# request is sent with the default headers of `requests`.
opgg_url = 'https://www.op.gg/leaderboards/tier?region=kr&type=ladder&page=1'
# A timeout keeps this cell from hanging forever if op.gg never answers
opgg_response = requests.get(opgg_url, timeout=10)
opgg_response.encoding = 'utf-8'
opgg_html = opgg_response.text
opgg_soup = BeautifulSoup(opgg_html, "html.parser")

In [34]:
# Read the total KR player count from op.gg and size the scrape from it
total_player_div = opgg_soup.find_all('div', class_='css-1a7bwbq ef3mb1o0')
total_player_count = int(
    re.search(r'(?<=Total )\S+', str(total_player_div)).group().replace(',', '')
)
# Keep the top 4% of the KR player base
high_elo_count = math.floor(total_player_count * 0.04)
# Divide by 100 players per page, rounding up to whole pages
num_pages = math.ceil(high_elo_count / 100)
num_pages

683

In [35]:
# Collect every player profile anchor (<a class="summoner-link">) on the page
user_info_hyperlinks = opgg_soup.find_all('a', attrs={'class': 'summoner-link'})

In [36]:
# Small sample (first five links) for testing the functionality of cassiopeia
test_list = user_info_hyperlinks[:5]

In [37]:
# Build a dictionary of unique username -> tagline pairs
player_data_dict = {}
for player in test_list:  # Replace test_list with user_info_hyperlinks in final code
    player_info = get_usernames_taglines(player)
    for username, tagline in player_info.items():
        player_data_dict[username] = tagline

## Testing Cassiopeia and RiotWatcher for Riot API interface

...

## Because Cassiopeia and RiotWatcher behaved inconsistently, I've opted to write my own functions for pulling data from Riot's API

### Pros
- Keeps functionality simple by parsing JSON responses into usable forms
- Should be able to grab all necessary data

### Cons
- Need to ensure calls to the Riot API stay within both rate limits: 20 requests per second and 100 requests per 2 minutes