Import necessary files and libs:

In [1]:
import time
from io import StringIO

import pandas as pd
import requests as r
from bs4 import BeautifulSoup

from files.lol_genders import genders
from files.lol_names import correction_roles
from files.lol_regions import regions

### __Champion gender:__

Create dataframe:

In [2]:
lol_df = pd.read_csv("files/data.csv")

Add gender to respective champions:

In [3]:
# Write the mapped gender onto every row whose champion appears in `genders`.
for champion_name, champion_gender in genders.items():
    is_champion = lol_df["champion"].eq(champion_name)
    lol_df.loc[is_champion, "gender"] = champion_gender

In [None]:
lol_df

### __Champion roles:__

Get list from API of all champions:

In [5]:
# Data Dragon patch whose champion list is downloaded.
patch = "12.12.1"
# Fetch over HTTPS with a timeout so a stalled connection cannot hang the
# notebook, and fail loudly on a non-2xx reply instead of parsing an error body.
ddragon_response = r.get(
    f"https://ddragon.leagueoflegends.com/cdn/{patch}/data/en_US/champion.json",
    timeout=30,
)
ddragon_response.raise_for_status()
ddragon = ddragon_response.json()
# (key, info) pairs for every entry under the payload's 'data' mapping.
champions_all_info = list(ddragon['data'].items())

Create list from API keys:

In [7]:
# The first element of each (key, info) pair is the champion's API key.
champions_keys = [champion_key for champion_key, _ in champions_all_info]

In [None]:
champions_keys

Store information about champions:

In [9]:
champions_tuple = tuple(champions_all_info)

Create list with champions:

In [10]:
# Display name of every champion, in the same order as the API payload.
list_champions = [champion_info["name"] for _, champion_info in champions_tuple]

Create list with champions and tags:

In [11]:
# Pair each champion's display name with its role tags from the API.
# Only the first two tags are kept, joined by ", ". Slicing handles
# single-tag champions directly, so no bare `except` is needed (it would
# also have hidden unrelated errors such as a missing key).
list_tags = [
    [champion_name, ", ".join(ddragon['data'][champion_key]['tags'][:2])]
    for champion_key, champion_name in zip(champions_keys, list_champions)
]

In [None]:
list_tags

Create dataframe:

In [14]:
tags_df = pd.DataFrame(list_tags, columns=["champion", "role"])

Apply name correction in dataframe:

In [18]:
# Replace each champion name listed in `correction_roles` with its corrected value.
for old_name, new_name in correction_roles.items():
    needs_fix = lol_df["champion"].eq(old_name)
    lol_df.loc[needs_fix, "champion"] = new_name

Merge dataframes:

In [19]:
# No `on=`/`how=` given: pandas performs an inner join on every column name
# the two frames share, so rows without a match in tags_df are dropped.
lol_df = lol_df.merge(tags_df)

In [None]:
lol_df

### __Scraping Stats with Selenium:__

**Note:**
- You need to install Selenium and a WebDriver to proceed.
- Selenium will drive an automated browser on your computer.

#### __Get HTML in two different ways:__

##### __● If you're running this file in Google Colab:__

In [None]:
pip install kora

In [None]:
from kora.selenium import wd
import time

In [None]:
# Load the statistics page with the `wd` driver imported from kora.selenium,
# then keep the page markup in `html` for the parsing cells further down.
wd.get("https://na.op.gg/statistics/champions?hl=en_US&region=global")
time.sleep(7) # Fixed pause before reading the page source
html = wd.page_source

##### __● If you're running this file in Linux:__

In [None]:
pip install selenium

In [None]:
pip install webdriver-manager

In [24]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Start a Chrome session using the driver binary that webdriver-manager downloads.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = "https://na.op.gg/statistics/champions?hl=en_US&region=global"

try:
    driver.get(url)
    time.sleep(7) # Fixed pause before reading the page source
    html = driver.page_source
finally:
    # quit() ends the whole browser session (close() only closes the current
    # window), and the finally block runs even when loading the page fails,
    # so no browser/driver process is left behind.
    driver.quit()

##### __Let's continue:__

Transform html table into dataframe:

In [25]:
# read_html returns one DataFrame per <table> found; index 1 is the table used
# downstream. The markup is wrapped in StringIO because newer pandas versions
# deprecate passing a literal HTML string directly.
stats_df = pd.read_html(StringIO(html))[1]

Clean and organize column names:

In [26]:
# Drop the columns that are not needed (silently skipping any that are absent),
# switch the remaining headers to snake_case, and order rows by champion name.
unused_columns = ["#", "CS", "Gold", "Games played", "KDA"]
snake_case_names = {
    "Champion": "champion",
    "Win rate": "win_rate",
    "Pick ratio per game": "pick_rate",
    "Ban ratio per game": "ban_rate",
}

stats_df = (
    stats_df
    .drop(columns=unused_columns, errors='ignore')
    .rename(columns=snake_case_names)
    .sort_values("champion", ignore_index=True)
)

Merge dataframes:

In [27]:
# Inner join on every column name shared by both frames (no `on=` given);
# sort=True orders the result by the join keys.
lol_df = lol_df.merge(stats_df, sort=True)

In [None]:
stats_df

In [None]:
lol_df

### __Scraping Regions with Selenium:__

In [32]:
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver

In [33]:
def get_html_and_scrap(reg):
    """Scrape the champion cards shown on one Universe region page.

    Opens ``https://universe.leagueoflegends.com/en_US/region/<reg>/`` in a
    headless Chrome session, waits a fixed three seconds, and reads every
    ``div`` with class ``copy_xxN7``, taking its ``<h1>`` and ``<h2>`` text
    as one row.

    Rows are appended to the module-level list ``champs_with_region``, which
    must exist before the first call. Because that list is shared, the
    returned frame is cumulative: it contains the rows gathered by every call
    made so far, not only those of ``reg``.

    Args:
        reg: Region slug inserted into the URL path.

    Returns:
        pandas.DataFrame built from the whole ``champs_with_region`` list,
        with two unnamed columns (h1 text, h2 text).

    Raises:
        AttributeError: If a matched card lacks an ``<h1>`` or ``<h2>``.
    """
    options = webdriver.ChromeOptions()
    # Passing the flag directly works across Selenium releases, unlike the
    # `options.headless` setter, which newer releases deprecate.
    options.add_argument("--headless")

    # `options=` is the supported keyword; `chrome_options=` is deprecated.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    url = f"https://universe.leagueoflegends.com/en_US/region/{reg}/"

    try:
        driver.get(url)
        time.sleep(3)  # fixed pause before reading the page source
        html = driver.page_source
    finally:
        # Always end the browser session, even if loading the page failed;
        # quit() also shuts down the driver, which close() does not.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    champs_with_region.extend(
        [card.h1.text, card.h2.text]
        for card in soup.find_all("div", class_="copy_xxN7")
    )

    return pd.DataFrame(champs_with_region)

Run multi-page scraping:

In [None]:
# Shared accumulator that get_html_and_scrap appends to on every call.
champs_with_region = []
# One returned DataFrame per region, in the order `regions` yields them.
scrap_region_df = [get_html_and_scrap(region_slug) for region_slug in regions]

Keep only the last dataframe (it already contains the rows from every region):

In [36]:
# Every call to get_html_and_scrap returns a frame built from the same growing
# list, so the last frame already holds the rows of all regions.
# NOTE: this rebinds the name from a list to a DataFrame, so the cell cannot be
# re-run without first re-running the scraping cell.
scrap_region_df = scrap_region_df[-1]

In [None]:
scrap_region_df

Organize the scraped DataFrame:

In [37]:
# Name the two positional columns, keep the first row seen for each champion,
# and order rows alphabetically with a fresh 0..n-1 index.
# TODO (translated from the original note): use the drop_and_sort_rows function here.
scrap_region_df = (
    scrap_region_df
    .rename(columns={0: "champion", 1: "region"})
    .drop_duplicates(subset="champion", keep="first")
    .sort_values(by="champion", ignore_index=True)
)

Apply name correction in champions:

In [39]:
# Replace typographic apostrophes (’) with ASCII ones (') in champion names.
# The result is assigned back to the column instead of using `inplace=True`
# on the attribute-accessed Series: that form mutates an intermediate object
# and is not guaranteed to update the DataFrame (it does not under pandas
# Copy-on-Write).
scrap_region_df["champion"] = scrap_region_df["champion"].replace("’", "'", regex=True)

Create template with organized champions:

In [40]:
# One row per distinct champion in lol_df, ordered alphabetically.
# TODO (translated from the original note): use the drop_and_sort_rows function here.
main_champions = (
    pd.DataFrame({"champion": lol_df.champion.unique()})
    .drop_duplicates(subset="champion", keep="first")
    .sort_values(by="champion", ignore_index=True)
)

Merge data to region Dataframe:

In [41]:
# Left join keeps every champion from the template; champions that were not
# scraped end up with a missing region.
# TODO (translated from the original note): use the drop_and_sort_rows function here.
full_region_df = (
    main_champions
    .merge(scrap_region_df, how="left", sort=True)
    .drop_duplicates(subset="champion", keep="first")
    .sort_values(by="champion", ignore_index=True)
)

Add Runeterra for champions without region:

In [42]:
# Champions with no scraped region default to "Runeterra".
# A single .loc call on the frame replaces the chained `df.region.loc[...] = ...`
# form, which assigns through an intermediate Series and may not write back to
# the DataFrame (it triggers chained-assignment warnings and does nothing under
# pandas Copy-on-Write).
full_region_df.loc[full_region_df["region"].isnull(), "region"] = "Runeterra"

Merge dataframes:

In [45]:
# Inner join on every column name shared by both frames (no `on=` given);
# sort=True orders the result by the join keys.
lol_df = lol_df.merge(full_region_df, sort=True)

In [None]:
full_region_df

In [None]:
lol_df

---