Import necessary files and libs:

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from files.lol_regions import regions
from files.lol_genders import genders
from files.lol_names import correction_roles

### __Champion gender:__

Create dataframe:

In [2]:
# Load the base dataset; later cells rely on it having a `champion` column.
lol_df = pd.read_csv("files/data.csv")

Add gender to respective champions:

In [3]:
# Walk the champion -> gender mapping and tag every matching row.
# Rows whose champion is not a key in `genders` are not assigned here.
for champion_name, champion_gender in genders.items():
    is_this_champion = lol_df["champion"] == champion_name
    lol_df.loc[is_this_champion, "gender"] = champion_gender

In [None]:
# Inspect the frame after the `gender` column has been filled in.
lol_df

### __Champion roles:__

Get list from API of all champions:

In [4]:
# Data Dragon version whose champion list is downloaded.
patch = "12.12.1"

# Fetch over HTTPS with a timeout so a stalled connection cannot hang the
# notebook, and fail loudly on an HTTP error instead of trying to parse an
# error page as JSON.
ddragon_response = requests.get(
    f"https://ddragon.leagueoflegends.com/cdn/{patch}/data/en_US/champion.json",
    timeout=30,
)
ddragon_response.raise_for_status()
ddragon = ddragon_response.json()

# List of (champion key, champion info dict) pairs.
champions_all_info = list(ddragon['data'].items())

Create list from API keys:

In [5]:
# Keep only the first element (the API key) of every (key, info) pair.
champions_keys = [api_key for api_key, _info in champions_all_info]

In [None]:
# Inspect the API keys collected above.
champions_keys

Store information about champions:

In [8]:
# Immutable copy of the (key, info) pairs fetched from the API.
champions_tuple = tuple(champions_all_info)

Create list with champions:

In [9]:
# Display name of every champion, in the same order as `champions_tuple`.
list_champions = [champion_info["name"] for _api_key, champion_info in champions_tuple]

Create list with champions and tags:

In [10]:
# Build [champion name, "Tag1, Tag2"] rows from the API data.
# Slicing to the first two tags and joining them covers champions with one or
# two tags alike, so no try/except is needed to detect a missing second tag
# (the previous bare `except:` also hid unrelated errors).
list_tags = []
for api_key, champion_name in zip(champions_keys, list_champions):
    champion_tags = ddragon['data'][api_key]['tags']
    list_tags.append([champion_name, ", ".join(champion_tags[:2])])

In [None]:
# Inspect the [champion, tags] rows built above.
list_tags

Create dataframe:

In [12]:
# Two-column frame: champion display name and its comma-separated tags (stored as `role`).
tags_df = pd.DataFrame(list_tags, columns=["champion", "role"])

Apply name correction in dataframe:

In [13]:
# Rewrite champion names in `lol_df` one mapping entry at a time:
# every row whose name equals the mapping key gets the mapping value instead.
for old_name, new_name in correction_roles.items():
    needs_rename = lol_df["champion"] == old_name
    lol_df.loc[needs_rename, "champion"] = new_name

Merge dataframes:

In [14]:
# No `on=`/`how=` given: pandas joins on the column names both frames share
# and keeps only rows that match on both sides (inner join).
lol_df = lol_df.merge(tags_df)

In [None]:
# Inspect the frame after the merge with the tag/role table.
lol_df

### __Champion Stats with Selenium:__

**Note:**
- You need to install Selenium and webdriver-manager to proceed.
- Selenium will run a hidden (headless) browser on your PC.

In [16]:
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium import webdriver

In [None]:
# Run Chrome without a visible window. The command-line flag is used instead
# of the deprecated `options.headless` setter.
options = webdriver.ChromeOptions()
options.add_argument("--headless")

url = "https://na.op.gg/statistics/champions?hl=en_US&region=global"
# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): passing the driver path positionally is only accepted by older
# Selenium releases; newer ones expect a Service object — confirm against the
# installed version.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)

try:
    driver.get(url)
    # Element lookups below retry for up to 7 seconds before failing.
    driver.implicitly_wait(7)
    # Looking up a cell in table row 118 makes the notebook wait until the
    # statistics table has been rendered at least that far.
    driver.find_element(By.XPATH, '//*[@id="content-container"]/div[2]/table/tbody/tr[118]/td[2]')
    html = driver.page_source
finally:
    # Always end the session, even when loading or the lookup fails, so that
    # no browser/driver process is left running.
    driver.quit()

Transform html table into dataframe:

In [18]:
# `read_html` returns one DataFrame per <table> in the page; keep the second one.
stats_df = pd.read_html(html)[1]

In [None]:
# Inspect the table as parsed from the page, before any cleanup.
stats_df

Clean and organize column names:

In [19]:
# Cleanup pipeline for the scraped statistics table:
#   1. drop the columns that are not needed (silently skipping absent ones),
#   2. switch the remaining headers to snake_case names,
#   3. order rows alphabetically by champion with a fresh 0..n-1 index.
unused_columns = ["#", "CS", "Gold", "Games played", "KDA"]
renamed_columns = {
    "Champion": "champion",
    "Win rate": "win_rate",
    "Pick ratio per game": "pick_rate",
    "Ban ratio per game": "ban_rate",
}

stats_df = (
    stats_df
    .drop(unused_columns, axis=1, errors='ignore')
    .rename(columns=renamed_columns)
    .sort_values("champion", axis=0, ignore_index=True)
)

Merge dataframes:

In [20]:
# Inner join on the column names both frames share; `sort=True` orders the
# result by the join keys.
lol_df = lol_df.merge(stats_df, sort=True)

In [None]:
# Inspect the cleaned statistics table.
stats_df

In [None]:
# Inspect the main frame after the statistics were merged in.
lol_df

### __Champion Region:__

Requests method - **Fast**:

In [21]:
def get_json_and_scrap(reg):
    """Download one faction's JSON index and record its champions.

    Appends a ``[champion title, faction name]`` pair to the notebook-level
    list ``champs_with_region`` for every entry under
    ``'associated-champions'`` in the response.

    Parameters
    ----------
    reg : str
        Faction identifier inserted into the request URL.

    Returns
    -------
    None
        Results are delivered through ``champs_with_region``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    """
    url = f'https://universe-meeps.leagueoflegends.com/v1/en_gb/factions/{reg}/index.json'

    # A timeout keeps a stalled connection from blocking the whole loop, and
    # the status check surfaces HTTP errors before JSON parsing is attempted.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    payload = response.json()

    region_name = payload['faction']['name']
    champs_with_region.extend(
        [member['title'], region_name]
        for member in payload['associated-champions']
    )

Run multi-page scraping:

In [22]:
# Start from an empty result list so re-running this cell does not duplicate rows.
champs_with_region = []
# One HTTP request per entry in `regions`; each call appends to `champs_with_region`.
for region in regions:  #   ~13 seconds
    get_json_and_scrap(region)

---

Selenium method - **Slow** (Optional):

In [22]:
def get_html_and_scrap(reg):
    """Render one region page in headless Chrome and record its champions.

    Opens the region page, waits for the element with id
    ``champions-of-faction``, then parses the rendered HTML. For every
    ``div`` with class ``copy_xxN7`` a ``[h1 text, h2 text]`` pair is appended
    to the notebook-level list ``champs_with_region``.

    Parameters
    ----------
    reg : str
        Region identifier inserted into the page URL.

    Returns
    -------
    None
        Results are delivered through ``champs_with_region``.
    """
    # The command-line flag replaces the deprecated `options.headless` setter.
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")

    # `options=` replaces the deprecated `chrome_options=` keyword.
    # NOTE(review): passing the driver path positionally is only accepted by
    # older Selenium releases; newer ones expect a Service object — confirm
    # against the installed version.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    url = f"https://universe.leagueoflegends.com/en_US/region/{reg}/"

    try:
        driver.get(url)
        # Element lookups retry for up to 5 seconds before failing.
        driver.implicitly_wait(5)
        # Block until the champions section exists in the rendered page.
        driver.find_element(By.ID, 'champions-of-faction')
        html = driver.page_source
    finally:
        # Always end the session so a failed page does not leave a
        # browser/driver process behind.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    for champ in soup.find_all("div", class_="copy_xxN7"):
        champs_with_region.append([champ.h1.text, champ.h2.text])

Run multi-page scraping:

In [None]:
# Resets the result list, discarding anything collected by an earlier scrape cell.
champs_with_region = []
# One headless-browser session per entry in `regions`; each call appends to `champs_with_region`.
for region in regions:  #   ~3 minutes
    get_html_and_scrap(region)

#### __Let's continue:__

Create Dataframe:

In [24]:
# Turn the collected [champion, region] pairs into a two-column frame.
scrap_region_df = pd.DataFrame(champs_with_region, columns=["champion","region"])

In [None]:
# Inspect the scraped champion/region pairs.
scrap_region_df

Create a function to organize dataframes:

In [26]:
def drop_and_sort_rows(dframe):
    """De-duplicate and alphabetise a frame by its ``champion`` column, in place.

    For each champion only the first row encountered is kept. The surviving
    rows are then sorted by champion and the index is renumbered from 0.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Frame with a ``champion`` column; it is modified directly.

    Returns
    -------
    None
    """
    key_column = "champion"
    dframe.drop_duplicates(subset=[key_column], keep="first", inplace=True)
    dframe.sort_values(key_column, inplace=True, ignore_index=True)

Organize and apply name correction:

In [27]:
# Replace typographic apostrophes (’) in champion names with plain ones (').
# The result is assigned back to the column: calling `replace(..., inplace=True)`
# on `scrap_region_df.champion` is chained assignment, which pandas warns about
# and which no longer updates the frame once Copy-on-Write is enabled.
scrap_region_df["champion"] = scrap_region_df["champion"].replace("’", "'", regex=True)
drop_and_sort_rows(scrap_region_df)

Create template with organized champions:

In [28]:
# One row per distinct champion present in `lol_df`, sorted alphabetically.
unique_champion_names = lol_df["champion"].unique()
main_champions = pd.DataFrame({"champion": unique_champion_names})
drop_and_sort_rows(main_champions)

Merge data to region Dataframe:

In [29]:
# Left join: every champion from the template is kept, and champions that
# were not found by the scrape end up with a missing region.
full_region_df = pd.merge(main_champions, scrap_region_df, how="left", sort=True)
drop_and_sort_rows(full_region_df)

Add Runeterra for champions without region:

In [30]:
# Champions with no scraped region default to "Runeterra".
# A single `.loc` call on the frame is used because assigning through
# `full_region_df.region.loc[...]` is chained assignment: pandas warns about it
# and it no longer updates the frame once Copy-on-Write is enabled.
full_region_df.loc[full_region_df["region"].isnull(), "region"] = "Runeterra"

Merge and organize dataframes:

In [31]:
# Attach the region to every row of the main frame.
lol_df = pd.merge(lol_df, full_region_df, sort=True)

# Champion-level view: remove the voice-line columns, then keep one sorted
# row per champion.
lol_info_df = lol_df.drop(columns=["voice_line", "is_spoken"])
drop_and_sort_rows(lol_info_df)

In [None]:
# Complete dataframe, still including the `voice_line` and `is_spoken` columns:
lol_df

In [None]:
# Complete dataframe without the voice columns, one row per champion:
lol_info_df

---