In [51]:
import urllib.request
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
import csv
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from ipywidgets import widgets, interactive
import time
import re

In [2]:
import plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode

# Fire Emblem: Heroes Data Scraping

## Introduction

In this project, we will be taking a look at character data from the popular mobile game *Fire Emblem: Heroes*, available on iPhone and Android. Fire Emblem, a strategy RPG franchise developed by Intelligent Systems, added this mobile game to its list of entries following the popularity of recent titles *Fire Emblem: Awakening* and *Fire Emblem: Fates* for the 3DS. In this entry, you are in the position of a tactician, deploying up to four characters from the Fire Emblem universe to do battle against the opposing enemy team. The game utilizes a rock-paper-scissors battle mechanic, with some weapons being superior to others. 

Like many popular mobile games, *Fire Emblem: Heroes* is a free-to-play Gacha game, where characters are drawn from a random pool, each with varying rarity and power. 


## Web-Scraping (Using Selenium)

We begin this project by obtaining the character data, using data hosted on gamepress.com (I do not own the data). A note before you web-scrape: a helpful way to check whether you are allowed to scrape data from a web domain is to append _/robots.txt_ to the end of the website's link. 

For example, we will type:  
https://www.gamepress.com/robots.txt

And we see that we have full access for scraping.

If you need a reference for what the symbols on the robots.txt page mean, visit [this website](https://www.promptcloud.com/blog/how-to-read-and-respect-robots-file/).

We will be utilizing a Python package called Selenium to assist us with web-scraping duties. We will use the package to help us obtain information regarding how the characters are rated according to GamePress. We will use this data later on to build a prediction model.

To proceed, you will need to download a driver. I am using chrome, so I downloaded a chrome driver from [here](https://chromedriver.chromium.org/)

### General Character Information

We will start off by getting general character information, which includes their name, their stats, what type of weapon they use, what their character type is, and their tier according to the GamePress Offensive Tier List. 

In [7]:
# Load the GamePress hero index in a visible (non-headless) Chrome window and
# keep the rendered HTML so the hero table can be parsed in the next cell.
ser = Service("./chromedriver")
op = webdriver.ChromeOptions()
browser = webdriver.Chrome(service=ser, options=op)

gg = "https://gamepress.gg/feheroes/heroes"
try:
    browser.get(gg)
    respData = browser.page_source
finally:
    # quit() ends the whole WebDriver session (close() only closes the current
    # window), and the finally clause releases it even when the page load fails.
    browser.quit()

nsoup = BeautifulSoup(respData, 'html.parser')
n_table = nsoup.find("table", id = "heroes-new-list")
if n_table is None:
    # Fail here with a clear message instead of an AttributeError in a later cell.
    raise RuntimeError("Could not find the 'heroes-new-list' table on " + gg)

In [8]:
# The slice [1::3] starts at the second <tr> and takes every third row after it;
# each selected row carries one hero's data in its HTML attributes.
hero_rows = n_table.findAll("tr")[1::3]


def _row_attr(attr_name):
    """Return the given HTML attribute of every hero row (None where absent)."""
    return [row.attrs.get(attr_name) for row in hero_rows]


gpress = _row_attr("data-name")
hp = _row_attr("data-hp")
attack = _row_attr("data-atk")
speed = _row_attr("data-spd")
defense = _row_attr("data-def")
res = _row_attr("data-res")
total = _row_attr("data-total")

# "data-element" is split on whitespace: the first word is used as the colour,
# the last word as the weapon.
element_words = [row.attrs.get("data-element").split() for row in hero_rows]
color = [words[0] for words in element_words]
weap = [words[-1] for words in element_words]

# Movement codes found in "data-cat-1":
#   331 - infantry, 306 - cavalier, 326 - armor, 316 - flying
mov = _row_attr("data-cat-1")
rar = _row_attr("data-stars")

# Absolute URL of each hero's own page, taken from the link in the first cell.
urls = [
    "https://gamepress.gg" + row.findAll("td")[0].find("a")["href"]
    for row in hero_rows
]
other = _row_attr("data-cat-")

# The tier is not read from this table; it is merged in from the tier list later.
dfn = pd.DataFrame({
    "Name": gpress,
    "Color": color,
    "Weapon": weap,
    "MoveCode": mov,
    "Rarities": rar,
    "HP": hp,
    "Atk": attack,
    "Spd": speed,
    "Def": defense,
    "Res": res,
    "Total": total,
})

In [9]:
# Preview the first rows of the table built from the hero index page.
dfn.head()

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total
0,Sharena,Blue,Lance,331,4_Star_Story,43,32,32,29,22,158
1,Flame Emperor,Green,Axe,326,3-4_Grand_Hero_Battle,50,40,25,37,26,178
2,Rath,Green,Bow,306,3-4,41,33,35,23,24,156
3,Fiora,Blue,Lance,316,stars-5-Star,38,32,38,21,37,166
4,Leila,Gray,Dagger,331,stars-5-Star,40,36,41,23,26,166


In [10]:
# Fetch the Aether Raids tier list page; the hero names and tiers are parsed
# from tsoup in the next cell.
ser = Service("./chromedriver")
op = webdriver.ChromeOptions()
# below two lines allow us to initiate "Headless Mode", which means that selenium works silently
op.add_argument('--headless')
op.add_argument('--disable-gpu')
browser = webdriver.Chrome(service=ser, options=op)
tlist = "https://gamepress.gg/feheroes/aether-raids-tier-list"
try:
    browser.get(tlist)
    tData = browser.page_source
finally:
    # quit() ends the WebDriver session even if the page load raised;
    # close() would only close the current window.
    browser.quit()
tsoup = BeautifulSoup(tData, 'html.parser')

In [11]:
# Map each hero name to the 1-based position of the tier block it appears in.
char_dict = {}
tier_count = 0


def _is_tier_id(div_id):
    """True-ish for element ids that start with 'tier-'."""
    return div_id and div_id.startswith('tier-')


for field in tsoup.find_all('div', attrs={'class': 'field__item'}):
    # Only field items that contain a <div id="tier-..."> count as a tier.
    if not field.find('div', id=_is_tier_id):
        continue
    tier_count += 1
    for cell_row in field.find_all('div', attrs={'class': 'tier-list-cell-row'}):
        char_dict[cell_row.attrs.get("data-title")] = tier_count

print("There are " + str(tier_count) + " classes in this tier-list")

There are 5 classes in this tier-list


In [13]:
# Attach the tier of every hero by name; heroes missing from the tier list get NaN.
tiers = pd.DataFrame.from_dict(char_dict, orient = "index", columns = ["Tier"])
# Drop any Tier column left over from a previous run of this cell so that
# re-running it does not produce Tier_x / Tier_y duplicates.
dfn = pd.merge(dfn.drop(columns = "Tier", errors = "ignore"), tiers,
               left_on = "Name", right_index = True, how = "left")

### Specific Character Details

What we will do next is to parse through each character's stat page on GamePress to grab information that we believe is useful to help predict a character's tier, along with other details which can serve a useful purpose for other projects.

We currently grab the following details:
* The tier itself
* Game origin
* Whether character is a legendary hero
* Whether character is a duo hero
* Whether character can refine their weapon
* Whether character has a unique weapon
* Link for hero image
* Character movement type
* Banners in which the character shows up in

NOTE: Running the cell below will take quite a lot of time (when I last ran it, it took >30 minutes), so have something to do while you wait - like playing Fire Emblem: Heroes.

10:13 - 11:48
(as of now, the buffer alone is like 1.25 hours...)

Link for limited: https://gamepress.gg/feheroes/limited

In [15]:
# Download every hero page listed in `urls` and keep the parsed HTML in all_char
# (same order as `urls`) so later cells can extract details without re-fetching.
ser = Service("./chromedriver")
op = webdriver.ChromeOptions()
# below two lines allow us to initiate "Headless Mode", which means that selenium works silently
op.add_argument('--headless')
op.add_argument('--disable-gpu')
browser = webdriver.Chrome(service=ser, options=op)

all_char = []

try:
    for needed in urls:
        browser.get(needed)
        chardata = browser.page_source

        # has all of the html data
        soupy = BeautifulSoup(chardata, 'html.parser')
        all_char.append(soupy)

        # buffer for web driver
        time.sleep(4)
finally:
    # This loop runs for a long time; make sure the browser is shut down even
    # when one of the page loads raises part-way through.
    browser.quit()

In [16]:
# Hero title = the text of every <span> in the "hero-details-table", concatenated.
title = [
    "".join(
        span.text
        for span in char.find("table", id = "hero-details-table").findAll("span")
    )
    for char in all_char
]
dfn["Title"] = title

In [258]:
# Game of origin per hero: a single string when one game is listed, a list of
# strings otherwise, and the string "None" when the origin block is missing.
origin = []
for char in all_char:
    try:
        origin_block = char.find("div", {"class":
            "field field--name-field-origin field--type-entity-reference field--label-hidden field__items"})
        game_names = [
            game.text
            for game in origin_block.findAll("div", {"class": 'field__item'})
        ]
    except AttributeError:
        # char.find returned None, i.e. the page has no origin block.
        origin.append("None")
        continue
    origin.append(game_names[0] if len(game_names) == 1 else game_names)

dfn["Origin"] = origin

In [195]:
# Per-hero feature lists. The loop below appends exactly one entry per page in
# all_char to each of them (except `tier`, which this loop never fills), so the
# lists stay aligned with the order of all_char.
title = []
w_upgrades = []
personal = []
origin = []
img = []
is_legend = []
move_t = []
is_duo = []
col2 = []
weap2 = []
# below are new additions
tier = []
# banner can be a list to easily get banner count
banner = []
num_banner = []

for char in all_char:
    
    # get hero title: concatenated text of every <span> in the details table
    char_title = ""
    for span in char.find("table", id = "hero-details-table").findAll("span"):
        char_title += span.text
    title.append(char_title)
    
    weapinfo = char.find("div", id = "weapon-skills")
    
    # retrieves whether the character has weapon-refines:
    # True when the first "weapon-upgrades-section" contains a "view-content" div
    if weapinfo.findAll("div", {"id": "weapon-upgrades-section"})[0].find(
        "div", {"class": "view-content"}) is not None:
        w_upgrades.append(True)
    else:
        w_upgrades.append(False)

    # retrieves whether the character has a personal, non-inheritable weapon
    i = 0
    per = False
    for item in weapinfo.findAll("div", {"class": "views-element-container"
        })[0].findAll("tr"):
        # skip the first <tr> (presumably the header row - TODO confirm)
        if i == 0:
            i += 1
            continue
        # rows without any <div> carry nothing to inspect
        if not item.findAll("div"):
            continue
        # the marker text is looked for in the last <div> of the row
        if "Non-Inheritable skill" in item.findAll("div")[-1].get_text():
            per = True
            break
    if per is False:
        personal.append(False)
    else:
        personal.append(True)

    # get origin information
    # need to start taking into account duos with origin of multiple games
    # Result per hero: a string for one game, a list for several, None when the
    # origin block is missing (char.find returns None -> AttributeError).
    try:
        attempt_orig = char.find("div", {"class": 
         "field field--name-field-origin field--type-entity-reference field--label-hidden field__items"})
        games = attempt_orig.findAll("div", {"class": 'field__item'})
        if len(games) == 1:
            origin.append(games[0].text)
        else:
            game_lst = []
            for game in attempt_orig.findAll("div", {"class": 'field__item'}):
                game_lst.append(game.text)
            origin.append(game_lst)
    except AttributeError:
        origin.append(None)
        
    # get hero image link (the src is site-relative, so the host is prefixed)
    image = char.find("div", id = "hero-image").find("img")["src"]
    img.append("https://gamepress.gg" + image)

    att = char.find("div", id = "hero-atts")
    
    # get information about whether character is legendary/mythic:
    # True when an <a class="tipso-legendary"> exists in the attribute block
    is_legend.append(att.find("a", {"class": 
        "tipso-legendary"}) is not None)
    
    # get move type information (newlines and spaces are stripped from the text)
    move = att.find("div", {"class": 
        "field field--name-field-movement field--type-entity-reference field--label-hidden field__item"})
    move_t.append(move.get_text().replace("\n", "").replace(" ", ""))
    
    # get information about whether character is a duo character:
    # True when the page contains a "duo-skill-effect" div
    is_duo.append(char.find("div", {"class": "duo-skill-effect"}) is not None)
    
    # sanity check for color/weapon: read both from the hero's own page.
    # The unpacking requires the attribute text to split into exactly two words.
    w_use = att.find("div", {"class": 
        "field field--name-field-attribute field--type-entity-reference field--label-hidden field__item"})
    colour, weapon = w_use.get_text().replace("\n", "").split()
    col2.append(colour)
    weap2.append(weapon)
    
    # get information regarding banners: link text of every <td> in the first
    # <div> sibling following the "Banners Featured In" heading.
    # NOTE(review): `text=` is the older BeautifulSoup spelling of `string=`.
    banner_h3 = char.find("h3", text="Banners Featured In")
    char_banner = []
    for row in banner_h3.find_next_siblings("div")[0].findAll('td'):
        char_banner.append(row.find("a").text)
    banner.append(char_banner)
    num_banner.append(len(char_banner))

In [196]:
# Attach the per-hero lists scraped from the individual pages as new columns.
detail_columns = {
    "Title": title,
    "Refines": w_upgrades,
    "Personal Weapon": personal,
    "Legendary/Mythic": is_legend,
    "Origin": origin,
    "Duo": is_duo,
    "Image": img,
    "Movement": move_t,
    "Banners": banner,
    "Number of Banners": num_banner,
}
for column_name, column_values in detail_columns.items():
    dfn[column_name] = column_values

If you looked carefully above, you may have noticed a "sanity check" that I put in. That is because there was an interesting discrepancy that I noticed when trying to count up the units grouped by color and weapon. We will compare them below using the data from the general hero page and each hero's specific page:

In [197]:
# Colour/weapon pairs as read from each hero's own page, counted per combination.
dw = pd.DataFrame({"Color": col2, "Weapon": weap2})

dw.groupby(["Color", "Weapon"]).size()

Color  Weapon
Blue   Beast       7
       Bow         9
       Dagger      5
       Dragon     15
       Lance     113
       Tome       57
Gray   Beast      10
       Bow        49
       Dagger     39
       Dragon      8
       Staff      47
       Tome        7
Green  Axe        91
       Beast       8
       Bow        12
       Dagger      6
       Dragon     10
       Tome       53
Red    Beast       8
       Bow         8
       Dagger      8
       Dragon     14
       Sword     123
       Tome       56
dtype: int64

In [198]:
# Same count, but using the colour/weapon columns taken from the index page.
dfn.groupby(["Color", "Weapon"]).size()

Color  Weapon
Blue   Beast       7
       Bow         9
       Dagger      5
       Dragon     15
       Lance     113
       Tome       57
Gray   Axe         1
       Beast      10
       Bow        49
       Dagger     39
       Dragon      8
       Staff      47
       Tome        7
Green  Axe        90
       Beast       8
       Bow        12
       Dagger      6
       Dragon     10
       Tome       53
Red    Beast       8
       Bow         8
       Dagger      8
       Dragon     14
       Sword     123
       Tome       56
dtype: int64

The counts almost match up, but there is one instance where a Gray unit is listed as an axe user. This may not seem like much, but in the game, axe users have always been green up until now. After double-checking who it was, we saw that the value scraped from the general character page was wrong. To fix this, we will replace the color column with the colors scraped from each hero's individual page.

In [199]:
# Show the hero(es) recorded as a Gray axe user by the index page.
is_gray = dfn["Color"].eq("Gray")
is_axe = dfn["Weapon"].eq("Axe")
dfn[is_gray & is_axe]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners
732,Summer Innes,Gray,Axe,316,stars-5-Star,42,33,37,30,20,162,5.0,Summer Innes - Flawless Form,True,False,False,Fire Emblem: The Sacred Stones,False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,"[Summer's Arrival, Legendary Hero: Hríd - Icy ...",3


In [205]:
# sanity check to change colors: overwrite the colours taken from the index page
# with the ones read from each hero's own page (col2)
dfn["Color"] = col2

Another such important trait is whether a character is a particular unit class called a "refresher", which allows said character to "refresh" a character, granting them another move on your turn. This is quite the offensive boon, as giving your strongest character the opportunity to attack not one but TWO characters in your turn can turn the tide of battle in an instant.

In the game, there are two basic refreshing skills: dance, and sing. Thus, we will scrape for all characters who are capable of using either skill.

In [201]:
# to get list of refresher characters: collect the hero names listed on the
# "Dance" and "Sing" command-skill pages

refresh = []
dsurls = ["https://gamepress.gg/feheroes/command-skills/dance",
         "https://gamepress.gg/feheroes/command-skills/sing"]

ser = Service("./chromedriver")
op = webdriver.ChromeOptions()
# below two lines allow us to initiate "Headless Mode", which means that selenium works silently
op.add_argument('--headless')
op.add_argument('--disable-gpu')
brow = webdriver.Chrome(service=ser, options=op)
try:
    for lin in dsurls:
        brow.get(lin)
        ddata = brow.page_source
        dsoup = BeautifulSoup(ddata, "html.parser")
        since = dsoup.find("div", id = "block-gamepressbase-content")
        char = since.find("div", {"class": "views-element-container"}).findAll("a")
        # only every second link (starting with the second one) is kept as a hero name
        for it in char[1::2]:
            refresh.append(it.get_text())
finally:
    # shut the browser down even if a page load or a lookup above raised
    brow.quit()

In [202]:
# Boolean "Refresher" column: True when the hero's name is in the scraped
# `refresh` list. The flag is stored on a copy, so dfn itself is left unchanged.
ref = dfn["Name"].isin(refresh)
dfn2 = dfn.copy()
dfn2["Refresher"] = ref

In [203]:
# Split the 'Rarities' code into two features: how the hero is obtained (`ob`)
# and the list of star values (`rar`).

# (marker, label) pairs checked in order; the first marker found in the code wins.
obtain_markers = (
    ("Story", "Story"),
    ("Grand_Hero_Battle", "GHB"),
    ("Tempest_Trials", "TT"),
    ("Enemy_Only", "Enemy-Only"),
    ("Legacy", "Legacy"),
)

rar = []
ob = []
for rarity_code in dfn2["Rarities"]:
    # everything before the first "_" holds the star information
    star_part = rarity_code.split("_")[0]
    ob.append(next(
        (label for marker, label in obtain_markers if marker in rarity_code),
        "NA",
    ))
    # keep only the "-"-separated pieces that contain one of the digits 2-5
    rar.append([
        piece for piece in star_part.split("-")
        if any(digit in piece for digit in "2345")
    ])

In [204]:
# Store the parsed star lists and obtain labels as new columns.
for column_name, column_values in (("Stars", rar), ("Obtain", ob)):
    dfn2[column_name] = column_values

If we look carefully at the data, we will notice that some of the characters are missing origin information. Until Gamepress.gg fixes this, we will resort to doing this manually.

In [396]:
# Heroes without origin information. The scraping cells mark a missing origin
# either with the None object or with the string "None", so match both.
dfn[dfn["Origin"].isna() | (dfn["Origin"] == "None")]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners
236,Legendary Byleth (M),Green,Tome,331,stars-5-Star,40,39,41,25,21,166,1.0,Legendary Byleth (M) - The Fódlan Star,False,True,True,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Legendary Hero (Byleth: The Fódlan Light), My...",3
445,Brave Lucina,Blue,Lance,331,5_Legacy,41,34,36,27,19,157,1.0,Brave Lucina - Brave Princess,True,True,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Weekly Revival 19, Weekly Revival 6, Year-One...",9
463,Performing Olivia,Gray,Dagger,331,stars-5-Star,34,28,34,16,28,140,3.0,Performing Olivia - Festival Dancer,True,False,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Performing Arts, Ephraim Legendary Lord, Perf...",3
583,Halloween Dorcas,Green,Axe,326,4-5_Event:_Tempest_Trials,49,41,25,37,21,173,4.0,Halloween Dorcas - Pumpkin Smasher,True,False,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,[],0
630,Keaton,Red,Beast,331,stars-5-Star,45,41,24,38,19,167,3.0,Keaton - Lupine Collector,False,True,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Kitsune and Wolfskin],1
631,Velouria,Gray,Beast,331,stars-5-Star,40,35,37,33,23,168,1.0,Velouria - Wolf Cub,False,True,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Heroes with Close Defense, Mythic Hero (Naga:...",3
632,Kaden,Green,Beast,306,stars-5-Star,38,32,36,25,25,156,2.0,Kaden - Kitsune Braggart,False,True,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,"[Legendary Hero (Julia: Crusader of Light), Ki...",2
675,New Year Azura,Green,Axe,316,stars-5-Star,37,30,35,22,26,150,3.0,New Year Azura - Celebratory Spirit,True,False,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,"[Happy New Year!, Legendary Hero, New Years Ba...",3
686,Hector (LA),Green,Axe,326,stars-5-Star,50,38,21,40,24,173,2.0,Hector (LA) - Just Here to Fight,True,True,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,"[Love Abounds, Marth: Hero-King, Love Abounds]",3
687,Lyn (LA),Blue,Tome,326,stars-5-Star,37,31,36,27,34,165,3.0,Lyn (LA) - Wind's Embrace,True,False,False,,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,"[Love Abounds, Legendary Hero: Hector - Marque...",3


In [263]:
# Look at a single hero's row (a duo hero whose Origin is a list of games).
dfn2[dfn2["Name"].eq("Hatari Azura")]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain
267,Hatari Azura,Green,Tome,306,stars-5-Star,37,37,39,16,23,152,,Hatari Azura - Hatari Duet,False,True,False,"[Fire Emblem: Path of Radiance, Fire Emblem Fa...",True,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[Of Lost Kingdoms],1,True,[5],


In [369]:
# Manually supplied origins, keyed by hero name. Assignment goes through .loc so
# the values are written into dfn2 itself; names with no matching row are no-ops.
manual_origins = {
    "Brave Lucina": "Fire Emblem: Awakening",
    "Halloween Dorcas": "Fire Emblem: The Blazing Blade",
    "Halloween Nowi": "Fire Emblem: Awakening",
    "Halloween Sakura": "Fire Emblem Fates",
    "Hector (LA)": "Fire Emblem: The Blazing Blade",
    "Kaden": "Fire Emblem Fates",
    "Keaton": "Fire Emblem Fates",
    "Lute": "Fire Emblem: The Sacred Stones",
    "Lyn (LA)": "Fire Emblem: The Blazing Blade",
    "New Year Azura": "Fire Emblem Fates",
    "Performing Olivia": "Fire Emblem: Awakening",
    "Velouria": "Fire Emblem Fates",
    "Legendary Byleth (M)": "Fire Emblem: Three Houses",
}
for hero_name, hero_origin in manual_origins.items():
    dfn2.loc[dfn2["Name"] == hero_name, "Origin"] = hero_origin

As the website gets updated with new characters, we will notice that not all of the characters have an assigned Tier. Let's run the script below to see which of the characters fall into this category. The below scripts will change the value so that it is easier to visualize when grouping.

In [208]:
# Heroes that have no tier assigned yet.
dfn2[dfn2["Tier"].isna()]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain
267,Hatari Azura,Green,Tome,306,stars-5-Star,37,37,39,16,23,152,,Hatari Azura - Hatari Duet,False,True,False,Fire Emblem: Path of Radiance,True,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[Of Lost Kingdoms],1,True,[5],
268,Hatari Karla,Blue,Lance,331,stars-5-Star,41,40,42,31,28,182,,Hatari Karla - Sun-Piercing Steel,False,True,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,[5],
269,Hatari Nailah,Red,Beast,331,stars-5-Star,40,39,43,35,25,182,,Hatari Nailah - Hatari Scorcher,False,True,False,Fire Emblem: Radiant Dawn,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,[5],
270,Hatari Xane,Gray,Staff,331,4-5,47,30,36,23,35,171,,Hatari Xane - Desert Mirage,False,False,False,Fire Emblem: Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,"[4, 5]",
762,Hatari Deen,Gray,Dagger,306,4-5_Event:_Tempest_Trials,43,36,39,27,23,168,,Hatari Deen - Bladed Sandstorm,True,False,False,Fire Emblem Echoes,False,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[],0,False,"[4, 5]",TT


In [209]:
# Replace missing tiers with the string "None" so that untiered heroes show up
# as their own group when grouping by Tier.
dfn2["Tier"] = dfn2["Tier"].fillna("None")

In [210]:
# Number of heroes per tier (including the "None" placeholder group).
dfn2.groupby("Tier").size()

Tier
1.0      74
2.0     194
3.0     217
4.0     158
5.0     115
None      5
dtype: int64

In [34]:
# to get release date of characters: fetch the wiki's hero list page
ser = Service("./chromedriver")
op = webdriver.ChromeOptions()
# below two lines allow us to initiate "Headless Mode", which means that selenium works silently
op.add_argument('--headless')
op.add_argument('--disable-gpu')
browser = webdriver.Chrome(service=ser, options=op)
r_list = "https://feheroes.fandom.com/wiki/List_of_Heroes"
try:
    browser.get(r_list)
    releaseData = browser.page_source
finally:
    # quit() ends the WebDriver session even if the page load raised;
    # close() would only close the current window.
    browser.quit()
release_soup = BeautifulSoup(releaseData, 'html.parser')

In [35]:
# Every hero on the wiki list is a <tr> carrying the "hero-filter-element" class.
release_table = release_soup.find_all('tr', class_='hero-filter-element')

In [96]:
# this function adjusts the title so that it matches with the GamePress data, allowing
# for merging

def title_fix(title):
    """Reduce the name part of a "Name: Epithet" title to its last word.

    The title is split at the first colon only, so an epithet that itself
    contains a colon is kept intact.

    Parameters
    ----------
    title : str
        Title of the form "<name>: <epithet>".

    Returns
    -------
    str
        "<last word of name> :<epithet>", the epithet keeping its leading space.

    Raises
    ------
    ValueError
        If the title contains no colon.
    """
    title_a, title_b = title.split(":", 1)
    title_a = title_a.split(" ")[-1]
    return title_a + " :" + title_b

# title_fix(release_table[0].find_all('td')[1].text)

#.text.split(':')[0].split(" ")[-1]

Abel: The Panther


'Abel: The Panther'

In [211]:
# Lookup tables keyed by the normalised hero title ("Name - Epithet").
release_data_dict = {}
origin_data_dict = {}

# Titles that are spelled differently in the wiki table and on GamePress.
# Keys are the wiki spelling (after normalisation), values the spelling used for merging:
    # Niles - the word "be" is capitalized in the wiki
    # Eliwood - "Marquess Pherae" in the wiki
    # Winter Tharja - quotes around "Normal Girl" in the wiki
    # Canas - "Wisdom Seeker" in the wiki
title_change_dict = {"Niles - Cruel to Be Kind": "Niles - Cruel to be Kind", 
                     "Eliwood - Marquess Pherae": "Eliwood - Marquess of Pherae",
                    'Tharja - "Normal Girl"': "Tharja - Normal Girl",
                    "Canas - Wisdom Seeker": "Canas - Seeker of Wisdom"}

# Loop-local names are deliberately distinct from the `title`, `origin` and `char`
# globals used by the scraping cells, so this cell does not overwrite them.
for hero_row in release_table:
    cells = hero_row.findAll("td")
    # "Name: Epithet" -> "Name - Epithet", then drop any parenthesised part
    hero_title = cells[1].text.replace(":", " -")
    hero_title = re.sub(r'\([^)]*\)', '', hero_title)
    hero_title = title_change_dict.get(hero_title, hero_title)
    release_data_dict[hero_title] = cells[7].text
    origin_data_dict[hero_title] = cells[2].text

In [404]:
# Heroes whose name contains an opening parenthesis, e.g. "(F)" or "(Young)".
# A literal (non-regex) match avoids the invalid "\(" escape in a normal string.
dfn2[dfn2["Name"].str.contains("(", regex=False)]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain
52,Summer Byleth (F),Red,Tome,316,stars-5-Star,40,36,42,16,21,155,2.0,Summer Byleth (F) - Fell Star's Duo,False,True,False,Fire Emblem: Three Houses,True,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,[Overseas Memories],1,False,[5],
57,Kris (M),Red,Sword,331,stars-5-Star,40,37,40,30,25,172,2.0,Kris (M) - Unknown Hero,False,True,False,Fire Emblem: New Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Heroes, Light and Shadow]",1,False,[5],
60,Kris (F),Blue,Lance,331,stars-5-Star,40,37,40,30,25,172,2.0,Kris (F) - Unsung Hero,False,True,False,Fire Emblem: New Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Heroes, Light and Shadow]",1,False,[5],
79,Fallen Corrin (M),Blue,Dragon,331,stars-5-Star,42,38,38,33,26,177,2.0,Fallen Corrin (M) - Bloodbound Beast,False,True,False,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Dark Burdens],1,False,[5],
100,Legendary Corrin (F),Gray,Dragon,331,stars-5-Star,41,38,40,34,29,182,2.0,Legendary Corrin (F) - Child of Dusk,False,True,True,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Legendary Eitri & Mythic Thórr, Mythic Hero (...",5,False,[5],
116,Byleth (F),Red,Sword,331,stars-5-Star,40,34,40,33,21,168,2.0,Byleth (F) - Proven Professor,False,True,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Mythic Hero (Líf), Three Houses]",2,False,[5],
117,Byleth (M),Red,Sword,331,stars-5-Star,40,36,38,34,20,168,2.0,Byleth (M) - Tested Professor,False,True,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Mythic Hero (Girl on the Throne Sothis)],1,False,[5],
134,Fallen Corrin (F),Gray,Dragon,331,stars-5-Star,43,35,37,31,27,173,3.0,Fallen Corrin (F) - Wailing Soul,False,True,False,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Legendary Hero (Chrom: Crowned Exalt), Mythi...",4,False,[5],
136,Fallen Tiki (Young),Gray,Dragon,326,stars-5-Star,45,36,34,37,28,180,1.0,Fallen Tiki (Young) - Torpid Dragon,False,True,False,Fire Emblem: Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,"[Legendary Hero (Leif: Unifier of Thracia), He...",3,False,[5],
143,Naga (Unit),Blue,Dragon,316,stars-5-Star,37,30,36,33,26,162,3.0,Naga (Unit) - Dragon Divinity,False,False,True,"[Fire Emblem: Awakening, Fire Emblem: Mystery ...",False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,"[Legendary Hero (Micaiah: Radiant Queen), Lege...",6,False,[5],


In [None]:
# (YT), (LA), (SF), (WT), (WF), (Unit)

In [264]:
# a typo found in Gamepress's data: set the title of Tethys by hand
dfn2.loc[dfn2["Name"] == "Tethys", "Title"] = "Tethys - Beloved Dancer"

In [374]:
# Work on a copy so that dfn2 keeps its original titles.
dfn3 = dfn2.copy()

def title_fix(title):
    """Reduce the name part of a "Name - Epithet" title to its last word.

    Names listed in ``to_ignore`` are kept whole. The title is split at the
    first " -" only, so an epithet that itself contains " -" is kept intact.

    Parameters
    ----------
    title : str
        Title of the form "<name> - <epithet>".

    Returns
    -------
    str
        "<name or its last word> -<epithet>", the epithet keeping its leading space.

    Raises
    ------
    ValueError
        If the title contains no " -" separator.
    """
    to_ignore = ("Flame Emperor", "Black Knight", "Death Knight")
    title_a, title_b = title.split(" -", 1)
    if title_a not in to_ignore:
        # e.g. "Summer Innes" -> "Innes"
        title_a = title_a.split(" ")[-1]
    return title_a + " -" + title_b

def _drop_parenthetical(text):
    """Remove every "(...) " group, including the space that follows it."""
    return re.sub(r'\([^)]*\) ', '', text)


# Normalise the titles so they can be matched against the wiki-derived keys:
# drop parenthesised tags first, then reduce the name part with title_fix.
dfn3['Title'] = dfn3['Title'].map(_drop_parenthetical).map(title_fix)

# Left-merge the release dates and the wiki origins on the normalised title.
release_df = pd.DataFrame.from_dict(release_data_dict, orient = "index", columns = ["Release Date"])
origin_df = pd.DataFrame.from_dict(origin_data_dict, orient = "index", columns = ["Origin - GPedia"])
dfn3 = dfn3.merge(release_df, left_on = "Title", right_index = True, how = "left")
dfn3 = dfn3.merge(origin_df, left_on = "Title", right_index = True, how = "left")

In [402]:
# Heroes whose name contains an opening parenthesis, e.g. "(F)" or "(Young)".
# A literal (non-regex) match avoids the invalid "\(" escape in a normal string.
dfn2[dfn2["Name"].str.contains("(", regex=False)]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain
52,Summer Byleth (F),Red,Tome,316,stars-5-Star,40,36,42,16,21,155,2.0,Summer Byleth (F) - Fell Star's Duo,False,True,False,Fire Emblem: Three Houses,True,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,[Overseas Memories],1,False,[5],
57,Kris (M),Red,Sword,331,stars-5-Star,40,37,40,30,25,172,2.0,Kris (M) - Unknown Hero,False,True,False,Fire Emblem: New Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Heroes, Light and Shadow]",1,False,[5],
60,Kris (F),Blue,Lance,331,stars-5-Star,40,37,40,30,25,172,2.0,Kris (F) - Unsung Hero,False,True,False,Fire Emblem: New Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Heroes, Light and Shadow]",1,False,[5],
79,Fallen Corrin (M),Blue,Dragon,331,stars-5-Star,42,38,38,33,26,177,2.0,Fallen Corrin (M) - Bloodbound Beast,False,True,False,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Dark Burdens],1,False,[5],
100,Legendary Corrin (F),Gray,Dragon,331,stars-5-Star,41,38,40,34,29,182,2.0,Legendary Corrin (F) - Child of Dusk,False,True,True,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Legendary Eitri & Mythic Thórr, Mythic Hero (...",5,False,[5],
116,Byleth (F),Red,Sword,331,stars-5-Star,40,34,40,33,21,168,2.0,Byleth (F) - Proven Professor,False,True,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Mythic Hero (Líf), Three Houses]",2,False,[5],
117,Byleth (M),Red,Sword,331,stars-5-Star,40,36,38,34,20,168,2.0,Byleth (M) - Tested Professor,False,True,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Mythic Hero (Girl on the Throne Sothis)],1,False,[5],
134,Fallen Corrin (F),Gray,Dragon,331,stars-5-Star,43,35,37,31,27,173,3.0,Fallen Corrin (F) - Wailing Soul,False,True,False,Fire Emblem Fates,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,"[Legendary Hero (Chrom: Crowned Exalt), Mythi...",4,False,[5],
136,Fallen Tiki (Young),Gray,Dragon,326,stars-5-Star,45,36,34,37,28,180,1.0,Fallen Tiki (Young) - Torpid Dragon,False,True,False,Fire Emblem: Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,"[Legendary Hero (Leif: Unifier of Thracia), He...",3,False,[5],
143,Naga (Unit),Blue,Dragon,316,stars-5-Star,37,30,36,33,26,162,3.0,Naga (Unit) - Dragon Divinity,False,False,True,"[Fire Emblem: Awakening, Fire Emblem: Mystery ...",False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,"[Legendary Hero (Micaiah: Radiant Queen), Lege...",6,False,[5],


In [375]:
# Show every column when displaying frames (None removes the column limit),
# then preview the merged table.
pd.options.display.max_columns = None
dfn3.head()

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain,Release Date,Origin - GPedia
0,Sharena,Blue,Lance,331,4_Star_Story,43,32,32,29,22,158,5.0,Sharena - Princess of Askr,True,True,False,Fire Emblem Heroes,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[],0,False,[4],Story,2017-02-02,Fire Emblem Heroes
1,Flame Emperor,Green,Axe,326,3-4_Grand_Hero_Battle,50,40,25,37,26,178,4.0,Flame Emperor - Bringer of War,True,False,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,[],0,False,"[3, 4]",GHB,2020-03-09,Fire Emblem: Three Houses
2,Rath,Green,Bow,306,3-4,41,33,35,23,24,156,3.0,Rath - Wolf of Sacae,True,False,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[The Dread Isle],1,False,"[3, 4]",,2020-02-17,Fire Emblem: The Blazing Blade
3,Fiora,Blue,Lance,316,stars-5-Star,38,32,38,21,37,166,5.0,Fiora - Airborne Warrior,True,False,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,[The Dread Isle],1,False,[5],,2020-02-17,Fire Emblem: The Blazing Blade
4,Leila,Gray,Dagger,331,stars-5-Star,40,36,41,23,26,166,2.0,Leila - Rose amid Fangs,False,True,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[The Dread Isle],1,False,[5],,2020-02-17,Fire Emblem: The Blazing Blade


In [376]:
# Heroes for which no release date could be matched.
# Known causes of mismatches that were investigated:
#   Tethys: typo ("Dancer")
#   Canas: "Wisdom Seeker in GPedia"
#   Hatari - no info
#   Niles - the word "be" is capitalized in Gpedia
#   Eliwood - Marquess Pherae in GPedia
#   Winter Tharja - quotes around "Normal Girl" in Gpedia
dfn3[dfn3["Release Date"].isna()]

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain,Release Date,Origin - GPedia
267,Hatari Azura,Green,Tome,306,stars-5-Star,37,37,39,16,23,152,,Azura - Hatari Duet,False,True,False,"[Fire Emblem: Path of Radiance, Fire Emblem Fa...",True,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[Of Lost Kingdoms],1,True,[5],,,
268,Hatari Karla,Blue,Lance,331,stars-5-Star,41,40,42,31,28,182,,Karla - Sun-Piercing Steel,False,True,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,[5],,,
269,Hatari Nailah,Red,Beast,331,stars-5-Star,40,39,43,35,25,182,,Nailah - Hatari Scorcher,False,True,False,Fire Emblem: Radiant Dawn,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,[5],,,
270,Hatari Xane,Gray,Staff,331,4-5,47,30,36,23,35,171,,Xane - Desert Mirage,False,False,False,Fire Emblem: Mystery of the Emblem,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[Of Lost Kingdoms],1,False,"[4, 5]",,,
762,Hatari Deen,Gray,Dagger,306,4-5_Event:_Tempest_Trials,43,36,39,27,23,168,,Deen - Bladed Sandstorm,True,False,False,Fire Emblem Echoes,False,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,[],0,False,"[4, 5]",TT,,


In [377]:
# `heroes` is a second name for dfn3 (plain assignment, no copy is made).
heroes = dfn3
# Optional filter, currently disabled:
#heroes = heroes[heroes["Obtain"] != "Enemy-Only"]

In [378]:
# Convert the five base-stat columns to numeric dtypes.
numeric_stat_cols = ["HP", "Atk", "Spd", "Def", "Res"]
heroes[numeric_stat_cols] = heroes[numeric_stat_cols].apply(pd.to_numeric)

## Save to CSV

The cell below saves the data that we've scraped and processed to both an Excel (.xlsx) file and a .csv file, which can be used for future analysis.

In [379]:
# Tag the output files with the current month and year, formatted as MM-YY.
last_update = time.strftime("%m-%y")
# `to_excel` no longer accepts `encoding`: the keyword was deprecated (and unused)
# in pandas 1.5 and removed in 2.0, where passing it raises a TypeError.
heroes.to_excel("hero_data_" + last_update + ".xlsx", index=False)
heroes.to_csv("hero_data_" + last_update + ".csv", encoding='utf-8', index=False)
# Optional export of just the hero names (disabled):
#heroes["Name"].to_excel("tier_list.xlsx", header = True)

In [381]:
# Reload the saved CSV to confirm the export round-trips.
# Note: list-valued columns such as "Banners" and "Stars" come back as plain
# strings (e.g. "['5']"), as the preview below shows.
saved_csv_path = f"./hero_data_{last_update}.csv"
heroes = pd.read_csv(saved_csv_path)
heroes.head()

Unnamed: 0,Name,Color,Weapon,MoveCode,Rarities,HP,Atk,Spd,Def,Res,Total,Tier,Title,Refines,Personal Weapon,Legendary/Mythic,Origin,Duo,Image,Movement,Banners,Number of Banners,Refresher,Stars,Obtain,Release Date,Origin - GPedia
0,Sharena,Blue,Lance,331,4_Star_Story,43,32,32,29,22,158,5.0,Sharena - Princess of Askr,True,True,False,Fire Emblem Heroes,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,[],0,False,['4'],Story,2017-02-02,Fire Emblem Heroes
1,Flame Emperor,Green,Axe,326,3-4_Grand_Hero_Battle,50,40,25,37,26,178,4.0,Flame Emperor - Bringer of War,True,False,False,Fire Emblem: Three Houses,False,https://gamepress.gg/feheroes/sites/fireemblem...,Armored,[],0,False,"['3', '4']",GHB,2020-03-09,Fire Emblem: Three Houses
2,Rath,Green,Bow,306,3-4,41,33,35,23,24,156,3.0,Rath - Wolf of Sacae,True,False,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Cavalry,['The Dread Isle'],1,False,"['3', '4']",,2020-02-17,Fire Emblem: The Blazing Blade
3,Fiora,Blue,Lance,316,stars-5-Star,38,32,38,21,37,166,5.0,Fiora - Airborne Warrior,True,False,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Flying,['The Dread Isle'],1,False,['5'],,2020-02-17,Fire Emblem: The Blazing Blade
4,Leila,Gray,Dagger,331,stars-5-Star,40,36,41,23,26,166,2.0,Leila - Rose amid Fangs,False,True,False,Fire Emblem: The Blazing Blade,False,https://gamepress.gg/feheroes/sites/fireemblem...,Infantry,['The Dread Isle'],1,False,['5'],,2020-02-17,Fire Emblem: The Blazing Blade


## Conclusion

At this point, we have responsibly scraped data from the host website, extracted the important information, and processed it into a clean dataframe. From here, we can do several things with this data, such as building a model to predict a unit's labeled tier or performing exploratory data analysis on a specific set of characters.