In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Create Growth Rate Dataframe

In [2]:
# Download the Sacred Stones growth-rate page and collect every <table> on it.
url = "https://serenesforest.net/the-sacred-stones/characters/growth-rates/"
# A timeout keeps the cell from hanging forever if the site stops responding.
page = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of parsing an error page further down.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
tables = soup.find_all("table")

# Peek at the first two rows of the first table to see how it is laid out.
rows = tables[0].find_all('tr')
print(rows[0:2])

[<tr>
<th style="width: 23%;">Name</th>
<th style="width: 11%;">HP</th>
<th style="width: 11%;">S/M</th>
<th style="width: 11%;">Skl</th>
<th style="width: 11%;">Spd</th>
<th style="width: 11%;">Lck</th>
<th style="width: 11%;">Def</th>
<th style="width: 11%;">Res</th>
</tr>, <tr>
<td>Eirika</td>
<td>70</td>
<td>40</td>
<td>60</td>
<td>60</td>
<td>60</td>
<td>30</td>
<td>30</td>
</tr>]


In [3]:
def scrape_table(table, game_title: str) -> pd.DataFrame:
    """
    Scrape one HTML table (as found on serenesforest) into a DataFrame.

    Serenes frequently uses multiple header rows inside a table, sometimes
    with SLIGHTLY different strings, so only the first row containing <th>
    cells supplies the column names; later header rows are ignored.

    Parameters
    ----------
    table
        A parsed table element exposing ``find_all`` (e.g. a BeautifulSoup tag).
    game_title : str
        Value written to the added ``Game`` column on every row.

    Returns
    -------
    pd.DataFrame
        One row per <tr> that contains at least one <td>, holding the
        whitespace-stripped cell text as strings, plus the ``Game`` column.
        If the table has no header row, the scraped columns get pandas'
        default integer labels instead of raising.

    Notes
    -----
    Only cell text is captured; anything a cell conveys through images
    (rather than text) is not part of the result.
    """
    columns = None  # stays None until the first header row is seen
    data = []

    for row in table.find_all('tr'):
        if columns is None:
            headers = row.find_all('th')
            if headers:
                columns = [header.text.strip() for header in headers]

        # Header-only rows have no <td> cells and therefore add no data row.
        row_data = [cell.text.strip() for cell in row.find_all('td')]
        if row_data:
            data.append(row_data)

    # columns=None lets pandas number the columns for header-less tables;
    # passing an empty list alongside non-empty data would raise instead.
    df = pd.DataFrame(data, columns=columns)
    df['Game'] = game_title

    return df

In [4]:
# Scrape the first table fetched above and tag every row with the game slug.
df = scrape_table(table=tables[0], game_title='the-sacred-stones')
df

Unnamed: 0,Name,HP,S/M,Skl,Spd,Lck,Def,Res,Game
0,Eirika,70,40,60,60,60,30,30,the-sacred-stones
1,Seth,90,50,45,45,25,40,30,the-sacred-stones
2,Franz,80,40,40,50,40,25,20,the-sacred-stones
3,Gilliam,90,45,35,30,30,55,20,the-sacred-stones
4,Moulder,70,40,50,40,20,25,25,the-sacred-stones
5,Vanessa,50,35,55,60,50,20,30,the-sacred-stones
6,Ross,70,50,35,30,40,25,20,the-sacred-stones
7,Garcia,80,65,40,20,40,25,15,the-sacred-stones
8,Neimi,55,45,50,60,50,15,35,the-sacred-stones
9,Colm,75,40,40,65,45,25,20,the-sacred-stones


# Create Base Stats Dataframe

This page needs some extra handling: the Weapon Rank and Affinity columns use images to convey information, so a plain text scrape loses the weapon types and leaves Affinity empty.

There are also two tables on this page: one for the base-game characters and one for the Creature Campaign characters. Thankfully, that's easy to handle.

In [5]:
# Download the base-stats page and scrape every table on it.
url = "https://serenesforest.net/the-sacred-stones/characters/base-stats/"
# A timeout keeps the cell from hanging forever if the site stops responding.
page = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of scraping an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
tables = soup.find_all("table")

# One DataFrame per table, in page order.
dataframes = [scrape_table(table, 'the-sacred-stones') for table in tables]

# NOTE: this loop rebinds `df`, so the frame an earlier cell stored under that
# name is replaced by the last table scraped here.
for df in dataframes:
    display(df)

Unnamed: 0,Name,Lv,Class,HP,Str,Skl,Spd,Lck,Def,Res,Mov,Con,Weapon Rank,Affin,Game
0,Eirika,1,Lord,16,4,8,9,5,3,1,5,5,E,,the-sacred-stones
1,Seth,1,Paladin,30,14,13,12,13,11,8,8,11,"A, A",,the-sacred-stones
2,Franz,1,Cavalier,20,7,5,7,2,6,1,7,9,"E, D",,the-sacred-stones
3,Gilliam,4,Knight,25,9,6,3,3,9,3,4,14,C,,the-sacred-stones
4,Vanessa,1,Pegasus Knight,17,5,7,11,4,6,5,7,5,D,,the-sacred-stones
5,Moulder,3,Priest,20,4,6,9,1,2,5,5,9,C,,the-sacred-stones
6,Ross,1,Journeyman,15,5,2,3,8,3,0,4,8,E,,the-sacred-stones
7,Garcia,4,Fighter,28,8,7,7,3,5,1,5,14,C,,the-sacred-stones
8,Neimi,1,Archer,17,4,5,6,4,3,2,5,5,D,,the-sacred-stones
9,Colm,2,Thief,18,4,4,10,8,3,1,6,6,E,,the-sacred-stones


Unnamed: 0,Name,Lv,Class,HP,Str,Skl,Spd,Lck,Def,Res,Mov,Con,Weapon Rank,Affin,Game
0,Caellach,12,Hero,47,19,14,13,14,15,13,6,13,"A, A",,the-sacred-stones
1,Orson,13,Paladin,48,18,15,14,6,14,11,8,12,"A, A",,the-sacred-stones
2,Riev,16,Bishop,49,14,21,19,9,16,18,6,7,"S, A",,the-sacred-stones
3,Ismaire,9,Swordmaster,33,16,20,23,12,8,15,6,7,A,,the-sacred-stones
4,Selena,11,Mage Knight,38,13,13,16,10,11,17,7,6,"A, B",,the-sacred-stones
5,Glen,12,Wyvern Lord,46,20,17,13,7,18,5,8,12,"A, A",,the-sacred-stones
6,Hayden,10,Ranger,37,17,14,15,17,12,12,7,10,"A, A",,the-sacred-stones
7,Valter,13,Wyvern Knight,45,19,17,17,3,13,12,8,11,S,,the-sacred-stones
8,Fado,11,General,46,20,14,12,5,18,11,5,18,"A, A, A",,the-sacred-stones
9,Lyon,14,Necromancer,44,22,13,11,4,17,19,6,7,"S, A",,the-sacred-stones


## Fixing Weapon Ranks and Affinity Columns

In [6]:
import re

# Index 2 is Seth's row in the first table (header row, then Eirika, then Seth).
seth = tables[0].find_all('tr')[2]
seth_cells = seth.find_all('td')
# The last two cells are the Weapon Rank and Affin columns.
weapon_info = seth_cells[-2]
affinity = seth_cells[-1]
print(f"Weapon html: {weapon_info}\n\nAffinity html: {affinity}")

Weapon html: <td><a href="https://serenesforest.net/wp-content/uploads/2014/04/TypeSword.gif"><img alt="Sword" class="alignnone size-full" src="https://serenesforest.net/wp-content/uploads/2014/04/TypeSword.gif"/></a> A, <a href="https://serenesforest.net/wp-content/uploads/2014/04/TypeLance.gif"><img alt="Lance" class="alignnone size-full" src="https://serenesforest.net/wp-content/uploads/2014/04/TypeLance.gif"/></a> A</td>

Affinity html: <td><a href="https://serenesforest.net/wp-content/uploads/2014/04/AffinAnima.gif"><img alt="Anima" class="alignnone size-full" src="https://serenesforest.net/wp-content/uploads/2014/04/AffinAnima.gif"/></a></td>


In [7]:
# The affinity cell contains an icon; read the affinity name from its alt text.
affinity_img = affinity.find('img')
affinity_type = affinity_img['alt']
affinity_type

'Anima'

In [8]:
# Each weapon icon's alt text names the weapon type.
weapon_types = [icon['alt'] for icon in weapon_info.find_all('img')]
weapon_types

['Sword', 'Lance']

In [9]:
# Drop the comma separators, then split on whitespace so only the rank letters remain.
rank_text = weapon_info.text.replace(',', '')
ranks = rank_text.split()
ranks

['A', 'A']

In [10]:
# Pair each weapon type with the rank found at the same position.
weapon_ranks = [
    f"{ranks[position]} {weapon_name}"
    for position, weapon_name in enumerate(weapon_types)
]
weapon_ranks

['A Sword', 'A Lance']