In [4]:
# !pip install lxml
# !pip install cssselect

In [1]:
import requests
from lxml import html
import numpy as np
import pandas as pd

def get_list(l, idx, default=None):
    """Return ``l[idx]``, or ``default`` when that index is out of range.

    Only ``IndexError`` is handled; any other exception raised by the
    subscript operation propagates to the caller.
    """
    try:
        item = l[idx]
    except IndexError:
        item = default
    return item

def parse_int(v):
    """Convert ``v`` with ``int()``; return None when that is not possible.

    None is returned both for a ``None`` input and for a value on which
    ``int()`` raises ``ValueError`` (for example non-numeric text).
    """
    if v is not None:
        try:
            return int(v)
        except ValueError:
            pass
    return None

In [2]:
# One requests session shared by every download in this notebook. Its default
# 'user-agent' header is replaced with a desktop-browser string.
# NOTE(review): presumably done so the site does not reject the scraper's
# default client identification — confirm.
session = requests.Session()
session.headers.update({
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53'
})

In [3]:
# Download the full Pokédex listing page.
res_pokedex = session.get('https://pokemondb.net/pokedex/all')
# Stop here on a 4xx/5xx answer instead of handing an error page to the HTML
# parser, where it would only surface later as a confusing lookup failure.
res_pokedex.raise_for_status()
# Cell output: the request headers that were actually sent.
res_pokedex.request.headers

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [6]:
# Parse the downloaded page, locate the element whose id is 'pokedex', and
# collect the <tr> children of its first <tbody> child (one per table row).
doc_pokedex = html.fromstring(res_pokedex.text)
pokedex = doc_pokedex.get_element_by_id(id='pokedex')
pokedex_rows = pokedex.find('tbody').findall('tr')
def parseRowPokedex(row):
    """Extract one Pokédex table row into a plain dict.

    ``row`` must offer ``cssselect(selector)`` returning a list of elements
    that each provide ``text_content()``.

    Returns a dict with keys:
      - 'id': integer from the first number cell, or None if no such cell
      - 'name': text of the first ``a.ent-name`` match, or None
      - 'sub_name': text of the first ``small.text-muted`` match, or None
      - 'types': text of every ``.type-icon`` match, in document order
      - 'stats': the last (up to) six ``td.cell-num`` cells converted to int

    Raises ValueError when the id cell or a stat cell is not an integer.
    """
    number_cells = row.cssselect('td.cell-num.cell-fixed .infocard-cell-data')
    name_links = row.cssselect('a.ent-name')
    variant_labels = row.cssselect('small.text-muted')
    type_names = [icon.text_content() for icon in row.cssselect('.type-icon')]
    # HP, Atk, Def, SpAtk, SpDef, Speed
    base_stats = [
        int(cell.text_content())
        for cell in row.cssselect('td.cell-num:nth-last-child(-n + 6)')
    ]

    def first_text(found):
        # Text of the first matched element, or None when nothing matched.
        return found[0].text_content() if found else None

    entry = {
        'id': None,
        'name': first_text(name_links),
        'sub_name': first_text(variant_labels),
        'types': type_names,
        'stats': base_stats
    }
    if number_cells:
        entry['id'] = int(number_cells[0].text_content())
    return entry

# Turn every Pokédex table row into a dict (see parseRowPokedex).
pokedex_data = [parseRowPokedex(tr) for tr in pokedex_rows]

# Column order of the exported CSV. The parsed 'stats' list is not exported.
pokedex_headers = ['id', 'name', 'sub_name', 'type_1', 'type_2']


def toPandasPokedexRows(row):
    """Flatten one parsed Pokédex dict into a list ordered like pokedex_headers.

    The 'types' list is spread over two positions; a missing first or second
    type becomes None.
    """
    first_type = get_list(row['types'], 0)
    second_type = get_list(row['types'], 1)
    return [row['id'], row['name'], row['sub_name'], first_type, second_type]


pokedex_df = pd.DataFrame(
    [toPandasPokedexRows(entry) for entry in pokedex_data],
    columns=pokedex_headers,
)
pokedex_df.to_csv('./raw-data/pokedex.csv', index=False)


In [7]:
# Download the page listing every move.
res_moves = session.get('https://pokemondb.net/move/all')
# Stop here on a 4xx/5xx answer instead of handing an error page to the HTML
# parser, where it would only surface later as a confusing lookup failure.
res_moves.raise_for_status()
# Cell output: the request headers that were actually sent.
res_moves.request.headers

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [8]:
# Parse the downloaded page, locate the element whose id is 'moves', and
# collect the <tr> children of its first <tbody> child (one per table row).
doc_moves = html.fromstring(res_moves.text)
moves = doc_moves.get_element_by_id(id='moves')
moves_rows = moves.find('tbody').findall('tr')

def parseRowMoves(row):
    """Extract one moves table row into a plain dict.

    The row's ``td`` cells are read by position:
      0 -> 'name' (text of the first link in the cell)
      1 -> 'type' (text of the first link in the cell)
      2 -> 'type_damage' (the cell's ``data-sort-value`` attribute)
      3, 4, 5 -> 'power', 'acc', 'pp' (via parse_int, so non-numeric text gives None)
      6 -> 'tm' (raw text)
      7 -> 'effect' (raw text)
      8 -> 'prob' (via parse_int)

    Raises IndexError when the row has fewer than nine cells or when one of
    the first two cells contains no link.
    """
    cells = row.cssselect('td')

    def cell_text(position):
        # Full text of the cell at the given column position.
        return cells[position].text_content()

    def link_text(position):
        # Text of the first <a> inside the cell at the given column position.
        return cells[position].cssselect('a')[0].text_content()

    return {
        'name': link_text(0),
        'type': link_text(1),
        'type_damage': cells[2].get('data-sort-value'),
        'power': parse_int(cell_text(3)),
        'acc': parse_int(cell_text(4)),
        'pp': parse_int(cell_text(5)),
        'tm': cell_text(6),
        'effect': cell_text(7),
        'prob': parse_int(cell_text(8))
    }

# Turn every moves table row into a dict (see parseRowMoves).
moves_data = [parseRowMoves(tr) for tr in moves_rows]

# Column order of the exported CSV.
moves_headers = ['name', 'type', 'type_damage', 'power', 'acc', 'pp', 'tm', 'effect', 'prob']


def toPandasMovesRows(row):
    """Flatten one parsed move dict into a list ordered like moves_headers."""
    ordered_keys = ('name', 'type', 'type_damage', 'power', 'acc', 'pp', 'tm', 'effect', 'prob')
    return [row[key] for key in ordered_keys]


moves_df = pd.DataFrame(
    [toPandasMovesRows(entry) for entry in moves_data],
    columns=moves_headers,
)
moves_df.to_csv('./raw-data/moves.csv', index=False)

In [9]:
# Download the page listing every ability.
res_abilities = session.get('https://pokemondb.net/ability')
# Stop here on a 4xx/5xx answer instead of handing an error page to the HTML
# parser, where it would only surface later as a confusing lookup failure.
res_abilities.raise_for_status()
# Cell output: the request headers that were actually sent.
res_abilities.request.headers

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [10]:
# Parse the downloaded page, locate the element whose id is 'abilities', and
# collect the <tr> children of its first <tbody> child (one per table row).
doc_abilities = html.fromstring(res_abilities.text)
abilities = doc_abilities.get_element_by_id(id='abilities')
abilities_rows = abilities.find('tbody').findall('tr')

def parseRowAbilities(row):
    """Extract one abilities table row into a plain dict.

    The row's ``td`` cells are read by position: cell 0 supplies 'name' (text
    of its first link), cell 2 supplies 'desc' and cell 3 supplies 'gen'.
    Cell 1 is not used.

    Raises IndexError when the row has fewer than four cells or when the
    first cell contains no link.
    """
    cells = row.cssselect('td')
    name_link = cells[0].cssselect('a')[0]
    desc_cell, gen_cell = cells[2], cells[3]

    record = {}
    record['name'] = name_link.text_content()
    record['desc'] = desc_cell.text_content()
    record['gen'] = gen_cell.text_content()
    return record

# Turn every abilities table row into a dict (see parseRowAbilities).
abilities_data = [parseRowAbilities(tr) for tr in abilities_rows]

# Column order of the exported CSV.
ablt_headers = ['name', 'desc', 'gen']


def toPandasAbltRows(row):
    """Flatten one parsed ability dict into a list ordered like ablt_headers."""
    return [row[key] for key in ('name', 'desc', 'gen')]


ablt = pd.DataFrame(
    [toPandasAbltRows(entry) for entry in abilities_data],
    columns=ablt_headers,
)
ablt.to_csv('./raw-data/abilities.csv', index=False)

In [11]:
# Download the page listing every item.
res_items = session.get('https://pokemondb.net/item/all')
# Stop here on a 4xx/5xx answer instead of handing an error page to the HTML
# parser, where it would only surface later as a confusing lookup failure.
res_items.raise_for_status()
# Cell output: the request headers that were actually sent.
res_items.request.headers

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36 Edg/95.0.1020.53', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

In [12]:
# Parse the downloaded page and take the first table matching the CSS
# selector below (this table is located by class, not by id); the [0] raises
# IndexError when nothing matches. Then collect the <tr> children of the
# table's first <tbody> child (one per table row).
doc_items = html.fromstring(res_items.text)
items = doc_items.cssselect('table.data-table.block-wide')[0]
items_rows = items.find('tbody').findall('tr')

def parseRowItems(row):
    """Extract one items table row into a plain dict.

    The row's ``td`` cells are read by position: cell 0 supplies 'name' (text
    of its first link), cell 1 supplies 'cat' and cell 2 supplies 'desc'.

    Raises IndexError when the row has fewer than three cells or when the
    first cell contains no link.
    """
    cells = row.cssselect('td')
    name_link = cells[0].cssselect('a')[0]
    cat_cell, desc_cell = cells[1], cells[2]

    record = {}
    record['name'] = name_link.text_content()
    record['cat'] = cat_cell.text_content()
    record['desc'] = desc_cell.text_content()
    return record

# Turn every items table row into a dict (see parseRowItems).
items_data = [parseRowItems(tr) for tr in items_rows]

# Column order of the exported CSV.
items_headers = ['name', 'cat', 'desc']


def toPandasItemsRows(row):
    """Flatten one parsed item dict into a list ordered like items_headers."""
    return [row[key] for key in ('name', 'cat', 'desc')]


# Note: from here on `items` names the DataFrame, no longer the table element.
items = pd.DataFrame(
    [toPandasItemsRows(entry) for entry in items_data],
    columns=items_headers,
)
items.to_csv('./raw-data/items.csv', index=False)

In [13]:
# Type names, in the order used for both the rows and the columns of the chart.
types = ['normal', 'fire', 'water', 'electric', 'grass', 'ice', 'fighting', 'poison', 'ground', 'flying', 'psychic', 'bug', 'rock', 'ghost', 'dragon', 'dark', 'steel', 'fairy']


def v_list(v, n=None):
    """Return a list holding ``v`` repeated ``n`` times (once when ``n`` is None).

    Shorthand for writing runs of equal multipliers in ``types_matrix``.
    """
    return [v] * n if n is not None else [v]


# Damage multipliers, one row per entry of `types` (row label in the trailing
# comment), one column per entry of `types`.
# NOTE(review): rows look like the attacking type and columns the defending
# type (e.g. the fire row holds 2 in the grass column) — confirm.
types_matrix = [
    [*v_list(1,12), .5, 0, 1, 1, .5, 1], # NORMAL
    [1, .5, .5, 1, 2, 2, *v_list(1, 5), 2, .5, 1, .5, 1, 2, 1], # FIRE
    [1, 2, .5, 1, .5, *v_list(1,3), 2, *v_list(1,3), 2, 1, .5, *v_list(1,3)], # WATER
    [*v_list(1,2), 2, *v_list(.5,2), *v_list(1,3), 0, 2, *v_list(1,4), .5, *v_list(1,3)], # ELECTRIC
    [1, .5, 2, 1, .5, *v_list(1,2), .5,2,.5,1,.5,2,1,.5,1,.5,1], # GRASS
    [1, *v_list(.5,2), 1,2,.5, *v_list(1,2), *v_list(2,2), *v_list(1,4), 2,1,.5,1], # ICE
    [2, *v_list(1,4), 2,1,.5,1,*v_list(.5,3),2,0,1,2,2,.5], # FIGHTING
    [*v_list(1,4), 2, 1, 1, .5, .5, *v_list(1,3), .5, .5, 1, 1, 0, 2], # POISON
    [1,2,1,2,.5,1,1,2,1,0,1,.5,2,*v_list(1,3),2,1], # GROUND
    [*v_list(1,3), .5, 2,1, 2, *v_list(1,4), 2,.5,*v_list(1,3),.5,1], # FLYING
    [*v_list(1,6), 2, 2, 1, 1, .5, *v_list(1,4), 0, .5, 1], # PSYCHIC
    [1, .5, 1, 1, 2, 1, .5, .5, 1, .5, 2, 1, 1, .5, 1, 2, .5, .5], # BUG
    [1, 2, *v_list(1,3), 2, .5, 1, .5, 2, 1, 2, *v_list(1,4), .5, 1], # ROCK
    [0, *v_list(1,9), 2, 1, 1, 2, 1, .5, 1, 1], # GHOST
    [*v_list(1,14), 2, 1, .5, 0], # DRAGON
    [*v_list(1,6), .5, *v_list(1,3), 2, 1, 1, 2, 1, .5, 1, .5], # DARK
    [1, *v_list(.5,3), 1, 2, *v_list(1,6), 2, *v_list(1,3), .5, 2], # STEEL
    [1, .5, *v_list(1,4), 2, .5, *v_list(1,6), 2, 2, .5, 1] # FAIRY
]

# Every row must have exactly one value per type. Without this check a
# mistyped row would silently shift all later values into the wrong cells.
assert len(types_matrix) == len(types), 'types_matrix needs one row per type'
assert all(len(matrix_row) == len(types) for matrix_row in types_matrix), \
    'every types_matrix row needs one value per type'

# Flattened copy of the chart in row-major order.
flat = [item for sublist in types_matrix for item in sublist]

# Build the labelled chart from a plain float array (np.matrix is no longer
# recommended by NumPy); float dtype keeps every column float64.
df = pd.DataFrame(np.array(types_matrix, dtype=float), columns=types, index=types)

def style_function(v):
    """Return the CSS declaration string used to render one chart cell.

    The background is red for 0, orange for 0.5, green for 2 and white for
    any other value; text is always centred and black.
    """
    highlighted = (
        (0, 'text-align: center;background:red;color:black;'),
        (0.5, 'text-align: center;background:orange;color:black;'),
        (2, 'text-align: center;background:green;color:black;'),
    )
    for multiplier, css in highlighted:
        if v == multiplier:
            return css
    return 'text-align: center;background:white;color:black;'

# Save the chart first (with the type names as the index column).
df.to_csv('./raw-data/types.csv')

# Only the last expression of a cell is rendered, so the colour-coded view must
# come last; placed before the save it was built and then thrown away.
# Styler.applymap was renamed to Styler.map in pandas 2.1, so use whichever
# this pandas version provides.
_styler = df.style
_style_cells = _styler.map if hasattr(_styler, 'map') else _styler.applymap
_style_cells(style_function).format("{:.2}")