#### Imports necessários:

In [1]:
# Para logs:
from tqdm import tqdm
import logging
import sys
sys.path.append('../utils')
from utils import get_logger, load_json, export_json, Clock
# Para web scraping:
import requests
from bs4 import BeautifulSoup, SoupStrainer
import cchardet
from urllib.parse import unquote
# Contador
from itertools import count
# Manipulação de dados:
import pandas as pd
import numpy as np
# Configs:
pd.options.display.max_columns = None

#### Carregando a base de dados:

Primeiro vamos dar uma olhada nas colunas do dataset atualmente utilizado:

In [2]:
data_path = 'C:\\Users\\User\\Documents\\jupyter_notebooks\\ufmg\\itemset_mining_applied_to_pokemon_teams\\'
df = pd.read_csv(data_path + 'data\\original\\pokedex.csv', delimiter=',', encoding='utf8')
print(f'O dataset possui {df.shape[0]} linhas e {df.shape[1]} colunas.')
print(df.columns.tolist())

O dataset possui 1033 linhas e 40 colunas.
['pokedex_number', 'name', 'generation', 'classfication', 'abilities', 'height_m', 'weight_kg', 'type1', 'type2', 'base_total', 'hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'against_bug', 'against_dark', 'against_dragon', 'against_electric', 'against_fairy', 'against_fighting', 'against_fire', 'against_flying', 'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal', 'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water', 'capture_rate', 'base_egg_steps', 'base_happiness', 'is_legendary', 'is_mythical', 'is_mega']


Agora vamos abrir o dataset alternativo que contém parte das informações dos pokémons da 9ª geração:

In [3]:
df2 = pd.read_csv(data_path + 'data\\original\\PokemonStats.csv', skiprows=range(1, 1080), encoding='utf8', delimiter=',')
df2.head()

Unnamed: 0,ID,Name,Total,HP,Attack,Defense,SpAtk,SpDef,Speed,Type1,Type2,Height,Weight
0,906,Sprigatito,310,40,61,54,45,45,65,Grass,,0.4,4.1
1,907,Floragato,410,61,80,63,60,63,83,Grass,,0.9,12.2
2,908,Meowscarada,530,76,110,70,81,70,123,Grass,Dark,1.5,31.2
3,909,Fuecoco,310,67,45,59,63,40,36,Fire,,0.4,9.8
4,910,Crocalor,411,81,55,78,90,58,49,Fire,,1.0,30.7


#### Populando os dados dos pokémons da 9ª geração:

Inicializando o logger:

In [4]:
logger = logging.getLogger(__name__)
logger = get_logger(logger=logger)

 Vamos completar as informações do dataset secundário seguindo o formato do dataset primário:

In [5]:
df2.rename(columns={
    'ID': 'pokedex_number',
    'Name': 'name',
    'Total': 'base_total',
    'HP': 'hp',
    'Attack': 'attack',
    'Defense': 'defense',
    'SpAtk': 'sp_attack',
    'SpDef': 'sp_defense',
    'Speed': 'speed',
    'Type1': 'type1',
    'Type2': 'type2',
    'Height': 'height_m',
    'Weight': 'weight_kg'
}, inplace=True)
df2.fillna('None', inplace=True)
for column_name in list(set(df.columns) - set(df2.columns)):
    df2[column_name] = ''
df2 = df2[list(df.columns)]
df2['generation'] = 9
df2['is_mega'] = 0
df2['is_mythical'] = 0
df2['is_legendary'] = 0
df2.loc[df2['name'].isin(['Koraidon', 'Miraidon', 'Ting-Lu', 'Chien-Pao', 'Wo-Chien', 'Chi-Yu']), 'is_legendary'] = 1
df2.head()

Unnamed: 0,pokedex_number,name,generation,classfication,abilities,height_m,weight_kg,type1,type2,base_total,hp,attack,defense,sp_attack,sp_defense,speed,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fighting,against_fire,against_flying,against_ghost,against_grass,against_ground,against_ice,against_normal,against_poison,against_psychic,against_rock,against_steel,against_water,capture_rate,base_egg_steps,base_happiness,is_legendary,is_mythical,is_mega
0,906,Sprigatito,9,,,0.4,4.1,Grass,,310,40,61,54,45,45,65,,,,,,,,,,,,,,,,,,,,,,0,0,0
1,907,Floragato,9,,,0.9,12.2,Grass,,410,61,80,63,60,63,83,,,,,,,,,,,,,,,,,,,,,,0,0,0
2,908,Meowscarada,9,,,1.5,31.2,Grass,Dark,530,76,110,70,81,70,123,,,,,,,,,,,,,,,,,,,,,,0,0,0
3,909,Fuecoco,9,,,0.4,9.8,Fire,,310,67,45,59,63,40,36,,,,,,,,,,,,,,,,,,,,,,0,0,0
4,910,Crocalor,9,,,1.0,30.7,Fire,,411,81,55,78,90,58,49,,,,,,,,,,,,,,,,,,,,,,0,0,0


In [6]:
base_url = 'https://bulbapedia.bulbagarden.net'
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 OPR/97.0.0.0'
}

Vamos coletar os links das páginas da _Bulbapedia_ para cada pokémon da 9ª geração:

In [7]:
just_table = SoupStrainer('table')

response = requests.get(base_url + '/wiki/List_of_Pokémon_by_National_Pokédex_number', headers=headers)

if response.status_code == 200:
    content = response.content.decode('utf-8')
    content_decoded = unquote(content)
    main_soup = BeautifulSoup(content_decoded, 'html.parser', parse_only=just_table)
    gen9_table = main_soup.select_one('table:has(a[href*="Sprigatito"])')
    pokemon_links = [link.get('href') for link in gen9_table.select('tr td:nth-child(2) a:first-child')]
else:
    print("Falha na solicitação HTTP!")

Não utilizaremos os 11 últimos pelo fato de que são pokémons ainda não lançados e, por tanto, não há tantas informações sobre eles na internet:

In [8]:
pokemon_links = pokemon_links[:-11]
pokemon_links

['/wiki/Sprigatito_(Pokémon)',
 '/wiki/Floragato_(Pokémon)',
 '/wiki/Meowscarada_(Pokémon)',
 '/wiki/Fuecoco_(Pokémon)',
 '/wiki/Crocalor_(Pokémon)',
 '/wiki/Skeledirge_(Pokémon)',
 '/wiki/Quaxly_(Pokémon)',
 '/wiki/Quaxwell_(Pokémon)',
 '/wiki/Quaquaval_(Pokémon)',
 '/wiki/Lechonk_(Pokémon)',
 '/wiki/Oinkologne_(Pokémon)',
 '/wiki/Oinkologne_(Pokémon)',
 '/wiki/Tarountula_(Pokémon)',
 '/wiki/Spidops_(Pokémon)',
 '/wiki/Nymble_(Pokémon)',
 '/wiki/Lokix_(Pokémon)',
 '/wiki/Pawmi_(Pokémon)',
 '/wiki/Pawmo_(Pokémon)',
 '/wiki/Pawmot_(Pokémon)',
 '/wiki/Tandemaus_(Pokémon)',
 '/wiki/Maushold_(Pokémon)',
 '/wiki/Maushold_(Pokémon)',
 '/wiki/Fidough_(Pokémon)',
 '/wiki/Dachsbun_(Pokémon)',
 '/wiki/Smoliv_(Pokémon)',
 '/wiki/Dolliv_(Pokémon)',
 '/wiki/Arboliva_(Pokémon)',
 '/wiki/Squawkabilly_(Pokémon)',
 '/wiki/Squawkabilly_(Pokémon)',
 '/wiki/Squawkabilly_(Pokémon)',
 '/wiki/Squawkabilly_(Pokémon)',
 '/wiki/Nacli_(Pokémon)',
 '/wiki/Naclstack_(Pokémon)',
 '/wiki/Garganacl_(Pokémon)',
 '/wik

Vamos criar um dicionário com os nomes dos pokémons do dataset e os links da _Bulbapedia_ a fim de facilitar no preenchimento a cada iteração:

In [9]:
pokemons_and_links = dict(zip(df2['name'], pokemon_links))
pokemons_and_links

{'Sprigatito': '/wiki/Sprigatito_(Pokémon)',
 'Floragato': '/wiki/Floragato_(Pokémon)',
 'Meowscarada': '/wiki/Meowscarada_(Pokémon)',
 'Fuecoco': '/wiki/Fuecoco_(Pokémon)',
 'Crocalor': '/wiki/Crocalor_(Pokémon)',
 'Skeledirge': '/wiki/Skeledirge_(Pokémon)',
 'Quaxly': '/wiki/Quaxly_(Pokémon)',
 'Quaxwell': '/wiki/Quaxwell_(Pokémon)',
 'Quaquaval': '/wiki/Quaquaval_(Pokémon)',
 'Lechonk': '/wiki/Lechonk_(Pokémon)',
 'Oinkologne Male': '/wiki/Oinkologne_(Pokémon)',
 'Oinkologne Female': '/wiki/Oinkologne_(Pokémon)',
 'Tarountula': '/wiki/Tarountula_(Pokémon)',
 'Spidops': '/wiki/Spidops_(Pokémon)',
 'Nymble': '/wiki/Nymble_(Pokémon)',
 'Lokix': '/wiki/Lokix_(Pokémon)',
 'Pawmi': '/wiki/Pawmi_(Pokémon)',
 'Pawmo': '/wiki/Pawmo_(Pokémon)',
 'Pawmot': '/wiki/Pawmot_(Pokémon)',
 'Tandemaus': '/wiki/Tandemaus_(Pokémon)',
 'Maushold Family of Four': '/wiki/Maushold_(Pokémon)',
 'Maushold Family of Three': '/wiki/Maushold_(Pokémon)',
 'Fidough': '/wiki/Fidough_(Pokémon)',
 'Dachsbun': '/wiki/

Vamos fazer _web scraping_ para obter as demais informações faltantes para cada pokémon:

In [10]:
logger.info("Iniciando uma sessão...")
with requests.Session() as session:
    logger.info("A iteração começará agora...")
    iteration_time = Clock("Coleta das informações faltantes dos pokémons da 9ª geração")
    with tqdm(total=len(pokemons_and_links)) as progress_bar:
        progress_bar.set_description("Populando as colunas em branco dos pokémons da 9ª geração...")
        for pokemon_name, link in pokemons_and_links.items():
            response = requests.get(base_url + link, headers=headers)
            soup = BeautifulSoup(response.content, 'html.parser')
            df2.loc[df2['name'] == pokemon_name, 'classfication'] = soup.select_one('a[title*="Pokémon category"]').get_text()
            df2.loc[df2['name'] == pokemon_name, 'capture_rate'] = soup.select_one('a[title="Catch rate"]').find_parent().find_next_sibling().td.get_text().split(' ')[0]
            df2.loc[df2['name'] == pokemon_name, 'base_happiness'] = soup.select_one('a[title="List of Pokémon by base friendship"]').find_parent().find_next_sibling().td.get_text()[:-1]
            df2.loc[df2['name'] == pokemon_name, 'abilities'] = str([link.get_text() for link in soup.select('tr > td:not([style="display: none"]) > a[title$=" (Ability)"]')])
            
            effectiveness_table = soup.select_one('h3:has(span[id="Type_effectiveness"]) + table')
            df2.loc[df2['name'] == pokemon_name, 'against_bug'] = effectiveness_table.select_one('a[title="Bug (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_dark'] = effectiveness_table.select_one('a[title="Dark (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_dragon'] = effectiveness_table.select_one('a[title="Dragon (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_electric'] = effectiveness_table.select_one('a[title="Electric (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_fairy'] = effectiveness_table.select_one('a[title="Fairy (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_fighting'] = effectiveness_table.select_one('a[title="Fighting (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_fire'] = effectiveness_table.select_one('a[title="Fire (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_flying'] = effectiveness_table.select_one('a[title="Flying (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_ghost'] = effectiveness_table.select_one('a[title="Ghost (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_grass'] = effectiveness_table.select_one('a[title="Grass (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_ground'] = effectiveness_table.select_one('a[title="Ground (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_ice'] = effectiveness_table.select_one('a[title="Ice (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_normal'] = effectiveness_table.select_one('a[title="Normal (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_poison'] = effectiveness_table.select_one('a[title="Poison (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_psychic'] = effectiveness_table.select_one('a[title="Psychic (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_rock'] = effectiveness_table.select_one('a[title="Rock (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_steel'] = effectiveness_table.select_one('a[title="Steel (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            df2.loc[df2['name'] == pokemon_name, 'against_water'] = effectiveness_table.select_one('a[title="Water (type)"]').find_parent().find_parent().find_next_sibling().get_text()[:-2]
            progress_bar.update(1)
    iteration_time.stop_watch()
    logger.info("Coleta concluída!")

2023-10-10 19:42:28,886 - INFO - Iniciando uma sessão...
2023-10-10 19:42:28,889 - INFO - A iteração começará agora...
Populando as colunas em branco dos pokémons da 9ª geração...: 100%|██████████████████| 115/115 [01:35<00:00,  1.21it/s]
2023-10-10 19:44:04,278 - INFO - Coleta das informações faltantes dos pokémons da 9ª geração levou 01m35s para ser executado.
2023-10-10 19:44:04,280 - INFO - Coleta concluída!


Vamos corrigir a forma como os dados das colunas relativas a fraquezas e vantagens estão dispostos:

In [12]:
df2[list(df2.columns)[16:-6]] = df2[list(df2.columns)[16:-6]].replace({'¼': 0.25, '½': 0.5, '1': 1.0, '2': 2.0, '4': 4.0})
df2

Unnamed: 0,pokedex_number,name,generation,classfication,abilities,height_m,weight_kg,type1,type2,base_total,hp,attack,defense,sp_attack,sp_defense,speed,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fighting,against_fire,against_flying,against_ghost,against_grass,against_ground,against_ice,against_normal,against_poison,against_psychic,against_rock,against_steel,against_water,capture_rate,base_egg_steps,base_happiness,is_legendary,is_mythical,is_mega
0,906,Sprigatito,9,Grass Cat Pokémon,"['Overgrow', 'Protean']",0.4,4.1,Grass,,310,40,61,54,45,45,65,2.00,1.00,1.0,0.5,1.0,1.0,2.00,2.0,1.0,0.5,0.5,2.0,1.0,2.0,1.0,1.0,1.0,0.50,45,,50,0,0,0
1,907,Floragato,9,Grass Cat Pokémon,"['Overgrow', 'Protean']",0.9,12.2,Grass,,410,61,80,63,60,63,83,2.00,1.00,1.0,0.5,1.0,1.0,2.00,2.0,1.0,0.5,0.5,2.0,1.0,2.0,1.0,1.0,1.0,0.50,45,,50,0,0,0
2,908,Meowscarada,9,Magician Pokémon,"['Overgrow', 'Protean']",1.5,31.2,Grass,Dark,530,76,110,70,81,70,123,4.00,0.50,1.0,0.5,2.0,2.0,2.00,2.0,0.5,0.5,0.5,2.0,1.0,2.0,0,1.0,1.0,0.50,45,,50,0,0,0
3,909,Fuecoco,9,Fire Croc Pokémon,"['Blaze', 'Unaware']",0.4,9.8,Fire,,310,67,45,59,63,40,36,0.50,1.00,1.0,1.0,0.5,1.0,0.50,1.0,1.0,0.5,2.0,0.5,1.0,1.0,1.0,2.0,0.5,2.00,45,,50,0,0,0
4,910,Crocalor,9,Fire Croc Pokémon,"['Blaze', 'Unaware']",1.0,30.7,Fire,,411,81,55,78,90,58,49,0.50,1.00,1.0,1.0,0.5,1.0,0.50,1.0,1.0,0.5,2.0,0.5,1.0,1.0,1.0,2.0,0.5,2.00,45,,50,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,1006,Iron Valiant,9,Paradox Pokémon,['Quark Drive'],1.4,35.0,Fairy,Fighting,590,74,130,90,120,60,116,0.25,0.25,0,1.0,2.0,0.5,1.00,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.5,2.0,1.00,10,,0,0,0,0
111,1007,Koraidon,9,Paradox Pokémon,['Orichalcum Pulse'],2.5,303.0,Fighting,Dragon,670,100,135,115,85,100,135,0.50,0.50,2.0,0.5,4.0,1.0,0.50,2.0,1.0,0.5,1.0,2.0,1.0,1.0,2.0,0.5,1.0,0.50,3,,0,1,0,0
112,1008,Miraidon,9,Paradox Pokémon,['Hadron Engine'],3.5,240.0,Electric,Dragon,670,100,85,100,135,115,135,1.00,1.00,2.0,0.25,2.0,1.0,0.50,0.5,1.0,0.5,2.0,2.0,1.0,1.0,1.0,1.0,0.5,0.50,3,,0,1,0,0
113,1009,Walking Wake,9,Paradox Pokémon,['Protosynthesis'],3.5,280.0,Water,Dragon,590,99,83,91,125,83,109,1.00,1.00,2.0,1.0,2.0,1.0,0.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.25,5,,0,0,0,0


No caso da coluna 'base_egg_cycles', a informação de que precisamos não existe na página individual de cada pokémon na _Bulbapedia_, fazendo-se necessária a coleta de outra página:

In [13]:
response = requests.get(base_url + '/wiki/List_of_Pokémon_by_base_Egg_cycles', headers=headers)

if response.status_code == 200:
    page = response.content
    soup = BeautifulSoup(page, 'html.parser')
else:
    print("Falha na solicitação HTTP!")

Vamos montar um dataframe contendo os nomes e suas respectivas 'base_egg_steps':

In [14]:
egg_steps_table = soup.find('table', class_='sortable roundy')


data = []
for row in egg_steps_table.find_all('tr')[1:]:  # Ignorar a primeira linha que contém cabeçalhos
    cols = row.find_all('td')
    cols = [col.text for col in cols]
    data.append(cols)
    
egg_steps_df = pd.DataFrame(data)
egg_steps_df = egg_steps_df.iloc[911:1011, [2, 11]].reset_index(drop=True)
egg_steps_df.rename(columns={
    2: 'name',
    11: 'base_egg_steps'
}, inplace=True)
for column in egg_steps_df.columns:
    egg_steps_df[column] = egg_steps_df[column].apply(lambda text: text[:-1])
egg_steps_df

Unnamed: 0,name,base_egg_steps
0,Sprigatito,2560
1,Floragato,2560
2,Meowscarada,2560
3,Fuecoco,2560
4,Crocalor,2560
...,...,...
95,Iron Valiant,6400
96,Koraidon,6400
97,Miraidon,6400
98,Walking Wake,6400


Vamos juntar as informações usando o _merge_ e ver o resultado:

In [15]:
df2.drop('base_egg_steps', axis=1, inplace=True)
df2 = df2.merge(egg_steps_df, on='name', how='left')
df2 = df2[list(df.columns)]
df2

Unnamed: 0,pokedex_number,name,generation,classfication,abilities,height_m,weight_kg,type1,type2,base_total,hp,attack,defense,sp_attack,sp_defense,speed,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fighting,against_fire,against_flying,against_ghost,against_grass,against_ground,against_ice,against_normal,against_poison,against_psychic,against_rock,against_steel,against_water,capture_rate,base_egg_steps,base_happiness,is_legendary,is_mythical,is_mega
0,906,Sprigatito,9,Grass Cat Pokémon,"['Overgrow', 'Protean']",0.4,4.1,Grass,,310,40,61,54,45,45,65,2.00,1.00,1.0,0.5,1.0,1.0,2.00,2.0,1.0,0.5,0.5,2.0,1.0,2.0,1.0,1.0,1.0,0.50,45,2560,50,0,0,0
1,907,Floragato,9,Grass Cat Pokémon,"['Overgrow', 'Protean']",0.9,12.2,Grass,,410,61,80,63,60,63,83,2.00,1.00,1.0,0.5,1.0,1.0,2.00,2.0,1.0,0.5,0.5,2.0,1.0,2.0,1.0,1.0,1.0,0.50,45,2560,50,0,0,0
2,908,Meowscarada,9,Magician Pokémon,"['Overgrow', 'Protean']",1.5,31.2,Grass,Dark,530,76,110,70,81,70,123,4.00,0.50,1.0,0.5,2.0,2.0,2.00,2.0,0.5,0.5,0.5,2.0,1.0,2.0,0,1.0,1.0,0.50,45,2560,50,0,0,0
3,909,Fuecoco,9,Fire Croc Pokémon,"['Blaze', 'Unaware']",0.4,9.8,Fire,,310,67,45,59,63,40,36,0.50,1.00,1.0,1.0,0.5,1.0,0.50,1.0,1.0,0.5,2.0,0.5,1.0,1.0,1.0,2.0,0.5,2.00,45,2560,50,0,0,0
4,910,Crocalor,9,Fire Croc Pokémon,"['Blaze', 'Unaware']",1.0,30.7,Fire,,411,81,55,78,90,58,49,0.50,1.00,1.0,1.0,0.5,1.0,0.50,1.0,1.0,0.5,2.0,0.5,1.0,1.0,1.0,2.0,0.5,2.00,45,2560,50,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,1006,Iron Valiant,9,Paradox Pokémon,['Quark Drive'],1.4,35.0,Fairy,Fighting,590,74,130,90,120,60,116,0.25,0.25,0,1.0,2.0,0.5,1.00,2.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,0.5,2.0,1.00,10,6400,0,0,0,0
111,1007,Koraidon,9,Paradox Pokémon,['Orichalcum Pulse'],2.5,303.0,Fighting,Dragon,670,100,135,115,85,100,135,0.50,0.50,2.0,0.5,4.0,1.0,0.50,2.0,1.0,0.5,1.0,2.0,1.0,1.0,2.0,0.5,1.0,0.50,3,6400,0,1,0,0
112,1008,Miraidon,9,Paradox Pokémon,['Hadron Engine'],3.5,240.0,Electric,Dragon,670,100,85,100,135,115,135,1.00,1.00,2.0,0.25,2.0,1.0,0.50,0.5,1.0,0.5,2.0,2.0,1.0,1.0,1.0,1.0,0.5,0.50,3,6400,0,1,0,0
113,1009,Walking Wake,9,Paradox Pokémon,['Protosynthesis'],3.5,280.0,Water,Dragon,590,99,83,91,125,83,109,1.00,1.00,2.0,1.0,2.0,1.0,0.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.25,5,6400,0,0,0,0


Como o _egg_steps_df_ possuía menos linhas, algumas linhas do _df2_ ficaram vazias. Isso será manualmente preenchido antes da análise exploratória assim como o dataset primário. Vamos passar as informações do dataset secundário para o primário e exportar como _csv_:

In [16]:
df = pd.concat([df, df2]).reset_index(drop=True)
df['base_egg_steps'] = df['base_egg_steps'].str.replace(',', '', regex=True)
df['base_egg_steps'] = df['base_egg_steps'].apply(lambda x: x if x is np.nan else int(x))
df.head()

Unnamed: 0,pokedex_number,name,generation,classfication,abilities,height_m,weight_kg,type1,type2,base_total,hp,attack,defense,sp_attack,sp_defense,speed,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fighting,against_fire,against_flying,against_ghost,against_grass,against_ground,against_ice,against_normal,against_poison,against_psychic,against_rock,against_steel,against_water,capture_rate,base_egg_steps,base_happiness,is_legendary,is_mythical,is_mega
0,1,Bulbasaur,1,Seed Pokemon,"['Overgrow', 'Chlorophyll']",0.7,6.9,Grass,Poison,318,45,49,49,65,65,45,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,45,5140.0,70.0,0,0,0
1,2,Ivysaur,1,Seed Pokemon,"['Overgrow', 'Chlorophyll']",1.0,13.0,Grass,Poison,405,60,62,63,80,80,60,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,45,5140.0,70.0,0,0,0
2,3,Venusaur,1,Seed Pokemon,"['Overgrow', 'Chlorophyll']",2.0,100.0,Grass,Poison,525,80,82,83,100,100,80,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,45,5140.0,70.0,0,0,0
3,3,Mega Venusaur,1,Seed Pokemon,['Thick Fat'],2.4,155.5,Grass,Poison,625,80,100,123,122,120,80,1.0,1.0,1.0,0.5,0.5,0.5,1.0,2.0,1.0,0.25,1.0,1.0,1.0,1.0,2.0,1.0,1.0,0.5,45,5140.0,70.0,0,0,1
4,4,Charmander,1,Lizard Pokemon,"['Blaze', 'Solar Power']",0.6,8.5,Fire,,309,39,52,43,60,50,65,0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,0.5,2.0,0.5,1.0,1.0,1.0,2.0,0.5,2.0,45,5140.0,70.0,0,0,0


#### Exportando o dataset resultante:

In [17]:
df.to_csv(data_path + 'data\\processed\\pokedex_atualizada.csv', index=False)