# Util Functions
---

- ### Formatar uma saída.

In [1]:
def get_formatted_message(poke_info):
    """Render one scraped record as a '#'-framed, multi-line text card.

    Args:
        poke_info: dict holding a 'core' dict (keys 'index', 'name',
            'jp_name', 'jp_rom_name', 'category') and the lists 'types',
            'abilities' and 'gender_ratio'.

    Returns:
        The card as a single string. Types are joined with ' / ', every
        ability goes on its own '# - ' line, and an empty 'gender_ratio'
        list is rendered as "Genderless".
    """
    types     = poke_info['types']
    abilities = poke_info['abilities']

    # Headings are pluralised only when the matching list has several entries.
    if len(types) > 1:
        type_label = 'Types'
    else:
        type_label = 'Type'

    if len(abilities) > 1:
        ability_label = 'Abilities'
    else:
        ability_label = 'Ability'

    template = """\
########################################################################################
# Index: %d
# Name: %s / %s (%s)
# Category: %s
# %s: %s
# %s: \n# - %s
# Gender Ratio: %s
########################################################################################
    """

    core = poke_info['core']
    leading_fields = (
        core['index'],
        core['name'],
        core['jp_name'],
        core['jp_rom_name'],
        core['category'],
        type_label,
        ' / '.join(types),
        ability_label,
        ' \n# - '.join(abilities),
    )

    gender_ratio = poke_info['gender_ratio']
    if len(gender_ratio) > 0:
        gender_text = ' / '.join(gender_ratio)
    else:
        gender_text = "Genderless"

    return template % (leading_fields + (gender_text,))


---
&nbsp;
- ### Recriar arquivos dado o nome.

In [2]:
import os

def recreate_file(file_name):
    """Replace ``file_name`` with a brand-new, empty file.

    Any existing file at that path is deleted, then the file is created in
    exclusive mode ('x').

    Args:
        file_name: path of the file to (re)create.

    Raises:
        OSError: if the old file cannot be removed (for reasons other than
            not existing) or the new file cannot be created.
    """
    # Delete-and-ignore instead of exists() + remove(): there is no window in
    # which the file can vanish between the check and the deletion.
    try:
        os.remove(file_name)
    except FileNotFoundError:
        pass

    # The context manager closes the handle immediately; a bare open() call
    # would leave it open until garbage collection.
    with open(file_name, 'x'):
        pass



# Core Functions
---

- ### Carregar a página destino e retornar o soup.

In [3]:
from urllib.request import urlopen as uOpen, Request as uReq
from bs4 import BeautifulSoup as soup

def get_poke_soup(link):
    """Download ``link`` and parse the response into a BeautifulSoup tree.

    Args:
        link: absolute URL of the page to fetch.

    Returns:
        The page body parsed with the 'html.parser' backend.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    # The request carries a custom User-Agent header.
    # NOTE(review): presumably needed because the site rejects urllib's
    # default agent - confirm.
    request = uReq(link, headers={'User-Agent': 'Magic Browser'})

    # `with` guarantees the connection is closed even when read() raises.
    with uOpen(request) as response:
        poke_page_html = response.read()

    return soup(poke_page_html, 'html.parser')

---
&nbsp;
- ### Checar se é a última página a ser verificada.
    - Em alguns casos, quando está perto do lançamento de um novo jogo e só se tem informação de um ou mais pokémons, a Bulbapedia às vezes coloca informações não oficiais com um layout diferente.

In [4]:
def check_last_page(poke_soup):
    """Report whether the page is the placeholder that ends the crawl.

    The first link inside the first table of the 'mw-content-text' element is
    compared with the BulbaShadow image link.

    Args:
        poke_soup: parsed page, as returned by ``get_poke_soup``.

    Returns:
        True when that link points to "/wiki/File:BulbaShadow.png".
    """
    content = poke_soup.find(id='mw-content-text')
    first_link = content.table.a
    return first_link['href'] == "/wiki/File:BulbaShadow.png"

---
&nbsp;
- ### Pegar informações do Pokémon
    - Essa é a função core, todas as funções de extração de dados são chamadas aqui.

In [5]:
def get_poke_info(poke_soup):
    """Collect every extracted field of one page into a single dict.

    The second top-level table of the 'mw-content-text' element is handed to
    each extraction function in turn.

    Args:
        poke_soup: parsed page, as returned by ``get_poke_soup``.

    Returns:
        A dict with the keys 'core' (itself a dict with 'index', 'name',
        'category', 'jp_name', 'jp_rom_name'), 'types', 'abilities' and
        'gender_ratio'.
    """
    content    = poke_soup.find(id='mw-content-text')
    top_tables = content.find_all('table', recursive=False)
    info_table = top_tables[1]

    core_fields   = get_core_poke_info(info_table)
    type_names    = get_poke_types(info_table)
    ability_names = get_poke_abilities(info_table)
    ratio_parts   = get_poke_ratio(info_table)

    # The key order matches the positions of the tuple returned by
    # get_core_poke_info.
    core_keys = ('index', 'name', 'category', 'jp_name', 'jp_rom_name')

    poke_info = {
        'core': {key: core_fields[position] for position, key in enumerate(core_keys)}
    }
    poke_info['types']        = type_names
    poke_info['abilities']    = ability_names
    poke_info['gender_ratio'] = ratio_parts
    return poke_info

# Data Extraction Functions
---

- ### Pegar o link do próximo pokémon
    - Nessa parte da página, a Bulbapedia pode inserir linhas (`<tr>`) a mais para informar algo (geralmente relacionado ao Pokémon GO); por isso esse tratamento.

In [6]:
def get_next_pokemon_link(poke_soup):
    """Return the href of the "next" link in the page's navigation table.

    The link is read from the third direct <td> of the second direct <tr> of
    the first table inside 'mw-content-text'. When that row or cell does not
    exist, the first row is used instead.

    Args:
        poke_soup: parsed page, as returned by ``get_poke_soup``.

    Returns:
        The relative link (href) of the next page.

    Raises:
        IndexError: if neither row has a third cell. The earlier version
            returned from a ``finally`` block, which hid this error and
            handed back the first link found anywhere in the table.
    """
    nav_table = poke_soup.find(id='mw-content-text').table
    rows = nav_table.find_all('tr', recursive=False)

    try:
        next_cell = rows[1].find_all('td', recursive=False)[2]
    except IndexError:
        # Layout without the extra row: the navigation cells are in row 0.
        next_cell = rows[0].find_all('td', recursive=False)[2]

    return next_cell.find('a')['href']

---
&nbsp;
- ### Pegar informações core do pokémon:
    - Index (número na NationalDex)
    - Nome
    - Categoria
    - Nome (JP)
    - Nome (JP/Romaji)

In [7]:
def get_core_poke_info(info_table):
    """Extract the index and the English/Japanese naming data.

    Args:
        info_table: the info table element selected by ``get_poke_info``.

    Returns:
        A 5-tuple ``(index, name, category, jp_name, jp_rom_name)`` where
        ``index`` is an int (the '#' is removed from the link text before
        conversion) and the other items are strings.
    """
    header_row = info_table.tr.td.table.tr

    # Direct <td> children of the nested name table: cell 0 supplies the name
    # and category, cell 1 the Japanese name and its romanisation.
    name_cells = header_row.td.table.tr.find_all('td', recursive=False)

    index_text = header_row.th.find('a').text
    index = int(index_text.replace('#', ''))

    english_cell = name_cells[0]
    name = english_cell.big.text
    category = english_cell.a.text

    japanese_cell = name_cells[1]
    jp_name = japanese_cell.span.text
    jp_rom_name = japanese_cell.i.text

    return (index, name, category, jp_name, jp_rom_name)

---
&nbsp;
- ### Pegar tipos do pokémon.

In [8]:
def get_poke_types(info_table):
    """List the type names shown in the info table.

    Args:
        info_table: the info table element selected by ``get_poke_info``.

    Returns:
        The text of every link in the selected type cell, in document order,
        leaving out entries whose text is 'Unknown'.
    """
    rows = info_table.find_all('tr', recursive=False)

    # NOTE(review): in BeautifulSoup a None attribute value is expected to
    # match tags that do not define the attribute, i.e. the first <td>
    # without an inline style - confirm against the bs4 docs.
    type_cell = rows[1].table.find('td', attrs={'style': None})

    names = []
    for link in type_cell.find_all('a'):
        label = link.text
        if label != 'Unknown':
            names.append(label)
    return names


---
&nbsp;
- ### Pegar habilidades do pokémon.

In [9]:
def get_poke_abilities(info_table):
    """List the abilities shown in the info table.

    The cells are found by locating the link titled 'Ability', climbing to
    its closest enclosing <td>, and reading every <td> of the first table
    inside it.

    Args:
        info_table: the info table element selected by ``get_poke_info``.

    Returns:
        One string per visible ability cell. When the cell has a <small>
        element, its stripped text is appended in parentheses, e.g.
        'Chlorophyll (Hidden Ability)'. Returns an empty list when the
        'Ability' link has no <td> ancestor.
    """
    ability_title = info_table.find('a', attrs={'title': 'Ability'})

    # Closest ancestor <td>: `parents` iterates from the innermost outwards.
    container = next(
        (parent for parent in ability_title.parents if parent.name == 'td'),
        None,
    )
    if container is None:
        return []

    abilities = []
    for td in container.table.find_all('td'):
        # Skip hidden cells. Whitespace and case are normalised so that
        # 'display:none' is recognised as well as 'display: none'.
        if td.has_attr('style'):
            compact_style = ''.join(td['style'].split()).lower()
            if 'display:none' in compact_style:
                continue

        # A cell without a link carries no ability name; skip it instead of
        # failing with AttributeError (get_poke_ratio guards the same way).
        link = td.find('a')
        if link is None:
            continue

        ability = link.text

        hidden_ability_container = td.find('small')
        if hidden_ability_container is not None:
            ability += f' ({hidden_ability_container.text.strip()})'

        abilities.append(ability)

    return abilities

- ### Pegar gender ratios.

In [10]:
def get_poke_ratio(info_table):
    """Extract the gender-ratio entries shown in the info table.

    The cells are found by locating the link titled
    'List of Pokémon by gender ratio', climbing to its closest enclosing
    <td>, and scanning every <td> of the first table inside it.

    Args:
        info_table: the info table element selected by ``get_poke_info``.

    Returns:
        The comma-separated parts of the link text, each stripped, taken from
        the last cell that has no 'style' attribute and contains a link.
        An empty list when no such cell (or no enclosing <td>) exists.
    """
    anchor = info_table.find('a', attrs={'title': 'List of Pokémon by gender ratio'})

    # `parents` runs from the innermost ancestor outwards; only the first
    # <td> encountered is inspected.
    enclosing_td = next((node for node in anchor.parents if node.name == 'td'), None)
    if enclosing_td is None:
        return []

    parts = []
    for cell in enclosing_td.table.find_all('td'):
        link = cell.find('a')
        if link is None or cell.has_attr('style'):
            continue
        # A later matching cell replaces the result of an earlier one.
        parts = [piece.strip() for piece in link.text.split(',')]

    return parts

# Main Cycle
---

- ### Constantes

In [11]:
# Scheme and host prepended to every relative wiki link before it is fetched.
BASE_URL  = 'https://bulbapedia.bulbagarden.net'
# Relative link that stops the crawl when it shows up as the next page;
# '%3F%3F%3F' is the URL-encoded form of '???'.
END_URL   = '/wiki/%3F%3F%3F_(Pok%C3%A9mon)'
# Output file: recreated by the main cell, then filled with one entry per page.
FILE_NAME = 'pokemon_list.txt'

def get_pokemon_link(poke_name):
    """Build the relative wiki link for a Pokémon name.

    Args:
        poke_name: the name as written in the page title, e.g. 'Bulbasaur'
            or 'Mr. Mime'. Already percent-encoded names are accepted too.

    Returns:
        '/wiki/<Name>_%28Pok%C3%A9mon%29' with spaces turned into
        underscores and unsafe characters percent-encoded.
    """
    from urllib.parse import quote

    if poke_name.islower() or poke_name.isupper():
        # Single-case input: normalise to "Xxxx", as before.
        title = poke_name.capitalize()
    else:
        # Mixed-case input already carries its real capitalisation
        # ('Ho-Oh', 'Mr. Mime'); str.capitalize() would lowercase it.
        title = poke_name[:1].upper() + poke_name[1:]

    # Wiki titles use underscores for spaces. '%' stays untouched so that
    # pre-encoded names are not encoded twice.
    encoded = quote(title.replace(' ', '_'), safe='/%')

    return f'/wiki/{encoded}_%28Pok%C3%A9mon%29'

---
&nbsp;
- ### Main

In [12]:
# Start from a fresh, empty output file.
recreate_file(FILE_NAME)

# Explicit UTF-8: every entry contains Japanese text, which the platform's
# default encoding is not guaranteed to be able to write.
with open(FILE_NAME, 'w', encoding='utf-8') as f:
    next_pokemon_link = get_pokemon_link('Bulbasaur')

    # Follow the "next" link from page to page until a stop condition is hit.
    while True:
        poke_soup = get_poke_soup(f'{BASE_URL}{next_pokemon_link}')
        # Stop on the placeholder page, before trying to parse it.
        if check_last_page(poke_soup):
            break

        poke_info = get_poke_info(poke_soup)
        message   = get_formatted_message(poke_info)

        # Each entry is the page link followed by its formatted card; the
        # same two pieces are echoed to the cell output.
        f.write(f'{next_pokemon_link}\n{message}\n')
        print(next_pokemon_link)
        print(message)

        next_pokemon_link = get_next_pokemon_link(poke_soup)
        # Also stop when the next link is the known end-of-list page.
        if next_pokemon_link == END_URL:
            break


    print("\n\n########## FINISHED ##########\n\n")


/wiki/Bulbasaur_%28Pok%C3%A9mon%29
########################################################################################
# Index: 1
# Name: Bulbasaur / フシギダネ (Fushigidane)
# Category: Seed Pokémon
# Types: Grass / Poison
# Abilities: 
# - Overgrow 
# - Chlorophyll (Hidden Ability)
# Gender Ratio: 87.5% male / 12.5% female
########################################################################################
    
/wiki/Ivysaur_(Pok%C3%A9mon)
########################################################################################
# Index: 2
# Name: Ivysaur / フシギソウ (Fushigisou)
# Category: Seed Pokémon
# Types: Grass / Poison
# Abilities: 
# - Overgrow 
# - Chlorophyll (Hidden Ability)
# Gender Ratio: 87.5% male / 12.5% female
########################################################################################
    
/wiki/Venusaur_(Pok%C3%A9mon)
########################################################################################
# Index: 3
# Name: Venusaur / フシギバナ (Fushigibana

/wiki/Spearow_(Pok%C3%A9mon)
########################################################################################
# Index: 21
# Name: Spearow / オニスズメ (Onisuzume)
# Category: Tiny Bird Pokémon
# Types: Normal / Flying
# Abilities: 
# - Keen Eye 
# - Sniper (Hidden Ability)
# Gender Ratio: 50% male / 50% female
########################################################################################
    
/wiki/Fearow_(Pok%C3%A9mon)
########################################################################################
# Index: 22
# Name: Fearow / オニドリル (Onidrill)
# Category: Beak Pokémon
# Types: Normal / Flying
# Abilities: 
# - Keen Eye 
# - Sniper (Hidden Ability)
# Gender Ratio: 50% male / 50% female
########################################################################################
    
/wiki/Ekans_(Pok%C3%A9mon)
########################################################################################
# Index: 23
# Name: Ekans / アーボ (Arbo)
# Category: Snake Pokémon
# Type: P

/wiki/Zubat_(Pok%C3%A9mon)
########################################################################################
# Index: 41
# Name: Zubat / ズバット (Zubat)
# Category: Bat Pokémon
# Types: Poison / Flying
# Abilities: 
# - Inner Focus 
# - Infiltrator (Hidden Ability)
# Gender Ratio: 50% male / 50% female
########################################################################################
    
/wiki/Golbat_(Pok%C3%A9mon)
########################################################################################
# Index: 42
# Name: Golbat / ゴルバット (Golbat)
# Category: Bat Pokémon
# Types: Poison / Flying
# Abilities: 
# - Inner Focus 
# - Infiltrator (Hidden Ability)
# Gender Ratio: 50% male / 50% female
########################################################################################
    
/wiki/Oddish_(Pok%C3%A9mon)
########################################################################################
# Index: 43
# Name: Oddish / ナゾノクサ (Nazonokusa)
# Category: Weed Pokémon
# 

KeyboardInterrupt: 

# Sandbox
---

In [None]:
# Scratch cell: run the fetch -> extract -> format pipeline on a single page.
# Note that it reassigns the same global names the main cell uses.
next_pokemon_link = '/wiki/Metapod_(Pok%C3%A9mon)'
poke_soup         = get_poke_soup(f'{BASE_URL}{next_pokemon_link}')
# Uncomment to dump the parsed HTML to a local file for inspection:
# with open('a.html', 'w') as f:
#     f.write(poke_soup.prettify())

poke_info         = get_poke_info(poke_soup)
message           = get_formatted_message(poke_info)
print(message)