# Util Functions
---

- ### Formatar uma saída.

In [12]:
def get_formatted_message(poke_info):
    """Render one Pokémon's data as a boxed, multi-line text block.

    Args:
        poke_info: dict with a 'core' dict (keys 'index', 'name', 'jp_name',
            'jp_rom_name', 'category') and a 'types' list of type names.

    Returns:
        str: the formatted block. The type line is labelled 'Types' when
        more than one type is present and 'Type' otherwise; type names are
        joined with ' / '.
    """
    type_label = 'Types' if len(poke_info['types']) > 1 else 'Type'

    # The backslash after the opening quotes keeps the block from starting
    # with an empty line.
    return """\
########################################################################################
# Index: %d
# Name: %s / %s (%s)
# Category: %s
# %s: %s
########################################################################################
    """ % (
        poke_info['core']['index'],
        poke_info['core']['name'],
        poke_info['core']['jp_name'],
        poke_info['core']['jp_rom_name'],
        poke_info['core']['category'],
        type_label,
        ' / '.join(poke_info['types'])
    )


---
&nbsp;
- ### Recriar arquivos dado o nome.

In [13]:
import os

def recreate_file(file_name):
    """Delete ``file_name`` if it exists, then create it as a new empty file.

    Args:
        file_name: path of the file to (re)create.

    Raises:
        OSError: if the removal or the creation fails (for example
            FileExistsError when the file reappears between the two steps).
    """
    if os.path.exists(file_name):
        os.remove(file_name)

    # Exclusive-create mode is kept; the context manager closes the handle
    # right away instead of leaving it open until garbage collection.
    with open(file_name, 'x'):
        pass



# BS4 Related
---

- ### Carregar a página destino e retornar o soup.

In [5]:
from urllib.request import urlopen as uOpen, Request as uReq
from bs4 import BeautifulSoup as soup

def get_poke_soup(link):
    """Download the page at ``link`` and parse it with BeautifulSoup.

    Args:
        link: absolute URL of the page to fetch.

    Returns:
        The parsed document, built with the 'html.parser' backend.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    # A custom User-Agent is sent with every request.
    # NOTE(review): presumably the site rejects urllib's default agent - confirm.
    request = uReq(link, headers={'User-Agent': 'Magic Browser'})

    # The context manager closes the connection even if read() raises.
    with uOpen(request) as response:
        poke_page_html = response.read()

    return soup(poke_page_html, 'html.parser')

---
&nbsp;
- ### Checar se é a última página a ser verificada.
    - Em alguns casos, quando o lançamento de um novo jogo está próximo e só há informações sobre um ou mais Pokémon, a Bulbapedia às vezes publica informações não oficiais em um layout diferente.

In [6]:
def check_last_page(poke_soup):
    """Tell whether ``poke_soup`` is the page where the crawl should stop.

    Looks at the first link inside the first table of the element whose id
    is 'mw-content-text' and compares its href with the BulbaShadow image
    link.

    Returns:
        bool: True when that href is '/wiki/File:BulbaShadow.png'.
    """
    content = poke_soup.find(id='mw-content-text')
    first_link = content.table.a
    return first_link['href'] == "/wiki/File:BulbaShadow.png"

---
&nbsp;
- ### Pegar o link do próximo pokémon

In [7]:
def get_next_pokemon_link(poke_soup):
    """Return the href of the link that leads to the next Pokémon page.

    The link is read from the third direct cell of the second direct row of
    the first table inside the 'mw-content-text' element. When that row or
    cell does not exist, the first row is tried instead.

    Args:
        poke_soup: parsed page.

    Returns:
        str: value of the 'href' attribute of the first link in that cell.

    Raises:
        IndexError: if neither row has a third direct cell. The error is
            propagated rather than masked, so a layout change is noticed
            instead of yielding an unrelated link.
    """
    nav_table = poke_soup.find(id='mw-content-text').table
    rows = nav_table.find_all('tr', recursive=False)

    try:
        next_cell = rows[1].find_all('td', recursive=False)[2]
    except IndexError:
        # Fallback layout: the navigation cells sit in the first row.
        next_cell = rows[0].find_all('td', recursive=False)[2]

    # Returning here (not from a `finally`) lets unexpected errors surface.
    return next_cell.find('a')['href']

In [8]:
def get_poke_info(poke_soup):
    """Collect the core fields and the types of the Pokémon on a page.

    The data is read from the second direct-child table of the element
    whose id is 'mw-content-text'.

    Returns:
        dict: {'core': {'index', 'name', 'category', 'jp_name',
        'jp_rom_name'}, 'types': [...]}.
    """
    content = poke_soup.find(id='mw-content-text')
    info_table = content.find_all('table', recursive=False)[1]

    core_values = get_core_poke_info(info_table)
    type_names = get_poke_types(info_table)

    # Same order as the tuple returned by get_core_poke_info.
    core_keys = ('index', 'name', 'category', 'jp_name', 'jp_rom_name')

    return {
        'core': {key: core_values[pos] for pos, key in enumerate(core_keys)},
        'types': type_names,
    }

In [9]:
def get_core_poke_info(info_table):
    """Extract the core identification fields from the info table.

    Returns:
        tuple: (index, name, category, jp_name, jp_rom_name), where index is
        an int parsed from the header link text with '#' removed and the
        other items are the text of the matching elements.
    """
    header_row = info_table.tr.td.table.tr
    cells = header_row.td.table.tr.find_all('td', recursive=False)

    index_text = header_row.th.find('a').text
    index = int(index_text.replace('#', ''))

    # First cell: English name and category.
    name_cell = cells[0]
    name = name_cell.big.text
    category = name_cell.a.text

    # Second cell: Japanese name and its romanization.
    jp_cell = cells[1]
    jp_name = jp_cell.span.text
    jp_rom_name = jp_cell.i.text

    return (index, name, category, jp_name, jp_rom_name)

In [10]:
def get_poke_types(info_table):
    """List the type names found in the info table.

    The links are read from the first ``td`` without a ``style`` attribute
    inside the table of the second direct row of ``info_table``.

    Returns:
        list[str]: link texts in document order, skipping 'Unknown'.
    """
    type_row = info_table.find_all('tr', recursive=False)[1]
    type_cell = type_row.table.find('td', attrs={'style': None})

    type_names = []
    for link in type_cell.find_all('a'):
        if link.text != 'Unknown':
            type_names.append(link.text)

    return type_names


# Main Cycle
---

In [None]:
    # Crawl configuration. Run this cell before the main loop cell, which
    # reads all three names.

    # Site root; the relative '/wiki/...' links followed by the main loop are
    # appended to it.
    BASE_URL  = 'https://bulbapedia.bulbagarden.net'
    # Relative link that ends the crawl when it comes back as the next
    # Pokémon link.
    END_URL   = '/wiki/%3F%3F%3F_(Pok%C3%A9mon)'
    # Text file that the main loop recreates and writes the formatted
    # entries to.
    FILE_NAME = 'pokemon_list.txt'

In [15]:
    # Crawl every Pokémon page, starting at Bulbasaur and following each
    # page's "next" link, writing one formatted entry per Pokémon to
    # FILE_NAME and echoing it to the cell output.
    recreate_file(FILE_NAME)

    # UTF-8 is requested explicitly: each entry includes the Japanese name,
    # which the platform's default encoding may not be able to represent.
    with open(FILE_NAME, 'w', encoding='utf-8') as f:
        next_pokemon_link = '/wiki/Bulbasaur_%28Pok%C3%A9mon%29'

        while True:
            poke_soup = get_poke_soup(f'{BASE_URL}{next_pokemon_link}')

            # Stop before parsing a page that check_last_page flags as the
            # last one.
            if check_last_page(poke_soup):
                break

            poke_info = get_poke_info(poke_soup)
            message   = get_formatted_message(poke_info)

            f.write(message + '\n')
            print(message)

            next_pokemon_link = get_next_pokemon_link(poke_soup)

            # Second stop condition: the next link is the END_URL sentinel.
            if next_pokemon_link == END_URL:
                break

        print("\n\n########## FINISHED ##########\n\n")


NameError: name 'FILE_NAME' is not defined