# A Pokemon Type Webscraper

### Step 1 - Retrieve the HTML

In [1]:
import json
import os
from urllib.request import urlopen, Request

def get_html(url):
    """Download a page and return its HTML as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Request access to the page, sending a browser-like User-Agent header
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    # Open the page and make sure the response is closed once it has been read
    with urlopen(request) as page:

        # Read the HTML
        return page.read().decode("utf-8")

### Step 2 - Retrieve the Type

In [2]:
import re

def get_pokemon_type(html):
    """Extract the type names from a Pokemon's page.

    Args:
        html: HTML text of the Pokemon's page.

    Returns:
        A list of the type names found in the "Type" table row, in the order
        they appear. The list is empty when the row, or any type link inside
        it, is not present.
    """
    # Find the type container (the table row headed "Type")
    container = re.search("<tr>\n<th>Type</th>(.*?)</tr>", html, re.DOTALL)

    # Without that row there is nothing to extract
    if container is None:
        return []

    # Find all types: the name is the suffix of each link's "type-..." class
    return re.findall("<a class=\"type-icon type-(.*?)\"", container.group(1))

### Step 3 - Store in a Dictionary

In [3]:
def record_entries(pokemon, entries, dictionary=None):
    """Store a Pokemon's entries under its name.

    Args:
        pokemon: Key to store the entries under.
        entries: Value to store for that Pokemon.
        dictionary: Mapping to write into. When omitted, the notebook-level
            ``pokemon_dictionary`` is used, which must already be defined.
    """
    # Fall back to the notebook-level dictionary when none is given.
    # Compare against None so that an empty dictionary is still honoured.
    target = pokemon_dictionary if dictionary is None else dictionary

    # Store the entries in the dictionary
    target[pokemon] = entries

### Step 4 - Find the Next Pokemon

In [4]:
def get_next_pokemon(html):
    """Find the name of the next Pokemon linked from a page.

    Args:
        html: HTML text of the current Pokemon's page.

    Returns:
        The part of the "next" link's address that follows ``/pokedex/``,
        or ``None`` when the page contains no such link.
    """
    # Find the next URL and capture only the pokemon name from it, so the
    # case-insensitive match and the extraction cannot disagree
    match = re.search(
        "<a rel=\"next\" class=\"entity-nav-next\" href=\"/pokedex/(.*?)\">",
        html,
        re.IGNORECASE,
    )

    # There is no next link to follow
    if match is None:
        return None

    # Return the pokemon name
    return match.group(1)

### Step 5 - Scrape the Data of all 151 Pokemon

In [5]:
# Find the base URL
base_url = "https://pokemondb.net/pokedex/"

# Number of pokemon to scrape, counting from the first one
pokemon_count = 151

# File the scraped types are written to
output_path = "../data_files/pokemon_types.json"

# Find the first pokemon
pokemon = "bulbasaur"

# Create a dictionary of types for each pokemon
pokemon_dictionary = dict()

# For all 151 Pokemon...
for i in range(pokemon_count):

    # Get the HTML
    html = get_html(base_url + pokemon)

    # Get the pokemon's types
    entries = get_pokemon_type(html)

    # Store the pokemon's types in the dictionary
    record_entries(pokemon, entries)

    # Find the next pokemon
    pokemon = get_next_pokemon(html)

    # Stop early if the page does not name a next pokemon to visit
    if not pokemon:
        break

# Make sure the output folder exists so the scraped data is not lost
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Write the dictionary to the file
with open(output_path, "w", encoding='utf-8') as f:
    json.dump(pokemon_dictionary, f, indent=4, sort_keys=True)

# Report that the process is complete!
print("Done!")


Done!
