# Getting data directly from a website
This notebook walks you through the steps for collecting data from [Bulbapedia's National Pokédex](https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number) using `requests` and `BeautifulSoup`.

### Import `requests` library
This package lets you download a web page's HTML so that you can extract data from it. Let's save the page's address in the `URL` variable.

In [None]:
import requests

# Address of the Bulbapedia page that lists Pokemon by National Pokedex number.
URL = "https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"

### Load the page

In [None]:
# Download the page. Without a timeout, requests can wait on an unresponsive
# server indefinitely, which would hang the notebook.
page = requests.get(URL, timeout=30)

# Fail here on a 4xx/5xx response instead of parsing an error page as if it
# were the Pokedex listing.
page.raise_for_status()

### Parse HTML data

In [None]:
from bs4 import BeautifulSoup

# Build a searchable parse tree from the raw response body using the
# 'html.parser' backend.
soup = BeautifulSoup(markup=page.content, features='html.parser')

### Find all tables that contain Pokemon details

In [None]:
# First element in the document whose id is "mw-content-text"
# (presumably the wiki's main content container).
poke_content = soup.find(attrs={'id': 'mw-content-text'})

# Every <table> nested anywhere inside that element, in document order.
poke_tables = poke_content.find_all(name='table')

### Get the list of Pokémon for every generation

In [41]:
import json


def parse_pokemon_row(row):
    """Convert one table row into a Pokemon record.

    Args:
        row: a BeautifulSoup ``<tr>`` tag.

    Returns:
        A dict with the keys "kdex", "ndex", "name" and "type1", plus "type2"
        when the row has a sixth cell. Returns None when the row is not a
        data row (it contains no ``<td>`` cell, or has fewer than five cells).
    """
    # Only direct children, so cells of any nested markup are not picked up.
    cells = row.find_all(['td', 'th'], recursive=False)

    # Header rows are assumed to consist solely of <th> cells -- TODO confirm
    # against the page markup.
    is_data_row = any(cell.name == 'td' for cell in cells)
    if not is_data_row or len(cells) < 5:
        return None

    # NOTE(review): assumes the column order is kdex, ndex, <skipped cell>,
    # name, type 1, optional type 2. The third cell is not read (presumably a
    # sprite image) -- confirm against the live page.
    record = {
        "kdex": cells[0].text.strip(),
        "ndex": cells[1].text.strip(),
        "name": cells[3].text.strip(),
        "type1": cells[4].text.strip(),
    }
    # A sixth cell is only present when a second type is listed.
    if len(cells) > 5:
        record["type2"] = cells[5].text.strip()
    return record


all_json = {}

# Table 0 is skipped; tables 1 through 8 are stored as "gen_1" ... "gen_8".
for index, table in enumerate(poke_tables[1:9], start=1):
    # Rows are selected by tag rather than by position in `.contents`, so no
    # externally defined start offset is needed and whitespace nodes between
    # rows are ignored.
    parsed_rows = (parse_pokemon_row(row) for row in table.find_all('tr'))
    all_json["gen_" + str(index)] = [
        record for record in parsed_rows if record is not None
    ]

with open('bulbapedia.json', 'w') as outfile:
    json.dump(all_json, outfile)

In [43]:
# Read the saved file back and print it indented, with keys sorted, to
# inspect its structure.

with open('bulbapedia.json', mode='r') as handle:
    parsed = json.loads(handle.read())
    formatted = json.dumps(parsed, sort_keys=True, indent=4)
    print(formatted)

{
    "gen_1": [
        {
            "kdex": "#001",
            "name": "Bulbasaur",
            "ndex": "#001",
            "type1": "Grass",
            "type2": "Poison"
        },
        {
            "kdex": "#002",
            "name": "Ivysaur",
            "ndex": "#002",
            "type1": "Grass",
            "type2": "Poison"
        },
        {
            "kdex": "#003",
            "name": "Venusaur",
            "ndex": "#003",
            "type1": "Grass",
            "type2": "Poison"
        },
        {
            "kdex": "#004",
            "name": "Charmander",
            "ndex": "#004",
            "type1": "Fire"
        },
        {
            "kdex": "#005",
            "name": "Charmeleon",
            "ndex": "#005",
            "type1": "Fire"
        },
        {
            "kdex": "#006",
            "name": "Charizard",
            "ndex": "#006",
            "type1": "Fire",
            "type2": "Flying"
        },
        {
            "kdex":