In [1]:
import re
import time
import requests
from bs4 import BeautifulSoup
import sqlite3

In [2]:
BASE_URL = "https://bulbapedia.bulbagarden.net/wiki/{}_({})"

Next few sections are logic intended to retrieve stats and save them as dictionaries

In [24]:
NAME_FIX = {
    "Ho-oh": "Ho-Oh",
    "Farfetchd": "Farfetch'd",
    "Sirfetchd": "Sirfetch'd",
    "Flabebe": "Flabébé",
    "Nidoran F": "Nidoran♀",
    "Nidoran M": "Nidoran♂",
    "Mr Mime": "Mr. Mime",
    "Mime Jr": "Mime Jr.",
}

In [25]:
def fix_name(name):
    return NAME_FIX.get(name, name)

In [3]:
STAT_ORDER = ["HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"]

In [31]:
STAT_ALIASES = {"hp":"HP","attack":"Attack","defense":"Defense","defence":"Defense",
    "sp. atk":"Sp. Atk","sp atk":"Sp. Atk","spa":"Sp. Atk",
    "sp. def":"Sp. Def","sp def":"Sp. Def","spd":"Sp. Def",
    "speed":"Speed"}

In [4]:
def slugify_name(name: str) -> str:
    # Bulbapedia uses "Bulbasaur_(Pokémon)", "Mr._Mime_(Pokémon)", etc.
    return name.replace(" ", "_").replace("’", "'")

In [None]:
def fetch_html(species: str, kind="Pokémon") -> str:
    url = BASE_URL.format(fix_name(slugify_name(species)), kind)
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return r.text

html = fetch_html("Bulbasaur")
html

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8">\n<title>Bulbasaur (Pokémon) - Bulbapedia, the community-driven Pokémon encyclopedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"0253eb9a8ac01192892aba69","wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Bulbasaur_(Pokémon)","wgTitle":"Bulbasaur (Pokémon)","wgCurRevisionId":4411334,"wgRevisionId":4411334,"wgArticleId":1161,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Pokémon","Generation I Pokémon","Grass-type Pokémon","Poison-type Pokémon","Dual-type Pokémon","Pokémon with a gender ratio of

In [42]:
def parse_base_stats(html: str) -> dict:
    soup = BeautifulSoup(html, "html.parser")

    # 1) Find the “Base stats” heading
    # Bulbapedia renders headings as h3/h4 with text “Base stats”
    heading = None
    for tag in soup.select(".mw-parser-output h3, .mw-parser-output h4"):
        if tag.get_text(strip=True).lower() == "base stats":
            heading = tag
            break
    if not heading:
        # Fallback: search by anchor text near “Stat  Range / At Lv. 50 / At Lv. 100”
        text_hit = soup.find(string=re.compile(r"Stat\s*Range", re.I))
        heading = text_hit.find_parent(["h3","h4"]) if text_hit else None
        if not heading:
            raise ValueError("Couldn't locate Base stats section")
        
    container = heading.find_next(["table", "div", "ul", "dl"])
    
    stats = {}
    for stat in STAT_ORDER:
        # Look for e.g. “HP: 45”, “Sp. Atk: 65”, etc.
        m = re.search(rf"{re.escape(stat)}\s*:\s*(\d+)", container.get_text(" ", strip=True))
        if not m:
            raise ValueError(f"Couldn't parse {stat}")
        stats[stat] = int(m.group(1))
        
    return stats



In [7]:
def get_base_stats(species: str) -> dict:
    html = fetch_html(species)
    return parse_base_stats(html)

The dictionaries are then saved to a "stats" table, which holds an ID and 

In [8]:
conn = sqlite3.connect("databases/pokedex.db", timeout=5)
conn.row_factory = sqlite3.Row
c = conn.cursor()

In [9]:
c.execute('''CREATE TABLE IF NOT EXISTS stats (pokemon_id INTEGER PRIMARY KEY AUTOINCREMENT, hp INTEGER, attack INTEGER, defense INTEGER, sp_atk INTEGER, sp_def INTEGER, speed INTEGER, FOREIGN KEY(pokemon_id) REFERENCES pokemon(id))''')

<sqlite3.Cursor at 0x10b6b3880>

In [36]:
exceptions = {
    "Shedinja" : {"HP": 1, "Attack": 90, "Defense": 45, "Sp. Atk": 30, "Sp. Def": 30, "Speed": 40},
}

In [None]:
c.execute('''SELECT * FROM stats''')
rows = c.fetchall()
if rows is None:
    c.execute('''SELECT * FROM pokemon''')
    rows = c.fetchall()
    for row in rows:
        print(row["name"])
        if row["name"] in exceptions:
            stats = exceptions[row["name"]]
            continue
        else:
            stats =get_base_stats(row["name"])
            c.execute('''INSERT OR REPLACE INTO stats (pokemon_id, hp, attack, defense, sp_atk, sp_def, speed) VALUES (?, ?, ?, ?, ?, ?, ?)''',
                    (row["id"],
                    stats["HP"],
                    stats["Attack"],
                    stats["Defense"],
                    stats["Sp. Atk"],
                    stats["Sp. Def"],
                    stats["Speed"]))
        conn.commit()
        # be polite: if you’re looping through many Pokémon, add a small delay
        time.sleep(0.3)

NameError: name 'c' is not defined

In [2]:
c.execute('''SELECT * FROM stats''')
rows = c.fetchall()
for row in rows:
    print(row["pokemon_id"], row["hp"], row["attack"], row["defense"], row["sp_atk"], row["sp_def"], row["speed"])

NameError: name 'c' is not defined

In [68]:
c.execute("SELECT * FROM stats WHERE speed > 180")
rows = c.fetchall()
for row in rows:
    print(dict(row))

{'pokemon_id': 894, 'hp': 80, 'attack': 100, 'defense': 50, 'sp_atk': 100, 'sp_def': 50, 'speed': 200}


In [63]:
c.execute("SELECT name from pokemon WHERE id = ?", (894,))
row = c.fetchone()
print(row["name"])

Regieleki
