<a href="https://colab.research.google.com/github/dnachavez/Pokemon-Pokedex-Scraper/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
import re
from tqdm import tqdm

# Address of the page the scraper downloads; main() passes it to
# fetch_pokedex_page(), and parse_pokedex_page() then looks for the
# <table id="pokedex"> element in the returned HTML.
url = "https://pokemondb.net/pokedex/all"

def fetch_pokedex_page(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server to connect or to send data
            before giving up. Without a timeout ``requests`` waits
            indefinitely, so a stalled server would hang the scraper.

    Returns:
        The decoded response body, or ``None`` if the request failed
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so they take the same
        # error path as connection failures.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        print(f"Error fetching the Pokédex page: {e}")
        return None

def parse_pokedex_page(html_content):
    """Locate the Pokédex table in the HTML and split it into headers and rows.

    Args:
        html_content: HTML text of the Pokédex listing page.

    Returns:
        A ``(headers, rows)`` pair. ``headers`` is the text of every ``<th>``
        in the table head with an extra "Image URL" entry placed at index 1;
        ``rows`` holds the ``<tr>`` elements of the table body. If anything
        goes wrong (for example the table is missing), an error is printed
        and ``(None, None)`` is returned.
    """
    try:
        document = BeautifulSoup(html_content, 'html.parser')
        pokedex_table = document.find('table', {'id': 'pokedex'})

        header_cells = pokedex_table.find('thead').find_all('th')
        headers = [cell.text for cell in header_cells]
        # extract_pokemon_data() emits the image URL right after the number,
        # so the matching column name goes in the second position.
        headers[1:1] = ["Image URL"]

        body_rows = pokedex_table.find('tbody').find_all('tr')
    except Exception as e:
        print(f"Error parsing the Pokédex page: {e}")
        return None, None
    return headers, body_rows

def extract_pokemon_data(rows):
    """Convert Pokédex table rows into flat lists of cell values.

    Args:
        rows: Iterable of ``<tr>`` elements taken from the Pokédex table body.

    Returns:
        A list with one entry per row, laid out as
        ``[number, image URL, <text of each remaining cell>...]``. The cell at
        index 2 of the row is rendered as the comma-separated text of its
        links instead of its raw text. If any row fails to parse, an error
        is printed and an empty list is returned.
    """
    whitespace_run = re.compile(r'\s+')

    def tidy(text):
        # Trim the ends and collapse internal whitespace runs to one space.
        return whitespace_run.sub(' ', text.strip())

    records = []
    try:
        for row in tqdm(rows, desc="Scraping Pokémon data"):
            cells = row.find_all('td')
            lead_cell = cells[0]

            # The first cell carries both the dex number and the sprite image.
            number = tidy(lead_cell.find('span', class_='infocard-cell-data').text)
            img_url = lead_cell.find('img')['src']

            remaining = [tidy(cell.text) for cell in cells[1:]]
            # Replace the plain text of the third cell with the text of its
            # links joined by ", ".
            remaining[1] = ', '.join(link.text for link in cells[2].find_all('a'))

            records.append([number, img_url, *remaining])
        return records
    except Exception as e:
        print(f"Error extracting Pokémon data: {e}")
        return []

def save_to_csv(data, headers, filename):
    """Write the scraped rows to a CSV file.

    Args:
        data: List of rows, each a list of cell values.
        headers: Column names; one per value in a row.
        filename: Path of the CSV file to create or overwrite.

    Any failure (for example a row/header length mismatch or an unwritable
    path) is reported on stdout instead of being raised.
    """
    try:
        # index=False keeps pandas' row index out of the file.
        pd.DataFrame(data, columns=headers).to_csv(filename, index=False)
        print(f"Data saved to {filename}")
    except Exception as err:
        print(f"Error saving data to CSV: {err}")

def save_to_db(data, headers, db_filename):
    """Write the scraped rows to the ``pokedex`` table of a SQLite database.

    Args:
        data: List of rows, each a list of cell values.
        headers: Column names; one per value in a row.
        db_filename: Path of the SQLite database file. An existing
            ``pokedex`` table in it is replaced.

    Any failure is reported on stdout instead of being raised.
    """
    try:
        df = pd.DataFrame(data, columns=headers)
        conn = sqlite3.connect(db_filename)
        try:
            df.to_sql('pokedex', conn, if_exists='replace', index=False)
        finally:
            # Close even when to_sql() raises, so a failed write does not
            # leave the connection (and its file handle) open.
            conn.close()
        print(f"Data saved to {db_filename}")
    except Exception as e:
        print(f"Error saving data to database: {e}")

def main():
    """Run the scraper end to end: fetch, parse, extract, then save.

    Each stage reports its own errors; if a stage yields nothing usable the
    pipeline stops quietly without writing any output files.
    """
    page_html = fetch_pokedex_page(url)
    if not page_html:
        return

    headers, rows = parse_pokedex_page(page_html)
    if not (headers and rows):
        return

    records = extract_pokemon_data(rows)
    if not records:
        return

    save_to_csv(records, headers, 'pokedex.csv')
    save_to_db(records, headers, 'pokedex.db')

# Run the scraper only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Scraping Pokémon data: 100%|██████████| 1215/1215 [00:00<00:00, 5611.16it/s]


Data saved to pokedex.csv
Data saved to pokedex.db
