# Config

## Selenium + BS4

In [18]:
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

import pandas as pd

import os

In [2]:
# Location of the ChromeDriver executable, read from the `chrome_driver_path`
# environment variable (the lookup yields None when the variable is not set)
chrome_driver_path = os.environ.get('chrome_driver_path')

# Selenium service object that the Chrome WebDriver is started with later on
service = Service(executable_path=chrome_driver_path)

## Scrape the main page

In [3]:
# Page that lists every Pokédex entry in a single table
all_pokedex_url = 'https://pokemondb.net/pokedex/all'

In [4]:
# Load the Pokédex page in Chrome and parse the rendered HTML into `soup`.
#
# `driver` is bound to None before the `try` so the `finally` clause can tell
# whether a browser was actually started. If webdriver.Chrome() itself raises,
# an unconditional driver.quit() would fail with NameError and hide the real error.
driver = None
try:
    driver = webdriver.Chrome(service=service)
    driver.get(all_pokedex_url)

    soup = BeautifulSoup(driver.page_source, 'html.parser')

except TimeoutException:
    # NOTE(review): this cell contains no explicit waits, so a timeout here
    # presumably comes from the page load itself - confirm the message wording.
    print("Timed out waiting for cookie pop-up or other elements")
    # Re-raise: `soup` was not assigned in this run, so continuing would make
    # later cells fail with NameError or silently reuse a stale `soup` left in
    # the kernel by an earlier execution.
    raise

finally:
    # Close the browser, but only if one was actually launched
    if driver is not None:
        driver.quit()

## Extract from Soup object

In [13]:
# Locate the first <tbody> in the parsed page; every <tr> inside it is treated
# as one Pokémon entry
table_body = soup.find('tbody')
rows = table_body.find_all('tr')

# Labels for the stat cells, in the order they appear after the type cell
# (cell indices 3 through 9)
stat_labels = ("Total", "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed")

# One dictionary per table row is collected here
pokemon_data = []

for entry in rows:
    cells = entry.find_all('td')

    # First cell: Pokédex number and sprite image; second cell: name
    record = {
        "#": cells[0].find('span', class_='infocard-cell-data').text.strip(),
        "Image URL": cells[0].find('img')['src'],
        "Name": cells[1].find('a', class_='ent-name').text.strip(),
    }

    # The name cell may carry an extra <small class="text-muted"> tag;
    # fall back to an empty string when it is absent
    form_tag = cells[1].find('small', class_='text-muted')
    if form_tag:
        record["Subtitle"] = form_tag.text.strip()
    else:
        record["Subtitle"] = ""

    # Third cell: one link per type, kept as a list
    record["Type"] = [link.text for link in cells[2].find_all('a')]

    # Remaining cells: stats, stored as stripped text
    for position, label in enumerate(stat_labels, start=3):
        record[label] = cells[position].text.strip()

    pokemon_data.append(record)

In [None]:
# Build a DataFrame from the scraped records (one row per dictionary)
df = pd.DataFrame(data=pokemon_data)

In [20]:
# Output CSV location, expressed relative to the current working directory
output_folder = "data"
output_filename = "pokemon_main_stats_data.csv"
relative_path = os.path.join(output_folder, output_filename)

In [21]:
# Save the DataFrame to the relative path (without the index column).
# DataFrame.to_csv does not create missing parent folders and raises OSError
# when the target directory does not exist, so create it first.
output_dir = os.path.dirname(relative_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(relative_path, index=False)