In [1]:
from bs4 import BeautifulSoup
import requests
import csv
import time
from itertools import zip_longest
import regex as re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

def grouper(iterable, n, fillvalue=None):
    """Collect items from ``iterable`` into consecutive tuples of length ``n``.

    Args:
        iterable: Any iterable to be chunked.
        n: Number of items per tuple.
        fillvalue: Value used to pad the last tuple when the input runs out
            before the tuple is full.

    Returns:
        A lazy ``zip_longest`` iterator of ``n``-tuples.
    """
    # Every slot of the zip pulls from the *same* iterator, so each output
    # tuple consumes the next n items in order.
    shared = iter(iterable)
    slots = (shared for _ in range(n))
    return zip_longest(*slots, fillvalue=fillvalue)

## Download Example Setlist from Serebii

In [2]:
# Fetch the Skyridge set page from Serebii and pull out each card's number and name.
url = "https://www.serebii.net/card/skyridge/"

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()

html = response.text

soup = BeautifulSoup(html, "html.parser")

raw_rows = soup.find_all("td", class_="cen")
extracted_rows = []

# The "cen" cells are consumed three at a time as (number, picture, name).
for num, pic, name in grouper(raw_rows, 3):
    # grouper pads a short final group with None; skip it rather than
    # crashing on None.text.
    if name is None:
        continue
    num = num.text.replace("Skyridge", "").strip()
    name = name.text.strip()
    extracted_rows.append({"set_num": num, "card_name": name})
    

## Download Setlist from Price Charting

In [3]:
def get_set_list(pokemon_set: str):
    """Scrape the PriceCharting card list for one Pokemon set.

    Opens the set's page (sorted by model number) in a Selenium-driven Chrome
    window, scrolls to the bottom once, and parses every ``<tr>`` that carries
    an ``id`` attribute out of the rendered page source.

    Args:
        pokemon_set: Set slug appended to ``.../console/pokemon-`` in the URL,
            e.g. ``"skyridge"``.

    Returns:
        A list of dicts, one per kept row, with the keys ``pokemon``,
        ``card_type``, ``card_set``, ``card_set_id``, ``ungraded_price``,
        ``psa_9_price`` and ``psa_10_price``. ``card_set`` is ``pokemon_set``
        as passed in; the other values are unconverted strings cut from the
        row text. Rows whose name contains "Booster" are left out.

    Raises:
        IndexError: If a kept row's text splits into fewer than five fields.
    """
    url = f"https://www.pricecharting.com/console/pokemon-{pokemon_set}?sort=model-number"

    # Get chrome webdriver setup
    service = ChromeService(executable_path=ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    try:
        driver.get(url)

        # Scroll to bottom to get all results
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Short pause before reading the page source after the scroll.
        time.sleep(0.5)
        html = driver.page_source
    finally:
        # quit() ends the whole browser session (close() only closes the
        # window), and the finally block guarantees cleanup even when loading
        # the page fails.
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")
    raw_rows = soup.find_all("tr", id=True)

    set_list = []

    for row in raw_rows:
        # Cut the flattened row text on "#", "$" and "+": field 0 is the card
        # name, field 1 the set number, fields 2-4 the price columns.
        row_text = re.split(r"#|\$|\+", row.text.replace("\n", ""))

        pokemon = row_text[0].strip()

        # Sealed product rows are not cards.
        if "Booster" in pokemon:
            continue

        # Holo variants carry their type in a bracketed suffix, e.g.
        # "Name [Reverse Holo]". Only split when the suffix is really there,
        # so a name that merely contains "Holo" does not raise on unpacking.
        card_type = ""
        if "Holo" in pokemon:
            name_parts = re.split(r"\s\[", pokemon, maxsplit=1)
            if len(name_parts) == 2:
                pokemon, card_type = name_parts

        # Name some stuff nicely
        # NOTE(review): assumes the price columns appear in the order
        # ungraded, PSA 9, PSA 10 -- confirm against the live page.
        number = row_text[1]
        card_type = card_type.strip("]")
        ungraded_price = row_text[2]
        psa_9_price = row_text[3]
        psa_10_price = row_text[4]

        # Create a clean row of data
        set_list.append(
            {
                "pokemon": pokemon,
                "card_type": card_type,
                "card_set": pokemon_set,
                "card_set_id": number,
                "ungraded_price": ungraded_price,
                "psa_9_price": psa_9_price,
                "psa_10_price": psa_10_price
            }
        )
    return set_list

In [4]:
# Scrape the Skyridge set and show how many card rows came back.
card_list = get_set_list(pokemon_set="skyridge")
len(card_list)

248

In [5]:
def get_sales_per_card(card_url):
    """Placeholder for per-card sales scraping; not implemented yet.

    Args:
        card_url: Currently unused.

    Returns:
        None.
    """
    return None