In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
import csv
from datetime import datetime

# Build the Chrome launch options: start maximised, turn off the Blink
# "AutomationControlled" feature, and disable Chrome's popup blocker.
chrome_options = Options()
for chrome_flag in (
    '--start-maximized',
    '--disable-blink-features=AutomationControlled',
    '--disable-popup-blocking',
):
    chrome_options.add_argument(chrome_flag)

# webdriver_manager installs a chromedriver binary and returns its path,
# which is handed to Selenium through a Service object.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# Open the shop's landing page in the browser session created above.
driver.get("https://torecacamp-pokemon.com/")
print("Website loaded.")

# Handle pop-ups: wait a fixed 4 s so a popup (if any) has time to appear,
# then try to click its close button.
time.sleep(4)
try:
    close_button = driver.find_element(By.CSS_SELECTOR, ".popup-close-button")
    close_button.click()
    print("Popup closed.")
except Exception:
    # Best-effort fallback: if the close button is missing or cannot be
    # clicked, reload the page instead. `Exception` (rather than a bare
    # `except:`) keeps Ctrl-C / SystemExit from being swallowed here.
    driver.refresh()
    time.sleep(3)
    print("Page refreshed.")

# Output CSV, named after the start time of this run, that receives one row
# per scraped card while the scrape is in progress.
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_filename = f"torecacamp_{current_datetime}.csv"

# Column order of every row written to the CSV (and the keys of the dicts
# collected in `all_data`).
csv_header = ('Category', 'Subcategory', 'Card Name', 'Price', 'URL')

# 'utf-8-sig' writes a BOM at the start of the file; newline='' lets the csv
# module control line endings itself. The file is closed later in the script.
csv_file = open(csv_filename, 'w', newline='', encoding='utf-8-sig')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(list(csv_header))

# In-memory copy of every scraped row, one dict per card.
all_data = []

# XPath of the top-level <li> entries of the site's main navigation bar.
NAV_ITEMS_XPATH = "/html/body/div[3]/section/nav/div/div/ul/li"
# CSS selectors for the card titles and prices on a product listing page.
PRODUCT_TITLE_SELECTOR = ".product-item__title"
PRODUCT_PRICE_SELECTOR = ".price-list span:not(.visually-hidden)"


def _hover(element):
    """Move the mouse over *element* and pause 1 s so the menu can react."""
    ActionChains(driver).move_to_element(element).perform()
    time.sleep(1)


def _scrape_listing_pages(main_category, subcategory, label=""):
    """Scrape every page of the listing shown in the current window.

    Each card title is paired with the price at the same position on the
    page; every pair is written to ``csv_writer`` and appended to
    ``all_data`` (both module-level objects of this script).

    Args:
        main_category: Text of the main navigation item being processed.
        subcategory: Text of the dropdown entry being processed.
        label: Word inserted before "page"/"pages" in the progress messages
            (e.g. ``"promo "``); empty for the plain messages.

    Raises:
        selenium TimeoutException: if no card title appears on a page
            within 10 seconds.
    """
    page_num = 1
    while True:
        print(f"    Scraping {label}page {page_num}")

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, PRODUCT_TITLE_SELECTOR))
        )

        cards = driver.find_elements(By.CSS_SELECTOR, PRODUCT_TITLE_SELECTOR)
        prices = driver.find_elements(By.CSS_SELECTOR, PRODUCT_PRICE_SELECTOR)
        # Read the URL once per page instead of twice per card; every read is
        # a round-trip to the browser.
        page_url = driver.current_url

        # zip() stops at the shorter list, so a card without a price element
        # at the same index is skipped.
        for card, price in zip(cards, prices):
            card_name = card.text.strip()
            card_price = price.text.strip()

            csv_writer.writerow([main_category, subcategory, card_name, card_price, page_url])

            # also keep in memory
            all_data.append({
                "Category": main_category,
                "Subcategory": subcategory,
                "Card Name": card_name,
                "Price": card_price,
                "URL": page_url
            })

        # Push the rows to disk after every page so the CSV really is saved
        # incrementally and a later crash does not lose buffered rows.
        csv_file.flush()

        # check for next page
        try:
            next_button = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.XPATH, "//a[@rel='next']"))
            )
            next_button.click()
        except Exception:
            # No rel="next" link within 5 s (or it could not be clicked):
            # treat this as the last page. `Exception` rather than a bare
            # `except:` so Ctrl-C still interrupts the scrape.
            print(f"    No more {label}pages.")
            break
        page_num += 1
        time.sleep(3)


def _scrape_in_new_tab(link_url, main_category, subcategory, label=""):
    """Open *link_url* in a new tab, scrape all of its pages, close the tab.

    The extra tab is closed and focus is returned to the window that was
    active on entry even when scraping fails, so the caller always continues
    on the menu page. Any scraping exception is re-raised to the caller.
    """
    main_handle = driver.current_window_handle
    driver.execute_script("window.open(arguments[0]);", link_url)
    try:
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(3)
        _scrape_listing_pages(main_category, subcategory, label)
    finally:
        # Close every window except the one we started from, then go back.
        for handle in driver.window_handles:
            if handle != main_handle:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(main_handle)
        time.sleep(1)


try:
    # find all main nav items through xpath
    nav_items = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, NAV_ITEMS_XPATH))
    )

    # only get items 3 to 8 (Scarlet and violet to Legend)
    for i in range(2, 8):
        if i >= len(nav_items):
            print(f"Nav item {i+1} doesn't exist. Skipping.")
            continue

        print(f"\nProcessing main nav item {i+1}")

        main_nav = nav_items[i]
        _hover(main_nav)

        # XPath of the first-level dropdown entries under this nav item
        # (XPath positions are 1-based, hence i+1).
        dropdown_xpath = f"{NAV_ITEMS_XPATH}[{i+1}]/ul/li"

        try:
            dropdown_items = WebDriverWait(driver, 5).until(
                EC.presence_of_all_elements_located((By.XPATH, dropdown_xpath))
            )

            main_category = main_nav.text.strip()

            # Process first dropdown item (promos)
            if len(dropdown_items) >= 1:
                first_dropdown = dropdown_items[0]
                subcategory = first_dropdown.text.strip()

                try:
                    link = first_dropdown.find_element(By.TAG_NAME, "a")
                    link_url = link.get_attribute("href")
                    link_text = link.text.strip()

                    print(f"  Clicking on promo: {link_text}")

                    _scrape_in_new_tab(link_url, main_category, subcategory, label="promo ")

                    # Need to hover over main nav again to reset the menu state
                    _hover(main_nav)

                except Exception as e:
                    # The helper has already closed the tab and switched back,
                    # so the expansion-pack step below runs on the menu page.
                    print(f"  Error processing promo link: {str(e)}")

            # Process second dropdown item (expansion packs)
            if len(dropdown_items) >= 2:
                second_dropdown = dropdown_items[1]
                _hover(second_dropdown)

                # get all sub-dropdown items
                sub_dropdown_items = WebDriverWait(driver, 5).until(
                    EC.presence_of_all_elements_located((By.XPATH, f"{dropdown_xpath}[2]/ul/li"))
                )

                subcategory = second_dropdown.text.strip()

                # process each sub-dropdown item
                for j, sub_item in enumerate(sub_dropdown_items):
                    try:
                        link = sub_item.find_element(By.TAG_NAME, "a")
                        link_url = link.get_attribute("href")
                        link_text = link.text.strip()

                        print(f"  Clicking on: {link_text}")

                        _scrape_in_new_tab(link_url, main_category, subcategory)

                        # Re-open the nested menu (main nav, then its second
                        # entry) before handling the next sub-item.
                        _hover(main_nav)
                        dropdown_items = WebDriverWait(driver, 5).until(
                            EC.presence_of_all_elements_located((By.XPATH, dropdown_xpath))
                        )
                        second_dropdown = dropdown_items[1]
                        _hover(second_dropdown)

                    except Exception as e:
                        print(f"    Error processing sub-item {j+1}: {str(e)}")
            else:
                # Reached when the menu has fewer than two entries, i.e. there
                # is no second (expansion pack) entry to expand.
                print(f"  No expansion pack dropdown found for nav item {i+1}")

        except Exception as e:
            print(f"  Error processing dropdown for nav item {i+1}: {str(e)}")

except Exception as e:
    print(f"Error in main navigation process: {str(e)}")

# Scrape is over: close the incrementally written CSV and shut the browser.
csv_file.close()
driver.quit()
print(f"Scraping finished! Raw data saved to {csv_filename}")

# Save the raw data without cleaning: dump the in-memory rows to a second
# CSV whose name carries the same run timestamp.
raw_csv_filename = f"raw_torecacamp_{current_datetime}.csv"
df = pd.DataFrame(all_data)
df.to_csv(raw_csv_filename, index=False)


Website loaded.
Page refreshed.

Processing main nav item 3
  Clicking on promo: PROMO
    Scraping promo page 1
    Scraping promo page 2
    Scraping promo page 3
    Scraping promo page 4
    Scraping promo page 5
    Scraping promo page 6
    Scraping promo page 7
    Scraping promo page 8
    Scraping promo page 9
    Scraping promo page 10
    Scraping promo page 11
    Scraping promo page 12
    Scraping promo page 13
    Scraping promo page 14
    Scraping promo page 15
    Scraping promo page 16
    Scraping promo page 17
    Scraping promo page 18
    Scraping promo page 19
    Scraping promo page 20
    Scraping promo page 21
    Scraping promo page 22
    Scraping promo page 23
    Scraping promo page 24
    No more promo pages.
  Clicking on: SV10 / ロケット団の栄光
    Scraping page 1
    Scraping page 2
    Scraping page 3
    Scraping page 4
    Scraping page 5
    Scraping page 6
    Scraping page 7
    No more pages.
  Clicking on: SV9a / 熱風のアリーナ
    Scraping page 1
    Scrap