In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, TimeoutException
import pandas as pd
import csv
import time

# Filesystem path to the ChromeDriver executable; handed to
# Service(executable_path=...) when the browser is started further down.
driver_path = r"C:\browser_automation\chromedriver-win64\chromedriver.exe"

# Function to scrape the current page
def _text_or_na(item, class_name):
    """Return the text of the first child of *item* with *class_name*, or 'NA'."""
    try:
        return item.find_element(By.CLASS_NAME, class_name).text
    except NoSuchElementException:
        return 'NA'


def _price_or_na(item, class_name):
    """Return the price found under *class_name* as a float, or 'NA'.

    'NA' is returned both when the element is missing and when its text
    cannot be parsed as a number (empty string, "Sold out", ...).
    """
    raw = _text_or_na(item, class_name)
    try:
        return float(raw.replace("$", "").replace(",", "").strip())
    except ValueError:
        # Also covers the 'NA' placeholder returned for a missing element.
        return 'NA'


def _extract_row(item):
    """Build the CSV row dict for one 'itemContainer' element."""
    try:
        link = item.find_element(By.CSS_SELECTOR, 'a.itemlink').get_attribute('href')
    except NoSuchElementException:
        link = 'NA'

    # find_elements returns an empty list (it never raises
    # NoSuchElementException), so the "no sizes" case is detected via the
    # empty joined string instead of an except clause.
    size_texts = [size.text for size in item.find_elements(By.CLASS_NAME, 'aSize')]
    sizes = ', '.join(size_texts) or 'NA'

    return {
        'link': link,
        'brand': _text_or_na(item, 'brand'),
        'category': _text_or_na(item, 'microcategory'),
        'old_price': _price_or_na(item, 'oldprice'),
        'new_price': _price_or_na(item, 'retail-newprice'),
        'sizes': sizes,
    }


def scrape_current_page(driver, writer):
    """Write one CSV row per 'itemContainer' element on the current page.

    Args:
        driver: Selenium WebDriver already showing the page to scrape.
        writer: Object with a ``writerow(dict)`` method (a csv.DictWriter in
            this script) receiving the keys 'link', 'brand', 'category',
            'old_price', 'new_price' and 'sizes'.

    Missing or unparsable fields are written as the string 'NA'.

    Raises:
        StaleElementReferenceException: if an item goes stale again right
            after it has been re-located once.
    """
    item_count = len(driver.find_elements(By.CLASS_NAME, 'itemContainer'))
    for index in range(item_count):
        row = None
        for attempt in range(2):
            # Re-locate on every attempt so a re-rendered page does not leave
            # us holding a stale element reference.
            items = driver.find_elements(By.CLASS_NAME, 'itemContainer')
            if index >= len(items):
                # The page now has fewer items than when we started counting.
                return
            try:
                row = _extract_row(items[index])
                break
            except StaleElementReferenceException:
                if attempt == 1:
                    raise
        writer.writerow(row)

def scrape_website(link, driver, writer, timeout=2):
    """Scrape every page of a paginated listing starting at *link*.

    Loads *link*, scrapes the page with ``scrape_current_page``, then clicks
    the element with class 'nextPage' and repeats. Pagination stops when the
    button does not become clickable, or does not go stale after the click,
    within *timeout* seconds.

    Args:
        link: URL of the first listing page.
        driver: Selenium WebDriver used for navigation.
        writer: Row writer forwarded to ``scrape_current_page``.
        timeout: Seconds to wait for the next-page button and for the page
            change after clicking it. Defaults to 2, the value that was
            previously hard-coded.
    """
    driver.get(link)

    while True:
        scrape_current_page(driver, writer)
        try:
            next_button = WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'nextPage'))
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", next_button)
            # The old button going stale is the signal that the page changed.
            WebDriverWait(driver, timeout).until(EC.staleness_of(next_button))
        except (TimeoutException, NoSuchElementException):
            break

# Read links from text file (one URL per line)
with open('links.txt', 'r') as file:
    links = file.readlines()

# Setup Selenium WebDriver
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(service=service)

# try/finally guarantees the browser is closed even if scraping raises.
try:
    # Open a CSV file to write data
    with open('scraped_data.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['link', 'brand', 'category', 'old_price', 'new_price', 'sizes'])
        writer.writeheader()

        for link in links:
            url = link.strip()
            if not url:
                # Skip blank lines (e.g. a trailing newline in links.txt);
                # driver.get('') would fail on them.
                continue
            scrape_website(url, driver, writer)
finally:
    driver.quit()

In [None]:
# Quits the browser session again; the previous cell already calls
# driver.quit() once scraping is done.
# NOTE(review): presumably kept as a manual cleanup cell for when the cell
# above is interrupted before reaching its own quit - confirm.
driver.quit()