In [1]:
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# RockAuto part-search page to scrape.
url = 'https://www.rockauto.com/fr/partsearch/?mfr=AIRTEX'

# Scraped rows (one dict per part listing); filled in by the scraping cell
# and turned into a DataFrame afterwards.
data = []

# Launch a Firefox session driven by Selenium.
driver = webdriver.Firefox()

# Navigate to the page. The browser is normally closed by the `finally`
# clause of the scraping cell, but that cell never runs if navigation fails
# (or is interrupted) here, so close the browser ourselves before re-raising
# to avoid leaving an orphaned Firefox process behind.
try:
    driver.get(url)
except BaseException:
    driver.quit()
    raise

In [3]:
# Root used to resolve the hrefs found in the listings into absolute URLs.
ROCKAUTO_BASE_URL = 'https://www.rockauto.com'

# Price elements carry IDs of the form "dprice[<n>][v]", where <n> is the
# listing's position plus this offset.
# NOTE(review): the offset 3 is carried over unchanged from the original
# scraper and depends on the page layout — confirm it still holds.
PRICE_ID_OFFSET = 3

try:
    # Block (up to 20 s) until at least one "more info" row is present in the DOM,
    # which is used as the signal that the listings have been rendered.
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CLASS_NAME, "listing-text-row-moreinfo-truck"))
    )

    # Parse a snapshot of the rendered page with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # One <td> per part listing.
    part_listings = soup.find_all('td', class_='listing-border-top-line listing-inner-content')

    for part_listing in part_listings:
        # Look each element up once and reuse the result.
        manufacturer_tag = part_listing.find('span', class_='listing-final-manufacturer')
        part_number_tag = part_listing.find('span', class_='listing-final-partnumber')
        link_tag = part_listing.find('a', href=True)

        # urljoin leaves absolute hrefs untouched and resolves relative ones
        # against the site root. When the listing has no link at all, store a
        # plain 'N/A' rather than gluing the placeholder onto the base URL.
        if link_tag is not None:
            details_link = urljoin(ROCKAUTO_BASE_URL, link_tag['href'])
        else:
            details_link = 'N/A'

        data.append({
            'Manufacturer': manufacturer_tag.text.strip() if manufacturer_tag is not None else 'Unknown',
            'Part Number': part_number_tag.text.strip() if part_number_tag is not None else 'Unknown',
            'Price': 'N/A',  # Placeholder, filled in by the loop below
            'Details Link': details_link,
        })

    # Prices are looked up on the live page by their positional element ID.
    for index, record in enumerate(data):
        dynamic_price_id = f"dprice[{PRICE_ID_OFFSET + index}][v]"
        try:
            # find_element never returns a falsy value: it raises
            # NoSuchElementException when the ID is absent.
            price_tag = driver.find_element(By.ID, dynamic_price_id)
        except NoSuchElementException:
            continue  # keep the 'N/A' placeholder
        # An element with empty text also keeps the 'N/A' placeholder.
        record['Price'] = price_tag.text.strip() or 'N/A'

except TimeoutException:
    print("Timeout: The page took too long to load or the element was not found.")

finally:
    # Always close the browser, whether scraping succeeded or not.
    driver.quit()

In [4]:
# Create a DataFrame from the scraped data
df = pd.DataFrame(data)

# Print the DataFrame
print(df)

   Manufacturer Part Number   Price  \
0        AIRTEX       FS193   $0.73   
1        AIRTEX       FS240   $0.73   
2        AIRTEX        FS29   $0.88   
3        AIRTEX       FS192   $0.96   
4        AIRTEX       FS190   $1.00   
5        AIRTEX       FS195   $1.07   
6        AIRTEX       FS151   $1.46   
7        AIRTEX       FS237   $2.72   
8        AIRTEX       FS233   $3.23   
9        AIRTEX       FS165   $3.87   
10       AIRTEX       FS215   $4.72   
11       AIRTEX      WH3001   $5.00   
12       AIRTEX       FS194   $5.21   
13       AIRTEX       E2390   $6.08   
14       AIRTEX       E2521   $6.08   
15       AIRTEX      LR3003   $6.08   
16       AIRTEX      WH3002   $6.24   
17       AIRTEX       FS182   $7.29   
18       AIRTEX       E2318   $9.45   
19       AIRTEX       E2254   $9.67   
20       AIRTEX      AW9019  $10.46   
21       AIRTEX       E8643  $11.85   
22       AIRTEX      AW9029  $16.36   
23       AIRTEX      AW6828  $17.74   
24       AIRTEX       413

In [5]:
# Write the table to an Excel workbook (relative path), omitting the index column.
df.to_excel(excel_writer='rockauto_parts.xlsx', index=False)