In [3]:
# Amazon Seller:

import time
from datetime import datetime
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

def scrape_product_details(driver):
    """
    Collect the title, price and seller text from the product page currently loaded.

    Args:
    - driver (webdriver.Chrome): WebDriver instance whose current page is the product page.

    Returns:
    - product_details (dict): Holds 'title' (only when the title element exists),
      'price' (or "Not available") and 'product_seller_info'
      (or "Seller information not found").
    """
    # Parse a snapshot of the rendered page once; title and seller are read from it
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    details = {}

    # Title: the key is left out entirely when the element is missing
    heading = soup.find("span", id="productTitle")
    if heading:
        details['title'] = heading.text.strip()

    # Price: queried in the live browser DOM via JavaScript rather than from the snapshot
    price_js = """
    var price = null;
    var priceElement = document.querySelector('span.a-price span[aria-hidden="true"]');
    if (priceElement) {
        price = priceElement.textContent.trim();
    }
    return price;
    """
    details['price'] = driver.execute_script(price_js) or "Not available"

    # Seller: every text fragment inside the merchant block, joined by single spaces
    merchant_block = soup.find("div", id="merchant-info")
    if not merchant_block:
        details['product_seller_info'] = "Seller information not found"
    else:
        details['product_seller_info'] = ' '.join(merchant_block.stripped_strings)

    return details

def scrape_seller_info(driver):
    """
    Function to scrape seller information from the bulk-order page.

    Three CSS selectors are tried in order and the first one that matches
    anything wins. The lookup does not wait: it inspects the page as it is
    at the moment of the call.

    Args:
    - driver (webdriver.Chrome): WebDriver instance for controlling the browser.

    Returns:
    - seller_info (str): Text of the first matching element, or
      "Seller information not found" when nothing matches or the lookup fails.
    """
    not_found = "Seller information not found"

    # Ordered from most specific to the fallback layout
    selectors = (
        "div[id^='imcx-result-item-shipsFromSoldBy-'] div.no-margin-bottom",
        "div[id^='imcx-result-item-shipsFromSoldBy-']",
        "div.combo-seller-info",
    )

    try:
        for selector in selectors:
            matches = driver.find_elements(By.CSS_SELECTOR, selector)
            if matches:
                # Parse the element's HTML so nested markup collapses to plain text
                html = matches[0].get_attribute('outerHTML')
                return BeautifulSoup(html, 'html.parser').text.strip()
    except Exception as e:
        # Best-effort: a failed seller lookup must not abort the price scrape
        print(f"Error scraping seller information: {e}")

    # Reached when no selector matched (previously this path returned None)
    return not_found

def scrape_bulk_order_prices(driver, quantities=(5, 10, 50, 100, 150), settle_seconds=10):
    """
    Function to scrape bulk order prices and seller information for multiple quantities.

    For each quantity the value is typed into the quantity box, the page is
    given time to refresh, and the displayed price plus the seller text are
    recorded.

    Args:
    - driver (webdriver.Chrome): WebDriver instance positioned on the bulk-order page.
    - quantities (iterable of int): Quantities to price. Defaults to 5, 10, 50, 100, 150.
    - settle_seconds (int | float): Pause after submitting a quantity so the
      price can update. Defaults to 10.

    Returns:
    - bulk_prices (dict): Maps each quantity to {"price": str, "seller_info": str}.
      A quantity is absent only if entering it failed; a missing price element
      is recorded as "Price not available".
    """
    # Local import: TimeoutException is not imported at the top of this cell
    from selenium.common.exceptions import TimeoutException

    bulk_prices = {}
    price_locator = (
        By.CSS_SELECTOR,
        "div.price-shipping.basket-cost.exclusive-price span.b-text-base.b-text-lead",
    )

    for quantity in quantities:
        try:
            # Wait for the quantity input to be present in the DOM
            quantity_input = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "input.b-form-control"))
            )

            # Replace whatever is in the field with the new quantity and submit it
            quantity_input.send_keys(Keys.CONTROL + "a")  # Select all text in the input field
            quantity_input.send_keys(Keys.BACKSPACE)      # Clear the input field
            quantity_input.send_keys(str(quantity))       # Enter the desired quantity
            quantity_input.send_keys(Keys.RETURN)         # Press Enter key

            # Give the page time to recalculate the price
            time.sleep(settle_seconds)

            # WebDriverWait raises on timeout instead of returning a falsy value,
            # so the "not available" case has to be handled as an exception
            try:
                price_span = WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located(price_locator)
                )
                price = price_span.text.strip() or "Price not available"
            except TimeoutException:
                price = "Price not available"

            # Seller text is read after the quantity change
            bulk_prices[quantity] = {
                "price": price,
                "seller_info": scrape_seller_info(driver),
            }

        except Exception as e:
            # Best-effort per quantity: report and move on to the next one
            print(f"An error occurred while scraping bulk order prices for quantity {quantity}: {e}")

    return bulk_prices

def _amazon_sign_in(driver, email, password):
    """Open amazon.com and submit the sign-in form with the given credentials."""
    # Open Amazon website
    driver.get('https://www.amazon.com')

    # Wait for the page to load (adjust sleep time as necessary)
    time.sleep(20)

    # Click the sign-in link
    driver.find_element(By.ID, "nav-link-accountList").click()

    # Wait for the sign-in page to load (adjust sleep time as necessary)
    time.sleep(5)

    # Enter the email and continue to the password step
    driver.find_element(By.ID, "ap_email").send_keys(email)
    driver.find_element(By.ID, "continue").click()

    # Wait for the password input page to load (adjust sleep time as necessary)
    time.sleep(5)

    # Enter the password and submit
    driver.find_element(By.ID, "ap_password").send_keys(password)
    driver.find_element(By.ID, "signInSubmit").click()

    # Leave time for a CAPTCHA / verification step to be handled manually if one appears
    time.sleep(50)


def _collect_bulk_data(driver, bulk_ordering_link, product_details):
    """Scrape the bulk-order tab into product_details, then always return to the product tab."""
    original_handle = driver.current_window_handle
    handles_before = set(driver.window_handles)

    try:
        # Click bulk ordering link
        driver.execute_script("arguments[0].click();", bulk_ordering_link)

        # Wait for the new tab and switch to it by handle; a fixed index is
        # unreliable if an earlier tab was left open
        WebDriverWait(driver, 15).until(
            lambda d: len(d.window_handles) > len(handles_before)
        )
        new_handle = next(h for h in driver.window_handles if h not in handles_before)
        driver.switch_to.window(new_handle)

        # Wait for the bulk ordering page to load
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "input.b-form-control"))
        )
        time.sleep(3)

        # Scrape bulk order prices and seller information
        bulk_data = scrape_bulk_order_prices(driver)

        # Add bulk prices and seller info to product_details
        for quantity, data in bulk_data.items():
            product_details[f'bulk_price_{quantity}'] = data["price"]
            product_details[f'seller_info_{quantity}'] = data["seller_info"]

    except Exception as e:
        print(f"An error occurred while handling bulk order link: {e}")

    finally:
        # Close every tab this call opened, even after a failure, so the next
        # product starts from the original tab
        for handle in driver.window_handles:
            if handle not in handles_before:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(original_handle)


def main():
    """
    Sign in to Amazon, scrape every product listed in an Excel file and save the results.

    The Excel file must contain a 'links' column. For each link the product
    title, price and seller are scraped; when a bulk-ordering link is present
    the bulk prices and sellers are added as extra columns. Results are written
    to output_<timestamp>.xlsx in the working directory.

    Credentials are read from the AMAZON_EMAIL and AMAZON_PASSWORD environment
    variables; anything missing is prompted for interactively.

    Raises:
    - KeyError: If the Excel file has no 'links' column.
    """
    # Local imports: this cell does not import these at module level
    import os
    from getpass import getpass

    # Load product links from an Excel file
    file_path = input("Enter the path to the Excel file: ")
    df = pd.read_excel(file_path)

    print("Columns in the loaded Excel file:", df.columns)

    try:
        links_column = df['links']
    except KeyError:
        print("Error: 'links' column not found in the Excel file. Please check the file and try again.")
        raise

    # Drop empty cells and normalise to stripped strings so one bad row cannot
    # fail on .strip()
    product_links = [
        link for link in links_column.dropna().astype(str).str.strip().tolist() if link
    ]

    # User credentials: never stored in the source file
    email = os.environ.get("AMAZON_EMAIL") or input("Enter the Amazon account email: ")
    password = os.environ.get("AMAZON_PASSWORD") or getpass("Enter the Amazon account password: ")

    # Set up Chrome WebDriver (add the "--headless" argument to chrome_options
    # to run without opening a browser window)
    chrome_options = Options()
    driver = webdriver.Chrome(options=chrome_options)

    # Initialize a list to store the product details
    products = []

    try:
        _amazon_sign_in(driver, email, password)

        # Iterate through the product links and scrape details
        for product_link in product_links:
            try:
                # Navigate to the product page
                driver.get(product_link)

                # Wait for the product page to load (adjust sleep time as necessary)
                time.sleep(10)

                # Scrape product details
                product_details = scrape_product_details(driver)

                # Check if bulk ordering link is present
                bulk_ordering_link = None
                try:
                    bulk_ordering_link = WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.XPATH, "//a[contains(@href, '/ab/bulk-order/bebx/api/v2/fastForwardBasket') and contains(@href, 'quantity=1') and (contains(@href, 'type=DPX_UPSELL') or contains(@href, 'type=DP_RFQD_TRANSIENT'))]"))
                    )
                except Exception:
                    print(f"No bulk ordering link found for {product_link}")

                if bulk_ordering_link is not None:
                    _collect_bulk_data(driver, bulk_ordering_link, product_details)
                elif product_details['price'] == "Not available":
                    # No price and no bulk option: report the product as unavailable
                    product_details['price'] = "Out of stock/unavailable"

                # Add the product details to the list
                if 'price' in product_details:
                    product_details['link'] = product_link
                    product_details['timestamp'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    products.append(product_details)
                else:
                    print(f"No price found for {product_link}")

                # Wait for a few seconds before moving to the next product (adjust sleep time as necessary)
                time.sleep(10)

            except Exception as e:
                # One failing product must not stop the whole run
                print(f"An error occurred while processing {product_link}: {e}")
    finally:
        # Always shut the browser down, including when sign-in fails
        driver.quit()

    # Convert the list of products into a DataFrame
    df_output = pd.DataFrame(products)
    # Output file name with unique timestamp
    output_file = f"output_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"

    # Write the DataFrame to Excel
    df_output.to_excel(output_file, index=False)

    print(f"Scraped data has been saved to the Excel file: {output_file}")


# Entry point: run the Amazon scraping workflow only when executed as the main module
if __name__ == '__main__':
    main()



Enter the path to the Excel file:  3set30linksamazon.xlsx


Columns in the loaded Excel file: Index(['links'], dtype='object')
An error occurred while handling bulk order link: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7E35A9632+30946]
	(No symbol) [0x00007FF7E355E3C9]
	(No symbol) [0x00007FF7E3456FDA]
	(No symbol) [0x00007FF7E34A822C]
	(No symbol) [0x00007FF7E34A850C]
	(No symbol) [0x00007FF7E34EDCB7]
	(No symbol) [0x00007FF7E34CCAAF]
	(No symbol) [0x00007FF7E34EB041]
	(No symbol) [0x00007FF7E34CC813]
	(No symbol) [0x00007FF7E349A6E5]
	(No symbol) [0x00007FF7E349B021]
	GetHandleVerifier [0x00007FF7E36DF83D+1301229]
	GetHandleVerifier [0x00007FF7E36EBDB7+1351783]
	GetHandleVerifier [0x00007FF7E36E2A03+1313971]
	GetHandleVerifier [0x00007FF7E35DDD06+245686]
	(No symbol) [0x00007FF7E356758F]
	(No symbol) [0x00007FF7E3563804]
	(No symbol) [0x00007FF7E3563992]
	(No symbol) [0x00007FF7E355A3EF]
	BaseThreadInitThunk [0x00007FF8753E257D+29]
	RtlUserThreadStart [0x00007FF87694AA48+40]

An error occurred while handling bulk order link: Message:

In [4]:

# Build Seller :

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import time
import os

def _find_first(scope, by, value):
    """Return the first element matching the locator under *scope*, or None if nothing matches."""
    matches = scope.find_elements(by, value)
    return matches[0] if matches else None


# Function to log in to the website
def login(driver):
    """
    Open the build.com login page, fill in the credentials and submit the form.

    The email and password are read from the BUILD_EMAIL and BUILD_PASSWORD
    environment variables (empty string when unset). Each element is looked up
    without raising, so a missing element is reported with a message instead
    of aborting with an exception.

    Args:
    - driver: Selenium-compatible WebDriver instance.

    Returns:
    - bool: True when the login button was found and clicked, otherwise False.
    """
    driver.get('https://www.build.com/account/login')

    email = os.environ.get("BUILD_EMAIL", "")
    password = os.environ.get("BUILD_PASSWORD", "")

    # NOTE(review): the dotted compound class names passed to By.CLASS_NAME
    # presumably rely on Selenium turning them into a CSS class selector - verify
    # Find the parent div for email directly
    email_parent_div = _find_first(driver, By.CLASS_NAME, "border-box.db.w-100.b")

    if email_parent_div is not None:
        # The email input is located page-wide by its type and name attributes
        email_input = _find_first(driver, By.CSS_SELECTOR, 'input[type="email"][name="username"]')

        if email_input is not None:
            # Enter email
            email_input.send_keys(email)
            print("Email input found and value entered.")
        else:
            print("Email input not found.")
    else:
        print("Email parent div not found.")

    # Find the parent div for password directly
    password_parent_div = _find_first(driver, By.CLASS_NAME, "border-box.db.w-100.b.mt2")
    if password_parent_div is None:
        print("Password parent div not found.")
        return False

    # Find the password input within the password parent div
    password_input = _find_first(password_parent_div, By.ID, "password")
    if password_input is None:
        print("Password input not found.")
        return False

    # Enter the password
    password_input.send_keys(password)
    print("Password input found and value entered.")

    # Find the login button by its class name and click it
    login_button = _find_first(driver, By.CLASS_NAME, "pointer.ba.br2.w-inherit.fw4.bg-theme-primary.theme-white.b--theme-primary.hover-bg-theme-primary-dark.hover-theme-white.hover-b--theme-primary-dark.active-bg-theme-primary-darker.active-b--theme-primary-darker.input.f5.ph4")
    if login_button is None:
        print("Login button not found.")
        return False

    login_button.click()
    print("Login button found and clicked.")
    time.sleep(5)  # Wait for login to complete
    return True

# Function to scrape titles and prices
def scrape_titles_and_prices(driver, input_file):
    """
    Visit every product link in an Excel file and save each page's title and price.

    Args:
    - driver: WebDriver instance used to load the pages.
    - input_file (str): Path to an Excel file with a 'Link' column.

    The results (Link, Title, Price, Timestamp) are written to
    output_<timestamp>.xlsx in the working directory; nothing is returned.
    """
    links_df = pd.read_excel(input_file)

    def scrape_title_and_price(link):
        """Load one page and return (title, price); ("Error", "Error") on any failure."""
        try:
            driver.get(link)
            # Fixed pause so the page can finish loading before it is read
            time.sleep(5)

            parsed = BeautifulSoup(driver.page_source, 'html.parser')

            # Title heading, identified by its class string
            heading = parsed.find('h1', class_='ma0 fw6 lh-title di f5 f3-ns')
            if heading:
                title = heading.text.strip()
            else:
                title = "Title not found"

            # Price container, identified by its class string
            price_box = parsed.find('div', class_='flex flex-row flex-nowrap justify-start mr4 f4 f3-ns pt2')
            if price_box:
                price = price_box.text.strip()
            else:
                price = "Price not found"

            return title, price
        except Exception as e:
            print(f"An error occurred while scraping {link}: {e}")
            return "Error", "Error"

    # One record per input row; the timestamp is taken after that row's scrape
    records = []
    for _, record in links_df.iterrows():
        url = record['Link']
        title, price = scrape_title_and_price(url)
        scraped_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        records.append((url, title, price, scraped_at))

    column_names = ['Link', 'Title', 'Price', 'Timestamp']
    output_df = pd.DataFrame(records, columns=column_names)

    # File name carries the current time so repeated runs do not overwrite each other
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f"output_{stamp}.xlsx"
    output_df.to_excel(output_file, index=False)

    print(f"Scraped data has been saved to the Excel file: {output_file}")

def main():
    """
    Start an undetected Chrome session, log in, and scrape the links from an Excel file.

    The browser is always shut down when the run ends, whether it finished
    normally or raised.
    """
    # Initialize the undetected Chrome driver
    options = uc.ChromeOptions()
    # Major version passed as version_main; presumably has to match the
    # installed Chrome - verify when Chrome is updated
    chrome_version = 127
    driver = uc.Chrome(options=options, version_main=chrome_version)

    try:
        # Perform login
        login(driver)

        # Get input file path from user input
        input_file = input("Enter the path to the Excel file containing product links: ")

        # Scrape titles and prices
        scrape_titles_and_prices(driver, input_file)
    finally:
        # Release the browser process even if login or scraping failed
        driver.quit()

# Entry point: run the Build.com scraping workflow only when executed as the main module
if __name__ == '__main__':
    main()

Email input found and value entered.
Password input found and value entered.
Login button found and clicked.


Enter the path to the Excel file containing product links:  Build Links.xlsx


Scraped data has been saved to the Excel file: output_20240731_115229.xlsx
