## Automating Web Interaction with Python's Selenium: Accessing Amazon and Maximizing Window

In [1]:
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as wait

# Location of the ChromeDriver executable on this machine.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# start on another machine until it is changed.
path = r'C:\Users\user\OneDrive - Carleton University\Desktop\Semester 2\ITEC_5010_Applied Programming\My Project\Data_Analysis_Portfolio-main\Data_Analysis_Portfolio-main\Latest Project\ITEC5010_TermProject_DataAnalysis\chromedriver.exe'

# Selenium 4 expects the driver location to be wrapped in a Service object;
# passing the path as the first positional argument is the Selenium 3 call
# signature and is rejected by current Selenium 4 releases.
driver = webdriver.Chrome(service=Service(path))

# Open the Amazon home page and enlarge the browser window.
driver.get("https://www.amazon.com")
driver.maximize_window()

## Searching Amazon for a User-Supplied Product (e.g. 'Healthcare Survival Kit')

In [2]:
def SearchProduct(keyword):
    """Type ``keyword`` into the Amazon search bar, submit it and wait for results.

    Operates on the module-level ``driver``, which must currently show a page
    that contains the search box and the search submit button.

    Args:
        keyword: Text to search for.

    Raises:
        selenium.common.exceptions.NoSuchElementException: the search box or
            the submit button is not present on the current page.
        selenium.common.exceptions.TimeoutException: no result item appeared
            within 10 seconds of submitting the search.
    """
    # locating the search box
    search_box = driver.find_element(By.ID, 'twotabsearchtextbox')
    # clearing any previous query so a repeated call replaces it instead of
    # appending the new keyword to the old text
    search_box.clear()
    # typing the keyword in searchbox
    search_box.send_keys(keyword)
    # creating WebElement for a search button
    search_button = driver.find_element(By.ID, 'nav-search-submit-button')
    # clicking search_button
    search_button.click()
    # Waiting for the results to load. implicitly_wait() does not pause here;
    # it only sets the timeout for later element lookups, so block explicitly
    # until at least one result item is present in the DOM.
    wait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//div[contains(@class, "s-result-item s-asin")]')),
        message="No search results appeared for {!r}".format(keyword),
    )
    
# Ask for the search term, e.g. "healthcare survival kit". Surrounding
# whitespace is dropped and a blank answer is asked for again, because an
# empty search would leave the following cells with no result list to read.
keyword = input("Please enter a product to search: ").strip()
while not keyword:
    keyword = input("Please enter a product to search: ").strip()
SearchProduct(keyword)

Please enter a product to search: healthcare survival kit


In [3]:
# Result containers: one list per scraped field. The scraping loop appends to
# them in step, so index i across the lists refers to the same product.
product_name = list()         # product titles
product_asin = list()         # values of the "data-asin" attribute
product_price = list()        # prices
product_ratings = list()      # rating labels
product_ratings_num = list()  # rating-count labels
product_link = list()         # created here; not filled by the cells shown in this notebook
product_delivery = list()     # delivery text

## Scraping 'Healthcare Survival Kit' Listings on Amazon

## Exporting Amazon 'Healthcare Survival Kit' Product List to Excel 

In [4]:
# Walk through every result page of the current search. For each result card
# one value is appended to each product_* list; afterwards the "Next" link is
# followed until no such link exists.
while True:
    # Finding all items on the page (waits up to 10 s for at least one card;
    # raises TimeoutException if none appears)
    items = wait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, '//div[contains(@class, "s-result-item s-asin")]')))

    for item in items:
        # All fields of a card are read into locals first and appended
        # together at the end, so an exception part-way through a card cannot
        # leave the product_* lists with different lengths.

        # finding name; find_elements returns [] instead of raising, so a card
        # without a matching title gets an empty name rather than aborting
        # the whole scrape
        name_spans = item.find_elements(By.XPATH, './/span[contains(@class,"a-color-base a-text-normal")]')
        name = name_spans[0].text if name_spans else ""

        # finding ASIN number
        data_asin = item.get_attribute("data-asin")

        # finding price: whole and fractional parts are separate spans;
        # 0 marks a card where either part is missing
        whole_price = item.find_elements(By.XPATH, './/span[@class="a-price-whole"]')
        fraction_price = item.find_elements(By.XPATH, './/span[@class="a-price-fraction"]')
        if whole_price and fraction_price:
            price = '.'.join([whole_price[0].text, fraction_price[0].text])
        else:
            price = 0

        # finding ratings box
        ratings_box = item.find_elements(By.XPATH, './/div[@class="a-row a-size-small"]/span')

        # finding ratings and ratings_num: first span -> ratings, second span
        # -> ratings_num. Each index is guarded separately because a box with
        # a single span would otherwise raise IndexError. 0 marks a missing value.
        ratings = ratings_box[0].get_attribute('aria-label') if len(ratings_box) > 0 else 0
        ratings_num = ratings_box[1].get_attribute('aria-label') if len(ratings_box) > 1 else 0

        # finding delivery; "0" marks a card without delivery text
        delivery_spans = item.find_elements(By.XPATH, './/div[@data-cy="delivery-recipe"]//span[@class="a-color-base a-text-bold"]')
        delivery = delivery_spans[0].text if delivery_spans else "0"

        product_name.append(name)
        product_asin.append(data_asin)
        product_price.append(price)
        product_ratings.append(ratings)
        product_ratings_num.append(str(ratings_num))
        product_delivery.append(delivery)

    next_button = driver.find_elements(By.XPATH, '//a[@class="s-pagination-item s-pagination-next s-pagination-button s-pagination-separator"]')
    if not next_button:
        # No clickable "Next" link: this was the last page.
        break

    # Clicking the "Next" button to go to the next page
    next_button[0].click()

    # Waiting for the next page. implicitly_wait() does not pause, it only
    # sets the lookup timeout, so the next pass could still see this page's
    # cards. Instead wait until one of the cards just scraped is detached from
    # the DOM. This assumes the click replaces the result cards, as a normal
    # link navigation does.
    try:
        wait(driver, 10).until(EC.staleness_of(items[0]))
    except TimeoutException:
        # Stop rather than scrape the same page again and duplicate rows.
        print("Next page did not load within 10 seconds; stopping pagination.")
        break

In [5]:
import pandas as pd


# Replace full-width commas (U+FF0C) with ASCII commas; str() also turns the
# integer 0 placeholder into the string "0".
product_price = [str(p).replace('\uff0c', ',') for p in product_price]

# Column name -> scraped values, in the order the columns appear in the sheet.
export_columns = {
    'Name': product_name,
    'Asin': product_asin,
    'Price': product_price,
    'Rating': product_ratings,
    'Rating_Num': product_ratings_num,
    'Delivery': product_delivery,
}

# zip() stops at the shortest list without complaint, which would hide rows
# whose fields no longer line up (e.g. after an interrupted scrape). Fail
# loudly instead of exporting misaligned data.
column_lengths = {column: len(values) for column, values in export_columns.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError("Scraped columns have different lengths: {}".format(column_lengths))

rows = zip(*export_columns.values())

df = pd.DataFrame(rows, columns=list(export_columns))

# Written to the current working directory, without the index column.
df.to_excel("product_list.xlsx", index=False)

## Close the browser window and terminate the WebDriver session

In [None]:
driver.quit()