# Amazon Scraper

## Get links of products

In [2]:
import time
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

In [None]:
# Collect product-page links from every page of an Amazon search result.
# Result: `all_links`, a list of unique hrefs in the order they were first seen.

# --- Setup Chrome ---
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(options=chrome_options)

# --- Amazon search page to start from ---
url = "https://www.amazon.com/s?k=smartphones&crid=O0BZ48E0CB7T&qid=1756727802&sprefix=smartph%2Caps%2C225&xpid=9d-GUbwFCiYyw&ref=sr_pg_1"

all_links = []
seen_links = set()  # O(1) membership test so repeated hrefs are stored once

# try/finally guarantees the browser is closed even if a page load or a
# lookup raises part-way through the pagination loop.
try:
    driver.get(url)

    while True:
        time.sleep(2)  # wait for page to load

        # --- Extract product links ---
        products = driver.find_elements(By.CSS_SELECTOR, "span[data-component-type='s-product-image'] a")
        for p in products:
            link = p.get_attribute("href")
            if link and link not in seen_links:
                seen_links.add(link)
                all_links.append(link)

        print(f"Collected {len(all_links)} so far...")

        # --- Try to click "Next" button ---
        try:
            next_btn = driver.find_element(By.CSS_SELECTOR, "a.s-pagination-next")
        except NoSuchElementException:
            break  # no more next page

        # get_attribute may return None; treat that as "no classes" rather
        # than letting the `in` test raise TypeError.
        if "disabled" in (next_btn.get_attribute("class") or ""):
            break  # no more pages

        # Click through JavaScript instead of next_btn.click().
        driver.execute_script("arguments[0].click();", next_btn)
finally:
    driver.quit()

print("✅ Done. Total links:", len(all_links))
for link in all_links:
    print(link)

In [None]:
# Persist the collected links as a one-column CSV.
# The path is built with pathlib rather than the literal "data\amazon_products.csv":
# inside a normal string "\a" is the BEL control character, so that literal
# never pointed at data/amazon_products.csv.
links_path = Path("data") / "amazon_products.csv"
links_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories

links_df = pd.DataFrame(all_links, columns=["Product_Link"])
links_df.to_csv(links_path, index=False)

## Get product data

In [None]:
# Scrape the detail page of each product link in the current batch.
# Result: `products_list`, a list of tuples
# (name, rating, number_of_ratings, category, subcategory,
#  brand, price, discount, sold_by, image); any field is None when its
# element is not found on the page.

batch_size = 100
start = 0


def _text_or_none(tag):
    """Return the stripped text of a BeautifulSoup tag, or None if the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else None


# Links are expected under data/; fall back to the working directory for
# CSVs saved there by earlier runs.
links_path = Path("data") / "amazon_products.csv"
if not links_path.exists():
    links_path = Path("amazon_products.csv")

# Read the input before launching Chrome so a missing or malformed CSV does
# not leave a browser process behind.
links = pd.read_csv(links_path)["Product_Link"].tolist()
batch = links[start:start + batch_size]
products_list = []

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(options=chrome_options)

# try/finally guarantees the browser is closed even on KeyboardInterrupt.
try:
    for idx, link in enumerate(batch):
        # Best-effort per product: log the failure and move on to the next link.
        try:
            # Report against the real batch length; the last batch may be
            # shorter than batch_size.
            print(f"Scraping product {idx+1}/{len(batch)}")
            driver.get(link)
            time.sleep(2)  # wait for page to load
            soup = BeautifulSoup(driver.page_source, "html.parser")

            # --- Main product image ---
            image = soup.find("img", class_="a-dynamic-image a-stretch-horizontal")
            image = image["src"] if image else None

            name = _text_or_none(soup.find("span", class_="a-size-large product-title-word-break"))
            price = _text_or_none(soup.find("span", class_="a-price-whole"))

            # --- Average rating (stars) ---
            rating = _text_or_none(soup.find("span", class_="a-icon-alt"))

            # --- Number of ratings (reviews count, aria-label) ---
            reviews_tag = soup.find("span", {"id": "acrCustomerReviewText"})
            number_of_ratings = reviews_tag["aria-label"] if reviews_tag and "aria-label" in reviews_tag.attrs else None

            # --- Breadcrumb: first entry is the category, last the subcategory ---
            breadcrumb_links = soup.select("ul.a-unordered-list.a-horizontal.a-size-small a")
            categories = [a.get_text(strip=True) for a in breadcrumb_links]
            category = categories[0] if categories else None
            subcategory = categories[-1] if len(categories) > 1 else None

            brand = _text_or_none(soup.find("span", class_="a-size-base po-break-word"))

            discount = _text_or_none(soup.find("span", class_="a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage"))

            sold_by = _text_or_none(soup.select_one("div.offer-display-feature-text span.a-size-small.offer-display-feature-text-message"))

            products_list.append((
                name, rating, number_of_ratings, category, subcategory,
                brand, price, discount, sold_by,
                image
            ))
        except Exception as e:
            print(f"Error at index {idx + start}: {e}")
finally:
    driver.quit()

In [14]:
# Turn the scraped tuples into a table. The column order below must match
# the field order of the tuples appended to products_list.
products_df = pd.DataFrame(
    data=products_list,
    columns=[
        "Name",
        "Rating",
        "Number_of_Ratings",
        "Category",
        "Subcategory",
        "Brand",
        "Price",
        "Discount",
        "Sold_by",
        "Image",
    ],
)
# Bare expression so the notebook renders the table as the cell output.
products_df

Unnamed: 0,Name,Rating,Number_of_Ratings,Category,Subcategory,Brand,Price,Discount,Sold_by,Image
0,"Samsung Galaxy S25 Ultra, 512GB Smartphone, Un...",4.5 out of 5 stars,"2,250 Reviews",Cell Phones & Accessories,Cell Phones,Samsung,1119.,-21%,Amazon.com,https://m.media-amazon.com/images/I/61n0lmxP5-...
1,"Samsung Galaxy A16 5G A Series, Unlocked Andro...",4.2 out of 5 stars,"1,761 Reviews",Cell Phones & Accessories,Cell Phones,Samsung,199.,,Amazon.com,https://m.media-amazon.com/images/I/71KGkQ+KOK...
2,"SAMSUNG Galaxy S24 FE AI Phone, 128GB Unlocked...",4.5 out of 5 stars,"1,370 Reviews",Cell Phones & Accessories,Cell Phones,SAMSUNG,969.,,,https://m.media-amazon.com/images/I/61uakkLoHx...
3,Motorola Moto G 5G | 2024 | Unlocked | Made fo...,4.2 out of 5 stars,"2,953 Reviews",Cell Phones & Accessories,Cell Phones,Motorola,139.,-30%,Amazon.com,https://m.media-amazon.com/images/I/61TTdTnaEe...
4,Tracfone Moto G Play (2023) [Activation Promot...,4.3 out of 5 stars,"1,244 Reviews",Cell Phones & Accessories,Cell Phones,TracFone,49.,,Amazon.com,
...,...,...,...,...,...,...,...,...,...,...
95,Motorola razr | 2024 | Unlocked | Made for US ...,4.0 out of 5 stars,306 Reviews,Cell Phones & Accessories,Cell Phones,Motorola,499.,-29%,Amazon.com,https://m.media-amazon.com/images/I/61-0BtzHKB...
96,"Alcatel 1 (16GB) 5.0"" Full View Display, Remov...",3.6 out of 5 stars,"2,288 Reviews",Cell Phones & Accessories,Cell Phones,Alcatel,54.,,Amazon,https://m.media-amazon.com/images/I/81wN2woIou...
97,Google Pixel Fold - Unlocked Android 5G Smartp...,4.0 out of 5 stars,132 Reviews,Cell Phones & Accessories,Cell Phones,Google,749.,,Amazon,https://m.media-amazon.com/images/I/71MDT0OqJn...
98,Google Pixel 6 – 5G Android Phone - Unlocked S...,4.2 out of 5 stars,"4,950 Reviews",Cell Phones & Accessories,Cell Phones,Google,299.,,Amazon,https://m.media-amazon.com/images/I/61KfUtmSrz...


In [None]:
# Persist the scraped product details as CSV.
# The path is built with pathlib rather than the literal
# "data\amazon_products_detailed.csv": inside a normal string "\a" is the BEL
# control character, so that literal never pointed into the data/ directory.
detailed_path = Path("data") / "amazon_products_detailed.csv"
detailed_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create directories
products_df.to_csv(detailed_path, index=False)