# 🛒 Web Scraping Smartphone Products from Amazon

## Introduction  
- **Project**: Scraping smartphone products from Amazon
- **Goal**: Collect product details (name, price, rating, brand, etc.)  
- **Tools**: Requests, BeautifulSoup, Selenium, Pandas  

## Get product links

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd
from bs4 import BeautifulSoup

In [18]:
# Collect product-page URLs from every page of an Amazon search result.
# Result: `all_links`, a list of href strings in the order they were found.
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=chrome_options)

# --- Extract product links ---
url = "https://www.amazon.com/s?k=smartphones&crid=O0BZ48E0CB7T&qid=1756727802&sprefix=smartph%2Caps%2C225&xpid=9d-GUbwFCiYyw&ref=sr_pg_1"

all_links = []

# try/finally guarantees the browser is closed even if a page load or a
# click fails part-way through the pagination.
try:
    driver.get(url)

    while True:
        anchors = driver.find_elements(
            By.CSS_SELECTOR, "span[data-component-type='s-product-image'] a"
        )
        # get_attribute() returns None for an anchor without an href; skip
        # those so only usable URLs are stored.
        links = [
            href
            for href in (anchor.get_attribute("href") for anchor in anchors)
            if href
        ]
        all_links.extend(links)

        print(f"Collected {len(all_links)} so far...")

        # --- Try to click "Next" button ---
        try:
            next_btn = driver.find_element(By.CLASS_NAME, "s-pagination-next")

            # On the last page the control carries the "disabled" class.
            if "s-pagination-disabled" in (next_btn.get_attribute("class") or ""):
                break

            next_btn.click()
            time.sleep(2)  # fixed pause before reading the next page
        except NoSuchElementException:
            # No pagination control on this page: nothing more to visit.
            break
        except Exception as exc:
            # Best effort: keep what was collected, but say why we stopped
            # instead of silently swallowing the error.
            print(f"Stopping pagination early: {exc!r}")
            break
finally:
    driver.quit()

print("Done. Total links:", len(all_links))

Collected 16 so far...
Collected 32 so far...
Collected 48 so far...
Collected 64 so far...
Collected 80 so far...
Collected 96 so far...
Collected 112 so far...
Collected 128 so far...
Collected 144 so far...
Collected 160 so far...
Collected 176 so far...
Collected 192 so far...
Collected 208 so far...
Collected 224 so far...
Collected 240 so far...
Collected 256 so far...
Collected 272 so far...
Collected 288 so far...
Collected 304 so far...
Collected 306 so far...
Done. Total links: 306


In [None]:
# Persist the collected URLs as a one-column CSV (no index column) so the
# product-detail step can load them back from disk.
links_df = pd.DataFrame({"Product_Link": all_links})
links_df.to_csv("amazon_products.csv", index=False)

## Get product data

In [9]:
# Visit each saved product link and scrape its details into `products_list`,
# a list of 9-tuples: (name, rating, number_of_ratings, category,
# subcategory, brand, price, discount, image).


def _text_or_none(node):
    """Return the stripped text of a parsed HTML node, or None if the node is missing."""
    return node.get_text(strip=True) if node is not None else None


# Load the links before starting the browser, so a missing or unreadable CSV
# does not leave a browser window open.
links = pd.read_csv("amazon_products.csv")["Product_Link"].tolist()
products_list = []

chrome_options = Options()
chrome_options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=chrome_options)

chunk1 = slice(0, 100)
chunk2 = slice(100, 200)
# slice(200,) is slice(None, 200), i.e. the FIRST 200 items; an explicit
# None stop is required to mean "from 200 to the end".
chunk3 = slice(200, None)

# try/finally guarantees the browser is closed even if a page fails to load.
try:
    # --- test on first chunk ---
    for link in links[chunk1]:

        driver.get(link)
        time.sleep(2)  # fixed pause after navigation before reading page_source

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # --- Product details extraction ---
        # Every field falls back to None when its element is not on the page.
        image_tag = soup.find("img", {"class": "a-dynamic-image"})
        image = image_tag.get("src") if image_tag is not None else None

        name = _text_or_none(soup.find("span", {"class": "product-title-word-break"}))
        price = _text_or_none(soup.find("span", {"class": "a-price-whole"}))
        rating = _text_or_none(soup.find("span", {"class": "a-icon-alt"}))
        number_of_ratings = _text_or_none(
            soup.find("span", {"id": "acrCustomerReviewText"})
        )

        # First breadcrumb is the category; the last one is the subcategory,
        # but only when there is more than one breadcrumb.
        breadcrumb_links = soup.select("ul.a-unordered-list.a-horizontal.a-size-small a")
        categories = [a.get_text(strip=True) for a in breadcrumb_links]
        category = categories[0] if categories else None
        subcategory = categories[-1] if len(categories) > 1 else None

        brand = _text_or_none(soup.find("span", {"class": "po-break-word"}))

        discount = _text_or_none(
            soup.select_one(
                "span.a-size-large.a-color-price.savingPriceOverride.aok-align-center.reinventPriceSavingsPercentageMargin.savingsPercentage"
            )
        )

        # NOTE(review): sold_by is extracted but not stored in the row below;
        # adding it would also require a tenth name in the column list used
        # to build the DataFrame from products_list.
        sold_by = _text_or_none(
            soup.select_one(
                "div.offer-display-feature-text span.a-size-small.offer-display-feature-text-message"
            )
        )

        # Append product
        products_list.append(
            (
                name,
                rating,
                number_of_ratings,
                category,
                subcategory,
                brand,
                price,
                discount,
                image,
            )
        )

        print(f"Scraped {len(products_list)} products so far...")
finally:
    driver.quit()

Scraped 1 products so far...
Scraped 2 products so far...
Scraped 3 products so far...
Scraped 4 products so far...
Scraped 5 products so far...
Scraped 6 products so far...
Scraped 7 products so far...
Scraped 8 products so far...
Scraped 9 products so far...
Scraped 10 products so far...
Scraped 11 products so far...
Scraped 12 products so far...
Scraped 13 products so far...
Scraped 14 products so far...
Scraped 15 products so far...
Scraped 16 products so far...
Scraped 17 products so far...
Scraped 18 products so far...
Scraped 19 products so far...
Scraped 20 products so far...
Scraped 21 products so far...
Scraped 22 products so far...
Scraped 23 products so far...
Scraped 24 products so far...
Scraped 25 products so far...
Scraped 26 products so far...
Scraped 27 products so far...
Scraped 28 products so far...
Scraped 29 products so far...
Scraped 30 products so far...
Scraped 31 products so far...
Scraped 32 products so far...
Scraped 33 products so far...
Scraped 34 products

In [10]:
# Column labels, listed in the same order as the fields of each scraped tuple.
product_columns = [
    "Name",
    "Rating",
    "Number_of_Ratings",
    "Category",
    "Subcategory",
    "Brand",
    "Price",
    "Discount",
    "Image",
]
products_df = pd.DataFrame(data=products_list, columns=product_columns)
# Preview the first rows (displayed as the cell's output).
products_df.head()

Unnamed: 0,Name,Rating,Number_of_Ratings,Category,Subcategory,Brand,Price,Discount,Image
0,"Samsung Galaxy S25 Ultra, 512GB Smartphone, Un...",4.5 out of 5 stars,"2,345 ratings",Cell Phones & Accessories,Cell Phones,Samsung,1169.0,-18%,https://m.media-amazon.com/images/I/61n0lmxP5-...
1,"Samsung Galaxy A16 5G A Series, Unlocked Andro...",4.2 out of 5 stars,"1,789 ratings",Cell Phones & Accessories,Cell Phones,Samsung,198.0,,https://m.media-amazon.com/images/I/71KGkQ+KOK...
2,"SAMSUNG Galaxy S24 FE AI Phone, 128GB Unlocked...",4.5 out of 5 stars,"1,381 ratings",Cell Phones & Accessories,Cell Phones,SAMSUNG,1069.0,,https://m.media-amazon.com/images/I/61uakkLoHx...
3,Motorola Moto G 5G | 2024 | Unlocked | Made fo...,4.2 out of 5 stars,"2,996 ratings",Cell Phones & Accessories,Cell Phones,Motorola,139.0,-30%,https://m.media-amazon.com/images/I/61TTdTnaEe...
4,Tracfone Moto G Play (2023) [Activation Promot...,4.3 out of 5 stars,"1,271 ratings",Cell Phones & Accessories,Cell Phones,TracFone,49.0,,https://m.media-amazon.com/images/I/61Ydn7qAod...


## Cleaning some columns

In [None]:
# Convert the scraped text columns to floats, then save the cleaned table.
# Values that cannot be parsed become NaN instead of raising.

# "2,345 ratings" -> 2345.0. Dropping every non-digit also covers the
# singular "1 rating" and any other surrounding text.
products_df['Number_of_Ratings'] = pd.to_numeric(
    products_df['Number_of_Ratings'].str.replace(r'\D', '', regex=True),
    errors='coerce',
).astype(float)

# "4.5 out of 5 stars" -> 4.5. The decimal part is optional so a whole-number
# rating is kept as well; expand=False returns a Series, not a DataFrame.
products_df['Rating'] = products_df['Rating'].str.extract(
    r'(\d+(?:\.\d+)?)', expand=False
).astype(float)

# "1,169." -> 1169.0. Both '.' and ',' are removed as literal characters; the
# explicit regex avoids relying on str.replace's version-dependent default.
products_df['Price'] = pd.to_numeric(
    products_df['Price'].str.replace(r'[.,]', '', regex=True),
    errors='coerce',
).astype(float)

# "-18%" -> -0.18 (signed fraction).
products_df['Discount'] = products_df['Discount'].str.extract(
    r'(-?\d+)', expand=False
).astype(float) / 100

products_df.to_csv("amazon_products_details.csv", index=False)
products_df.head()

Unnamed: 0,Name,Rating,Number_of_Ratings,Category,Subcategory,Brand,Price,Discount,Image
0,"Samsung Galaxy S25 Ultra, 512GB Smartphone, Un...",4.5,2345.0,Cell Phones & Accessories,Cell Phones,Samsung,1169.0,-0.18,https://m.media-amazon.com/images/I/61n0lmxP5-...
1,"Samsung Galaxy A16 5G A Series, Unlocked Andro...",4.2,1789.0,Cell Phones & Accessories,Cell Phones,Samsung,198.0,,https://m.media-amazon.com/images/I/71KGkQ+KOK...
2,"SAMSUNG Galaxy S24 FE AI Phone, 128GB Unlocked...",4.5,1381.0,Cell Phones & Accessories,Cell Phones,SAMSUNG,1069.0,,https://m.media-amazon.com/images/I/61uakkLoHx...
3,Motorola Moto G 5G | 2024 | Unlocked | Made fo...,4.2,2996.0,Cell Phones & Accessories,Cell Phones,Motorola,139.0,-0.3,https://m.media-amazon.com/images/I/61TTdTnaEe...
4,Tracfone Moto G Play (2023) [Activation Promot...,4.3,1271.0,Cell Phones & Accessories,Cell Phones,TracFone,49.0,,https://m.media-amazon.com/images/I/61Ydn7qAod...
