Notebook: Amazon Product Scraper (ASIN-based) — Selenium (amazon.com)
Cell 1 — Install dependencies (run once)

If the packages are already installed and the import check in Cell 2 succeeds, you can skip this cell.

In [None]:
# Show which interpreter backs this kernel, so it is clear which environment
# the pip commands below install into.
import sys
print("Kernel Python:", sys.executable)

# Run pip through the kernel's own interpreter (rather than a bare `pip`) so the
# packages land in the environment this notebook actually imports from.
# -U upgrades pip itself first, then Selenium and webdriver-manager.
!{sys.executable} -m pip install -U pip
!{sys.executable} -m pip install -U selenium webdriver-manager


Cell 2 — Verify installation

In [None]:
# Smoke test: both imports must succeed before any scraping cell can work.
import selenium
# ChromeDriverManager is also the name build_driver (Cell 5) relies on.
from webdriver_manager.chrome import ChromeDriverManager

print("selenium:", selenium.__version__)
# Only reached if the webdriver_manager import above did not raise.
print("webdriver-manager: OK")


Cell 3 — Configuration (ASIN only)

In [None]:
# ASIN of the product to scrape in the single-product run (Cell 7).
ASIN = "B003R4Q7PS"

# Marketplace domain and the product detail URL derived from it and the ASIN.
MARKETPLACE = "amazon.com"
PRODUCT_URL = f"https://www.{MARKETPLACE}/dp/{ASIN}"

# Last expression of the cell: echoes the URL as the cell output for a quick check.
PRODUCT_URL


Cell 4 — Imports (Selenium runtime)

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import time


Cell 5 — Build the Chrome driver

In [None]:
def build_driver(
    headless: bool = False,
    window_size: str = "1280,900",
    user_agent: str = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
) -> webdriver.Chrome:
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: Run Chrome without a visible window. Defaults to False,
            which is the recommended setting while developing against Amazon
            to reduce bot detection.
        window_size: Browser window size as "WIDTH,HEIGHT".
        user_agent: User-Agent string the browser should send.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.
    """
    # Imported here so this function works even if the "Verify installation"
    # cell (the only other place this name is imported) was skipped.
    from webdriver_manager.chrome import ChromeDriverManager

    options = Options()

    if headless:
        options.add_argument("--headless=new")

    # Reduce automation fingerprinting
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument(f"--window-size={window_size}")

    # Realistic User-Agent
    options.add_argument(f"user-agent={user_agent}")

    # webdriver-manager resolves a chromedriver binary and hands its path to Selenium.
    return webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
    )


Cell 6 — Extract product data (title + bullets)

In [None]:
def _looks_blocked(page_title: str) -> bool:
    """Best-effort check: does the page title hint at a bot / captcha / error page?"""
    lowered = page_title.lower()
    return any(keyword in lowered for keyword in ("robot", "captcha", "sorry"))


def extract_amazon_product(
    driver: webdriver.Chrome,
    asin: str,
    marketplace: str = "amazon.com",
    timeout: float = 25,
) -> dict:
    """Open a product detail page by ASIN and scrape its title and feature bullets.

    Args:
        driver: A running Chrome WebDriver.
        asin: Product identifier used to build the ``/dp/<asin>`` URL.
        marketplace: Domain to scrape, without the ``www.`` prefix.
        timeout: Seconds to wait for the product title element to appear.

    Returns:
        A dict with the keys ``asin``, ``url``, ``title``, ``bullets``,
        ``blocked_suspected`` and ``page_title``. If the title element never
        appears and the page title looks like a block page, ``title`` is ``""``,
        ``bullets`` is empty and ``blocked_suspected`` is ``True``.

    Raises:
        selenium.common.exceptions.TimeoutException: If the title element does
            not appear within ``timeout`` seconds and the page title does not
            look like a block page.
    """
    from selenium.common.exceptions import TimeoutException

    url = f"https://www.{marketplace}/dp/{asin}"
    driver.get(url)

    result = {
        "asin": asin,
        "url": url,
        "title": "",
        "bullets": [],
        "blocked_suspected": False,
        "page_title": "",
    }

    # 1) Title
    try:
        title_el = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.ID, "productTitle"))
        )
    except TimeoutException:
        # A block page has no #productTitle, so the wait times out. Run the block
        # check here, otherwise it could never fire in the case it exists for.
        page_title = driver.title
        if not _looks_blocked(page_title):
            raise
        result["blocked_suspected"] = True
        result["page_title"] = page_title
        return result

    result["title"] = title_el.text.strip()

    # 2) Bullets: keep non-empty texts, dropping the "see more" expander label
    for el in driver.find_elements(By.CSS_SELECTOR, "#feature-bullets ul li span"):
        text = el.text.strip()
        if text and text.lower() != "see more":
            result["bullets"].append(text)

    # 3) Detect basic bot / captcha signals (best-effort)
    page_title = driver.title
    result["blocked_suspected"] = _looks_blocked(page_title)
    result["page_title"] = page_title

    return result


Cell 7 — Run the scraper

In [None]:
# Start a visible (non-headless) browser for the single-product run.
driver = build_driver(headless=False)

try:
    product_data = extract_amazon_product(driver, ASIN)
finally:
    # Runs whether or not extraction succeeded, so the browser is always closed.
    time.sleep(2)  # brief visual inspection window
    driver.quit()

# Last expression of the cell: shows the scraped dict as the cell output.
# If extraction raised, product_data is not assigned by this cell.
product_data


Cell 8 — Pretty display in Jupyter

In [None]:
from IPython.display import Markdown, display

# Header section: one Markdown output per entry, in reading order.
for header_md in (
    f"## {product_data['title']}",
    f"**ASIN:** `{product_data['asin']}`",
    f"**URL:** {product_data['url']}",
    f"**Blocked suspected:** `{product_data['blocked_suspected']}`",
    "---",
    "### Product Feature Bullets",
):
    display(Markdown(header_md))

# Bullet section: fall back to an explanatory note when nothing was scraped.
if not product_data["bullets"]:
    display(Markdown("_No bullets found (possible variation / blocked page / different layout)._"))
else:
    for bullet in product_data["bullets"]:
        display(Markdown(f"- {bullet}"))


Cell 9 — Batch mode (optional): scrape multiple ASINs

In [None]:
ASIN_LIST = ["B003R4Q7PS", "B00ZV9RDKK"]  # replace with your list

# One browser session is reused for every ASIN in the list.
driver = build_driver(headless=False)

results = []
try:
    for index, asin in enumerate(ASIN_LIST):
        if index:
            # Polite delay between consecutive requests. Placed before the request
            # so it also applies after a failed item and is not spent after the last one.
            time.sleep(2)
        try:
            data = extract_amazon_product(driver, asin)
            results.append(data)
        except Exception as e:
            # Keep going on per-item failures, but record what went wrong.
            # The class name is included because str(e) alone is nearly empty
            # for Selenium timeouts.
            results.append({"asin": asin, "error": f"{type(e).__name__}: {e}"})
finally:
    # Always close the browser, even if the loop is interrupted.
    driver.quit()

results
