# **Scrapping Amazon Product Data from Search Results**

In [1]:
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2


In [2]:
!pip install requests



In [3]:
# importing libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [4]:
# headers to mimic a real browser visit
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Referer": "https://www.google.com/",
}

url = 'https://www.amazon.com/s?k=moth+balls&page=2&xpid=ioWbEdwFbzmhp&qid=1760595892&ref=sr_pg_2'

webpage = requests.get(url, headers=headers)

In [5]:
webpage.status_code

200

In [None]:
s = BeautifulSoup(webpage.content, 'html.parser')

In [None]:
links = s.find_all('a', class_="a-link-normal s-line-clamp-4 s-link-style a-text-normal") # all the product links

In [None]:
product_url = 'https://www.amazon.com/' + links[0].get('href') # complete product URL

In [10]:
product_url

'https://www.amazon.com//Repellent-Peppermint-Repellents-Strongly-Squirrels/dp/B0FBV88WSC/ref=sr_1_49?dib=eyJ2IjoiMSJ9.vc2JNiYCZ1Kmu6AL-6JsHoaqIAOcLfymajq3IEbVOArKdPf4c8bJz6YUnJMxmRWbGU3x_QUukWFvAg6_ITm1YzFkEihBCd89O_j2CHJSNtRVu1NYnuL3lSavMgTTHUbU-hGNdVQEIWOo8p4GzRWT3EnDx61gXuocVwIzViIdaGxaOtWijHBcCghyPFdzZydP0gHculw9HC2LVoC-mjEVRADKCFO4ofGNGLoxrjobS1TQcCQHWGTuxqwauKPrSfO2wGUIRk6KwR7ruRaSOig3HUA6imDV8kra_e4LBrQrb4Y.kxpz2UtkPpcVfLAcmGM0g0ZdjHVmQQRwKazjJDjkoqU&dib_tag=se&keywords=moth+balls&qid=1760614412&sr=8-49&xpid=ioWbEdwFbzmhp'

In [None]:
list_webpage = requests.get(product_url, headers=headers) # requesting the product URL

In [12]:
list_webpage

<Response [200]>

In [13]:
product_s = BeautifulSoup(list_webpage.content, 'html.parser')

In [None]:
title = product_s('span', attrs={'id': "productTitle"})[0].text.strip() # product title

In [15]:
title

'Mice Repellent, Rodent Repellent Indoor/Outdoor, Moth Balls, Peppermint Pest and Mice Rat Repellents and Strongly Repel Roaches, Squirrels, Ants, Other Rodents 10 Pack'

In [None]:
rating = product_s('span', class_='reviewCountTextLinkedHistogram noUnderline')[0].get('title') # rating

In [None]:
rating

'3.3 out of 5 stars'

In [18]:
# price = product_s('span', class_="a-price-whole")[0].text + product_s('span', class_="a-price-fraction")[0].text
price = product_s('span', attrs={'class' : 'a-offscreen'})[0].text

In [19]:
price

'$29.99'

In [20]:
review = product_s('span', attrs={'id' : "acrCustomerReviewText"})[0].text

In [21]:
review

'25 ratings'

---

In [22]:
# headers to mimic a real browser visit
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Referer": "https://www.google.com/",
}

url = 'https://www.amazon.com/s?k=moth+balls&page=1&xpid=ioWbEdwFbzmhp&qid=1760595892&ref=sr_pg_1'

webpage = requests.get(url, headers=headers)

In [23]:
webpage

<Response [200]>

In [24]:
s = BeautifulSoup(webpage.content, 'html.parser')

In [None]:
products = s.find_all("div", {"data-component-type": "s-search-result"}) # all the products with the specified div and attribute

- ### Extracting ASINs

In [26]:
asin = []

for item in products:
  asin.append(item.get('data-asin'))

In [27]:
len(asin)

48

- ### Extracting Titles

In [28]:
title = []

for item in products:
  title.append(item.find('h2', class_='a-size-base-plus a-spacing-none a-color-base a-text-normal').text)

In [29]:
len(title)

48

- ### Extracting Images URL

In [30]:
image_url = []

for item in products:
  image_url.append(item.find('img', class_='s-image').get('srcset').split()[4])

In [31]:
len(image_url)

48

- ### Extracting Price

In [32]:
price = []

for item in products:
    whole_price = item.find("span", class_="a-price-whole")
    fraction_price = item.find("span", class_="a-price-fraction")

    if whole_price and fraction_price:
        temp_price = whole_price.text + fraction_price.text
    elif whole_price:
        temp_price = whole_price.text
    else:
        temp_price = "N/A"

    price.append(temp_price)

In [33]:
len(price)

48

- ### Extracting Number of Reviews

In [34]:
reviews = []

for item in products:
  temp_review = item.find('a', class_='a-link-normal s-underline-text s-underline-link-text s-link-style')
  if temp_review:
    reviews.append(temp_review.get('aria-label'))
  else:
    reviews.append('N/A')

In [35]:
reviews[5]

'2,617 ratings'

In [36]:
len(reviews)

48

- ### Extracting Ratings

In [37]:
ratings = []

for item in products:
  temp_rating = item.find('span', 'a-size-small a-color-base')
  if temp_rating:
    ratings.append(temp_rating.text)
  else:
    ratings.append('N/A')

In [38]:
ratings[:10]

['4.5', '4.3', '4.6', '4.5', '4.2', '4.2', '4.6', '4.4', '4.3', '4.4']

- Creating Listing URL using ASIN

In [39]:
listing_url = []

for i in asin:
  if i == 'N/A':
    listing_url.append('N/A')
  else:
    url = 'https://www.amazon.com/dp/' + i
    listing_url.append(url)

In [40]:
len(listing_url)

48

In [None]:
# creating a dictionary of the scraped data
data = {'ASIN' : asin,
        'title': title,
        'image_url' : image_url,
        'price': price,
        'reviews': reviews,
        'rating' : ratings,
        'listing_url': listing_url}

In [None]:
pd.DataFrame(data) # creating a dataframe from the dictionary

Unnamed: 0,ASIN,title,image_url,price,reviews,rating,listing_url
0,B07RRTLSXT,Enoz para Moth Balls for Closets and More - Ki...,https://m.media-amazon.com/images/I/814q8ILgMP...,16.45,"11,832 ratings",4.5,https://www.amazon.com/dp/B07RRTLSXT
1,B0B8L195N9,"4 Pack Old Fashioned Original Moth Balls, Carp...",https://m.media-amazon.com/images/I/61Q6y5PYNG...,8.99,"2,898 ratings",4.3,https://www.amazon.com/dp/B0B8L195N9
2,B0CJCP5LK9,"Moth Shield Moth Ball Traps for Clothes, Furni...",https://m.media-amazon.com/images/I/61QCv8bqQc...,4.94,297 ratings,4.6,https://www.amazon.com/dp/B0CJCP5LK9
3,B07JZVX9TT,Moth balls Moth Shield 4Oz Pack (Lavender Scen...,https://m.media-amazon.com/images/I/819G5L78qO...,8.99,"1,770 ratings",4.5,https://www.amazon.com/dp/B07JZVX9TT
4,B01CRC32J4,4oz Moth Balls,https://m.media-amazon.com/images/I/91bPPwHWd4...,5.89,"3,587 ratings",4.2,https://www.amazon.com/dp/B01CRC32J4
5,B077H5J4QJ,Moth Balls For Rodents,https://m.media-amazon.com/images/I/81Gtdy5Tbq...,3.26,"2,617 ratings",4.2,https://www.amazon.com/dp/B077H5J4QJ
6,B000LNU62I,NO Moth Closet Hanger Refills - Kills Clothes ...,https://m.media-amazon.com/images/I/81SBu1-Mvi...,48.26,427 ratings,4.6,https://www.amazon.com/dp/B000LNU62I
7,B0044UKLV0,Enoz Moth Ball Packets - Cedar Scented (Case o...,https://m.media-amazon.com/images/I/61hyX+FO3Z...,38.79,153 ratings,4.4,https://www.amazon.com/dp/B0044UKLV0
8,B0DLCT247N,FINEFIX Moth Balls for Outdoor Use Rodents Pac...,https://m.media-amazon.com/images/I/71gAzEznX6...,24.95,214 ratings,4.3,https://www.amazon.com/dp/B0DLCT247N
9,B07W8HML6W,para Moth Balls Kills Clothes Moths and Carpet...,https://m.media-amazon.com/images/I/512bXjNctE...,8.75,635 ratings,4.4,https://www.amazon.com/dp/B07W8HML6W
