In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import numpy as np

In [2]:
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element on ``soup`` and returns
    its text with surrounding whitespace stripped.

    Returns an empty string when the lookup raises ``AttributeError``, which
    is what happens when ``find`` yields ``None`` because the element is
    absent.
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        return title_tag.text.strip()
    except AttributeError:
        # Element missing: report "no title" rather than failing the scrape.
        return ""

In [3]:
def get_price(soup):
    """Return the price string found in a parsed product page.

    Takes the first ``<span class="a-offscreen">`` element on ``soup`` and
    returns its ``.string`` with surrounding whitespace stripped.

    Returns an empty string when the lookup raises ``AttributeError`` (the
    element is missing, or its ``.string`` is ``None``).
    """
    try:
        price_tag = soup.find("span", attrs={"class": "a-offscreen"})
        return price_tag.string.strip()
    except AttributeError:
        # No usable price element on this page.
        return ""

In [4]:
def get_ratings(soup):
    """Return the star-rating text found in a parsed product page.

    Two lookups are attempted in order, and the first one that yields a
    string wins:

    1. ``<i class="a-icon a-icon-star a-star-4-5">``
    2. ``<span class="a-icon-alt">``

    The matched element's ``.string`` is returned with surrounding whitespace
    stripped. If both lookups raise ``AttributeError`` (element missing, or
    its ``.string`` is ``None``), an empty string is returned.

    Only ``AttributeError`` is handled, matching the other ``get_*`` helpers;
    anything else (including ``KeyboardInterrupt``) propagates to the caller
    instead of being silently swallowed.
    """
    selectors = (
        ("i", "a-icon a-icon-star a-star-4-5"),
        ("span", "a-icon-alt"),
    )
    for tag_name, css_class in selectors:
        try:
            return soup.find(tag_name, attrs={"class": css_class}).string.strip()
        except AttributeError:
            # This selector did not match; fall through to the next one.
            continue
    return ""

In [5]:
def get_reviews(soup):
    """Return the review-count text found in a parsed product page.

    Looks up the ``<span id="acrCustomerReviewText">`` element on ``soup`` and
    returns its ``.string`` with surrounding whitespace stripped.

    Returns an empty string when the lookup raises ``AttributeError`` (the
    element is missing, or its ``.string`` is ``None``).
    """
    try:
        review_tag = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        return review_tag.string.strip()
    except AttributeError:
        # No review counter on this page.
        return ""

In [6]:
def get_availability(soup):
    """Return the availability text found in a parsed product page.

    Finds ``<div id="availability">`` on ``soup``, takes the first ``<span>``
    inside it, and returns that span's ``.string`` with surrounding
    whitespace stripped.

    Returns the literal ``"Not Available"`` when any step of the lookup
    raises ``AttributeError`` (the div or span is missing, or the span's
    ``.string`` is ``None``).
    """
    try:
        container = soup.find("div", attrs={"id": "availability"})
        status_tag = container.find("span")
        return status_tag.string.strip()
    except AttributeError:
        # Any missing piece of the chain means no status could be read.
        return "Not Available"

In [7]:
if __name__ == '__main__':
    # Request headers sent with every page fetch.
    # NOTE(review): the User-Agent is blank in this notebook; presumably a real
    # browser string has to be filled in before running - confirm.
    # The Accept-Language weight must be attached with ";q=" (the previous
    # value "en, q = 0.5" was not a valid language-range list).
    HEADERS = ({'User-Agent' : '', 'Accept-Language' : 'en-US, en;q=0.5'})
    # Search-results page to scrape.
    URL = "https://www.amazon.com/s?k=iphone+16&ref=nb_sb_noss"
    # Fetch and parse the search-results page.
    webpage = requests.get(URL, headers=HEADERS)
    soup = BeautifulSoup(webpage.content, "html.parser")
    # Anchors that point at individual product pages.
    links = soup.find_all("a", attrs={'class' : 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})
    # Keep only anchors that actually carry an href: `link.get('href')` is None
    # for anchors without one, and None cannot be appended to the base URL.
    links_list = [href for href in (link.get('href') for link in links) if href]
    # Column-oriented accumulator: one list per output column.
    d = {"title" : [], "price" : [], "ratings" : [], "reviews" : [], "availability" : []}
    for link in links_list:
        # `headers` must be passed by keyword: the second positional parameter
        # of requests.get is `params`, so passing HEADERS positionally sent
        # them as query-string parameters instead of HTTP headers.
        webpage = requests.get("https://www.amazon.com" + link, headers=HEADERS)
        soup = BeautifulSoup(webpage.content, "html.parser")
        d['title'].append(get_title(soup))
        d['price'].append(get_price(soup))
        d['ratings'].append(get_ratings(soup))
        d['reviews'].append(get_reviews(soup))
        d['availability'].append(get_availability(soup))
df = pd.DataFrame.from_dict(d)
# Treat empty strings as missing values, then drop rows with no title.
df = df.replace('', np.nan)
df = df.dropna(subset=['title'])
df.to_csv("amazon_data.csv", header=True, index=False)

In [8]:
# Display the scraped listings (the same frame that was written to amazon_data.csv).
df

Unnamed: 0,title,price,ratings,reviews,availability
0,Boost Mobile | Apple iPhone 16 (128 GB) - Ultr...,,5.0 out of 5 stars,,Not Available
2,Boost Mobile | Apple iPhone 16 Pro (128 GB) - ...,,5.0 out of 5 stars,,Not Available
4,"SAMSUNG Galaxy S24 Ultra Cell Phone, 512GB AI ...",$893.47,4.5 out of 5 stars,"1,802 ratings",Not Available
5,"OnePlus 12R, 16GB RAM+256GB, Dual-SIM, US Fact...",$507.83,4.4 out of 5 stars,421 ratings,Not Available
6,"OnePlus Open, 16GB RAM+512GB, Dual-SIM, Emeral...","$1,149.07",4.1 out of 5 stars,230 ratings,Not Available
7,"OnePlus 12,16GB RAM+512GB,Dual-SIM,Unlocked An...",$702.71,4.5 out of 5 stars,533 ratings,Not Available
8,Boost Mobile | Apple iPhone 16 Pro Max (256 GB...,,5.0 out of 5 stars,,Not Available
10,OnePlus Nord N30 5G | Unlocked Dual-SIM Androi...,$217.92,4.3 out of 5 stars,988 ratings,Not Available
12,Boost Mobile | Apple iPhone 16 Pro (128 GB) - ...,,5.0 out of 5 stars,,Not Available
14,Boost Mobile | Apple iPhone 16 (128 GB) - Ultr...,,5.0 out of 5 stars,,Not Available
