In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [5]:
#Function to extract Product Title
def get_title(soup):
    """Return the product title from a parsed product page.

    Looks up the ``<span id="productTitle">`` element and returns its
    ``text`` with leading/trailing whitespace removed.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or an empty string when any attribute
        access in the lookup chain raises ``AttributeError`` (e.g. when
        ``find`` returns ``None`` for a missing element).
    """
    try:
        # Lookup, text extraction and stripping in one guarded expression.
        return soup.find("span", attrs={'id': 'productTitle'}).text.strip()
    except AttributeError:
        return ""

#Function to extract Product Price
def get_price(soup):
    """Return the product price text, or "" when no price is found.

    The regular price element (``id="priceblock_ourprice"``) is tried
    first; if that lookup fails, the deal price element
    (``id="priceblock_dealprice"``) is tried next.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped ``.string`` of the first candidate element that
        yields one, or an empty string when neither does.
    """
    for element_id in ("priceblock_ourprice", "priceblock_dealprice"):
        try:
            return soup.find("span", attrs={'id': element_id}).string.strip()
        except AttributeError:
            # The element is missing or exposes no usable ``.string``:
            # move on to the next candidate id. Only AttributeError is
            # handled so unrelated failures (including KeyboardInterrupt)
            # are not silently swallowed.
            continue
    return ""

#Function to extract product rating
def get_rating(soup):
    """Return the product rating text, or "" when no rating is found.

    Two lookups are attempted in order: an ``<i>`` element with class
    ``a-icon a-icon-star a-star-4-5``, then a ``<span>`` element with class
    ``a-icon-alt``.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped ``.string`` of the first lookup that yields one, or an
        empty string when neither does.
    """
    # NOTE(review): the first selector names the ``a-star-4-5`` class
    # specifically; other ratings presumably rely on the fallback — confirm
    # that this is intended.
    lookups = (
        ("i", 'a-icon a-icon-star a-star-4-5'),
        ("span", 'a-icon-alt'),
    )
    for tag_name, css_class in lookups:
        try:
            return soup.find(tag_name, attrs={'class': css_class}).string.strip()
        except AttributeError:
            # Missing element or no usable ``.string``: try the next lookup.
            # Only AttributeError is handled so unrelated failures are not
            # silently swallowed.
            continue
    return ""

#Function to extract number of user reviews
def get_review_count(soup):
    """Return the review-count text from a parsed product page.

    Reads the ``.string`` of the ``<span id="acrCustomerReviewText">``
    element and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped text, or an empty string when the lookup chain raises
        ``AttributeError`` (e.g. the element is missing).
    """
    try:
        count_tag = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
        return count_tag.string.strip()
    except AttributeError:
        return ""

#Function to extract Availability Status
def get_availability(soup):
    """Return the availability text, or "Not Available" when none is found.

    Finds the ``<div id="availability">`` element, then the first ``<span>``
    inside it, and returns that span's ``.string`` stripped of surrounding
    whitespace.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped availability text, or the literal ``"Not Available"``
        when any step of the lookup raises ``AttributeError`` (e.g. the
        container or the span is missing).
    """
    try:
        container = soup.find("div", attrs={'id': 'availability'})
        return container.find("span").string.strip()
    except AttributeError:
        # Only the "element not present" failure is treated as unavailable;
        # other exceptions are allowed to surface instead of being hidden.
        return "Not Available"
  

In [8]:
if __name__ == '__main__':
    # Scrape an Amazon search-results page, visit every product link found
    # on it, collect title/price/rating/reviews/availability for each
    # product, and write the rows that have a title to "amazon_data.csv".

    # Local import: only this cell needs it.
    from urllib.parse import urljoin

    # Base used to resolve the hrefs found in the search results.
    BASE_URL = "https://www.amazon.com"
    # Seconds to wait for a response, so a stalled connection cannot hang
    # the cell indefinitely.
    REQUEST_TIMEOUT = 30

    #Headers for request
    Headers = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36','Accept-Language':'en-US,en;q=0.5'})

    #The Webpage URL
    url = "https://www.amazon.com/s?k=laptops+on+sale+clearance+2024&crid=J0OOO1J6Z0CL&sprefix=lap%2Caps%2C503&ref=nb_sb_ss_ts-doa-p_2_3"
    #HTTP request
    webpage = requests.get(url, headers=Headers, timeout=REQUEST_TIMEOUT)
    print(webpage)

    print(type(webpage.content))

    #Converting bytes into html format
    soup = BeautifulSoup(webpage.content, 'html.parser')

    #Fetch links as list of Tag objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    #Store the links, skipping anchors without an href (``get`` yields None
    #for those, which would break URL construction below)
    links_list = [href for href in (link.get('href') for link in links) if href]

    d = {'title':[],'price':[],'rating':[],'reviews':[],'availability':[]}

    #Loop for extracting product details from each link
    for link in links_list:
        # urljoin handles both site-relative and already-absolute hrefs.
        product_url = urljoin(BASE_URL, link)
        try:
            new_webpage = requests.get(product_url, headers=Headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable product page should not discard the rest.
            print(f"Skipping {product_url}: {exc}")
            continue
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        #Function calls to collect all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

    amazon_df = pd.DataFrame.from_dict(d)
    # Assign the result back instead of calling replace(..., inplace=True)
    # on the selected column: the in-place form acts on a temporary and is
    # not guaranteed to update the frame (it is a no-op under pandas
    # Copy-on-Write), which would leave empty titles in place.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    # Rows with no title are dropped before export.
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)


<Response [200]>
<class 'bytes'>


In [12]:
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
0,"HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 6...",,4.0 out of 5 stars,"1,935 ratings",Not Available
1,"HP 14 Laptop (14"" Micro-Edge, Intel N4120, 16G...",,4.1 out of 5 stars,82 ratings,Not Available
2,"HP 14"" Ultral Light Laptop for Students and Bu...",,4.1 out of 5 stars,"1,859 ratings",Not Available
3,"Lenovo Chromebook 14 Touchscreen Laptop (14"" F...",,5.0 out of 5 stars,3 ratings,Not Available
4,Lenovo 100E Chromebook 2ND Gen 81QB000AUS Lapt...,,4.6 out of 5 stars,124 ratings,Only 2 left in stock - order soon.
5,"ASUS TUF Gaming A15 (2024) Gaming Laptop, 15.6...",,4.3 out of 5 stars,684 ratings,Not Available
6,"Dell Inspiron 15 3520 3000 15.6"" Touchscreen F...",,5.0 out of 5 stars,21 ratings,Not Available
7,"Dell Chromebook 11 3100 11.6"" Chromebook - 136...",,3.9 out of 5 stars,583 ratings,Only 14 left in stock - order soon.
8,"ASUS Zenbook 14 OLED 2024 Business Laptop 14"" ...",,3.8 out of 5 stars,5 ratings,Only 1 left in stock - order soon.
9,Apple 2024 MacBook Air 15-inch Laptop with M3 ...,,4.6 out of 5 stars,919 ratings,Not Available
