In [5]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [6]:
# Function to extract Product Title
def getTitle(soup):
    
    try:
        # Outer Tag Object
        title = soup.find("span",attrs={'id':'productTitle'})
        
        # Inner Navigatable Object
        title_text = title.text
        
        # Title as String Value
        title_string = title_text.strip()
        
    except AttributeError:
        title_string = ""
        
    return title_string


# Function to extract Product Price
def getPrice(soup):
    
    try:
        # Outer Tag Object
        price = soup.find("span",attrs = {"class":"a-offscreen"})
        
        # Inner Navigatable Object
        price_text = price.text
        
        # Price as String Value
        price_string = price_text.strip()
        
    except AttributeError:
        price_string = ""
        
    return price_string


# Function to extract Product Ratings
def getRatings(soup):
    
    try:
        # Outer Tag Object
        rating = soup.find("span",attrs={"class":"a-icon-alt"})
        
        # Inner Navigatable Object
        rating_text = rating.text
        
        # Rating as String Value
        rating_string = rating_text.strip()
        
    except AttributeError:
        rating_string = ""
        
    return rating_string


# Function to extract Review Count
def getReviews(soup):
    
    try:
        # Outer Tag Object
        reviews = soup.find("span",attrs={"id":"acrCustomerReviewText"})
        
        # Inner Navigatable Object
        reviews_text = reviews.text
        
        # Reviews as String Value
        reviews_string = reviews_text.strip()
        
    except AttributeError:
        reviews_string = ""
        
    return reviews_string


# Function to extract Availability
def getAvailability(soup):
    
    try:
        # Outer Tag Object
        availability = soup.find("span",attrs={"class":"a-size-medium a-color-success"})
        
        # Inner Navigatable Object
        availability_text = availability.text
        
        # Reviews as String Value
        availability_string = availability_text.strip()
        
    except AttributeError:
        availability_string = ""
        
    return availability_string

In [8]:
if __name__ == '__main__':
    
    
    # Headers for request
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', 'Accept-Language':'en-US, en;q=0.5'})
    
    
    # The Webpage URL
    URL = "https://www.amazon.com/s?k=playstation%2B5&crid=3E414I3C996MU&sprefix=playstation%2B5%2Caps%2C881&ref=nb_sb_noss_1"
    
    
    # HTTP Request
    webpage = requests.get(URL, headers = HEADERS)
    
     
    # Soup Object containing all Data
    soup = BeautifulSoup(webpage.content,"html.parser")
    
    
    # Fetch links a s list of Tags Object
    links = soup.find_all("a",attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})
    
    
    # Store the Links
    links_list = []
    
    
    # Loops for extracting Links from Tag Object
    for link in links:
        links_list.append(link.get("href"))
        
    prodDict = {"title":[] , "price":[], "ratings":[], "reviews":[], "availability":[]}
    
    
    # Loops for extracting Product Details from each Object
    for link in links_list:
        new_webpage = requests.get("https://amazon.com" + link, headers = HEADERS)
        
        new_soup = BeautifulSoup(new_webpage.content,"html.parser")
        
        # Function calls to display all necessary Product information
        prodDict["title"].append(getTitle(new_soup))
        prodDict["price"].append(getPrice(new_soup))
        prodDict["ratings"].append(getRatings(new_soup))
        prodDict["reviews"].append(getReviews(new_soup))
        prodDict["availability"].append(getAvailability(new_soup))
        
    
    amazon_df = pd.DataFrame.from_dict(prodDict)
    amazon_df['title'].replace('',np.nan,inplace=True)
    amazon_df = amazon_df.dropna(subset = ['title'])
    amazon_df.to_csv("amazon_scrapped_data_devesh.csv",header=True,index=False)



In [10]:
amazon_df

Unnamed: 0,title,price,ratings,reviews,availability
0,Sonic Origins Plus - PlayStation 5,$39.99,Previous page of related Sponsored Products,,"This item will be released on June 23, 2023."
1,PlayStation PS5 Console – God of War Ragnarök ...,$509.00,4.8 out of 5 stars,"7,049 ratings",In Stock
2,PlayStation 5 Console CFI-1215A01X,$499.00,4.8 out of 5 stars,"2,485 ratings",In Stock
3,PlayStation DualSense Wireless Controller - St...,$74.94,4.8 out of 5 stars,"71,945 ratings",
4,Minecraft Legends - Deluxe Edition PlayStation 5,$49.99,Previous page of related Sponsored Products,,"This item will be released on April 18, 2023."
5,PlayStation 5 Pulse 3D Wireless Headset,$97.00,4.7 out of 5 stars,"19,150 ratings",
6,PlayStation 5 Console CFI-1102A,$479.97,4.9 out of 5 stars,"7,201 ratings",
7,Marvel's Midnight Suns Enhanced Edition - Play...,$39.99,4.3 out of 5 stars,191 ratings,In Stock
8,Razer Quick Charging Stand for PlayStation 5: ...,$29.99,4.8 out of 5 stars,"1,320 ratings",In Stock
9,PowerA Ultra High Speed HDMI Cable for PlaySta...,$39.99,4.8 out of 5 stars,"2,417 ratings",In Stock
