In [1]:
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import os
import requests
import pandas as pd
import numpy as np


In [6]:
# Load environment variables from the .env file
load_dotenv()

# Access the variables
USER_AGENT = os.getenv("USER_AGENT")
ACCEPT_LANGUAGE = os.getenv("ACCEPT_LANGUAGE")
# print(USER_AGENT)
# print(ACCEPT_LANGUAGE)

### functions 

In [3]:
#Function to extract Product Title 
def get_title(soup):
    try:
        #outer tag Object
        title = soup.find("span",attrs={'id':'productTitle'})

        #Inner NavigatableString Object 
        title_value = title.text

        #Title as a String value
        title_string = title_value.strip()

    except AttributeError:
        title_string = ""

    return title_string


#Function to extract Product Price 
def get_price(soup):
    try:
        price = soup.find("span",attrs={'class':'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'}).string.strip()

    except AttributeError:
        
        try:
            #If there is some deal price
            price = soup.find("span",attrs={'class':'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'}).string.strip()

        except:
            price = "" 


    return price

#Function to extract Product Rating 
def get_rating(soup):

    try:
        rating = soup.find("i",attrs={'class':'a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big'}).string.strip()

    except AttributeError: 
        try:
            rating = soup.find("span",attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = ""
            
    return rating

#Function to extract Number of User Reviews
def get_review_count(soup):
    try:
       review_count = soup.find("span",attrs={'id':'acrCustomerReviewText'}).string.strip()

    except AttributeError:
        review_count = ""

    return review_count

#Function to extract Availability Status
def get_availability(soup):
    try:
        available = soup.find("div",attrs={'id':'availability'})
        available = available.find("span").string.strip()
    except AttributeError:
        available ="Not Available"

    return available
        
    

## Main Code

In [4]:
if __name__ == '__main__':
    # add your user agent
    HEADERS = ({'User-Agent':USER_AGENT,
    'Accept-Language':ACCEPT_LANGUAGE})

    #The webpage URL
    URL = "https://www.amazon.in/s?k=playstation+5&crid=3O8SOC5QG6OIU&sprefix=playstation+5%2Caps%2C237&ref=nb_sb_noss_2"

    #HTTP request 
    webpage = requests.get(URL,headers=HEADERS)

    # SOUP Object containing all data 
    soup = BeautifulSoup(webpage.content,"html.parser")

    #Fetch links as List of Tag Objects
    links = soup.find_all("a",attrs={'class':'a-link-normal s-line-clamp-2 s-link-style a-text-normal'})

    #Store the links
    links_list = [] 

    #Loop for extracting links from Tag Objects 
    for link in links: 
        links_list.append(link.get('href'))

    d = {"title":[],"price":[],"rating":[],"reviews":[],"availability":[]} 

    #Loop for extracting product details from each link
    for link in links_list:
        new_webpage = requests.get("https://amazon.in"+ link , headers=HEADERS)

        new_soup = BeautifulSoup(new_webpage.content,"html.parser")

        #Function calls to display all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

    amazon_df = pd.DataFrame.from_dict(d)
    # amazon_df['title'].replace('',np.nan,inplace=True)
    amazon_df['title'] = amazon_df['title'].replace('',np.nan)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv",header=True,index=False)

In [5]:
amazon_df 

Unnamed: 0,title,price,rating,reviews,availability
0,Sony PlayStation®5 Digital Edition (slim) Cons...,,4.6 out of 5 stars,324 ratings,In stock
1,Sony PlayStation5 Gaming Console (Slim),,4.4 out of 5 stars,358 ratings,In stock
2,Sony PS5® Console Video Game Disc - Fortnite B...,,4.5 out of 5 stars,46 ratings,In stock
3,Sony PS5® Console Video Game Digital - Fortnit...,,4.4 out of 5 stars,60 ratings,In stock
4,Sony DualSense Wireless Controller White (Play...,,4.2 out of 5 stars,"3,947 ratings",In stock
5,Sony DualSense Wireless Controller Red (PlaySt...,,4.4 out of 5 stars,812 ratings,In stock
6,Ant Esports Dock5 RGB with Cooling Fan and Dua...,,4.4 out of 5 stars,323 ratings,In stock
7,Sony PlayStation Portal Remote Player 5,,4.3 out of 5 stars,"3,289 ratings",In stock
8,U&I Entertainment Minecraft | Standard Edition...,,4.3 out of 5 stars,9 ratings,In stock
9,Take 2 Interactive Grand Theft Auto 5 (PS5),,4.9 out of 5 stars,26 ratings,In stock
