# Amazon Web Scraping

### - By Izhan Abdullah 

In [4]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
from urllib.parse import urljoin


In [5]:
# Function to extract Product Title
def get_title(soup):
    try:
        title = soup.find("span", attrs={'id': 'productTitle'})
        if title:
            title_value = title.text.strip()
            return title_value
        else:
            return ""
    except AttributeError:
        return ""


    return title_string


# Function to extract Product Price
def get_price(soup):
    try:
        price = soup.find("span", attrs = {'class' : 'a-price-whole'}).string.strip()

    except AttributeError:
        try:
            # If there is some deal price
            price = soup.find("span", attrs = {'class' : 'a-price-whole'}).string.strip()
        except:
            price = ""

    return price


# Function to extract Product Rating
def get_rating(soup):
    try:
        rating = soup.find("i", attrs = {'id' : 'acrCustomerReviewText'}).string.strip()

    except AttributeError:
        try:
            rating = soup.find("span",attrs = {'id' : 'acrCustomerReviewText'}).string.strip()
        except:
            rating = ""

    return rating


# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("i", attrs={'class': 'a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big'}).string.strip()

    except AttributeError:
        review_count = ""

    return review_count


In [None]:
if __name__ == '__main__':
    # user agent
    HEADERS = {
        'User-Agent': 'whatismybrowser.com',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    # Webpage URL
    URL = "https://www.amazon.com/s?k=playstation+5&crid=1D0N9P29353LK&sprefix=playstation+5%2Caps%2C375&ref=nb_sb_noss_2"

    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as list of Tag Objects
    Links = soup.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    # Store the Links
    Links_list = []

    # Loop for extracting links from Tag objects
    for link in Links:
        Links_list.append(link.get('href'))

    d = {
        "title": [],
        "price": [],
        "rating": [],
        "reviews": []
    }


    for link in Links_list:
        # Make sure the link is a valid URL before making a request
        full_url = urljoin("https://www.amazon.com", link)
        
        try:
            new_webpage = requests.get(full_url, headers=HEADERS)
            new_soup = BeautifulSoup(new_webpage.content, "html.parser")
            
            # Function calls to display all necessary product information
            d['title'].append(get_title(new_soup))
            d['price'].append(get_price(new_soup))
            d['rating'].append(get_rating(new_soup))
            d['reviews'].append(get_review_count(new_soup))

        except requests.exceptions.RequestException as e:
            print("Error making request for link:", full_url)
            print("Error message:", str(e))

    
    amazon_df = pd.DataFrame.from_dict(d)
    amazon_df['title'].replace('',np.nan, inplace = True)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header= True, index= False)

In [5]:
amazon_df

Unnamed: 0,title,price,rating,reviews
0,PlayStation 5 Console (PS5),,"7,134 ratings",
1,PlayStation 5 Console – Marvel’s Spider-Man 2 ...,,121 ratings,4.6 out of 5 stars
2,PlayStation 5 Console CFI-1102A,,"7,922 ratings",
3,PlayStation PS5 Console – God of War Ragnarök ...,,"12,312 ratings",
4,PlayStation 5 Console (Renewed),,31 ratings,
5,$100 PlayStation Store Gift Card [Digital Code],,"230,001 ratings",4.7 out of 5 stars
6,Just Dance 2023 Edition (Code In Box) for Play...,,287 ratings,
7,$50 PlayStation Store Gift Card [Digital Code],,"230,001 ratings",4.7 out of 5 stars
8,$25 PlayStation Store Gift Card [Digital Code],,"230,001 ratings",4.7 out of 5 stars
9,$30 PlayStation Plus – Wallet Funds [Digital C...,,"230,001 ratings",4.7 out of 5 stars
