## Scraping for Amazon, Target data

### This is the code for scraping a given search keyword URL on Amazon which populates a .csv file using the resulting data, such as availability, price, product ID etc.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import re
import time
from datetime import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import io
from urllib.request import urlopen
import csv

In [2]:
# Function to extract Product Title
def get_title(soup):
     
    try:
        # Outer Tag Object
        title = soup.find("span", attrs={"id":'productTitle'})
 
        # Inner NavigatableString Object
        title_value = title.string
 
        # Title as a string value
        title_string = title_value.strip()
 
 
    except AttributeError:
        title_string = ""   
 
    return title_string
 
# Function to extract Product Price
def get_price(soup):
 
    try:
        price = soup.find("span", attrs={'id':'priceblock_ourprice'}).string.strip()
 
    except AttributeError:
 
        try:
            # If there is some deal price
            price = soup.find("span", attrs={'id':'priceblock_dealprice'}).string.strip()
 
        except:     
            price = ""  
 
    return price
 
# Function to extract Product Rating
def get_rating(soup):
 
    try:
        rating = soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
         
    except AttributeError:
         
        try:
            rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = "" 
 
    return rating
 
# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()
         
    except AttributeError:
        review_count = ""   
 
    return review_count
 
# Function to extract Availability Status
def get_availability(soup):
    try:
        available = soup.find("div", attrs={'id':'availability'})
        available = available.find("span").string.strip()
 
    except AttributeError:
        available = "Not Available"
 
    return available    
 
 
if __name__ == '__main__':
 
    # Headers for request
    HEADERS = ({'User-Agent':
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
                'Accept-Language': 'en-US'})
 
    # The webpage URL
    URL = "https://www.amazon.com/s?k=ppe&ref=nb_sb_noss_2"
     
    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)
 
    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "lxml")
 
    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})
 
    # Store the links
    links_list = []
 
    # Loop for extracting links from Tag Objects
    for link in links:
        links_list.append(link.get('href'))
 
 
    # Loop for extracting product details from each link 
    for link in links_list:
 
        new_webpage = requests.get("https://www.amazon.com" + link, headers=HEADERS)
 
        new_soup = BeautifulSoup(new_webpage.content, "lxml")
    
        curr_title = get_title(new_soup)
        curr_price = get_price(new_soup)
        curr_stock = get_availability(new_soup)
         
        # Function calls to display all necessary product information
        print("Product Title =", curr_title)
        print("Product Price =", curr_price)
        #print("Product Rating =", get_rating(new_soup))
        #print("Number of Product Reviews =", get_review_count(new_soup))
        print("Availability =", curr_stock)
        print()
        print()
        
        with open("heat.csv", "w", newline="", encoding='utf-8') as f_heat:
            csv_heat = csv.writer(f_heat)
            csv_heat.writerow(["Title", "Price", "In Stock?"])
            csv_heat.writerow([curr_title, curr_price, curr_stock])
    
    
        
        
#         with io.open('scraped_data', "w", encoding="utf-8") as f:
#             f.write(f"{curr_title},") 
#             f.write(f"{curr_price},") 
#             f.write(f"{curr_stock},\n")
#             f.close()

Product Title = 
Product Price = 
Availability = Not Available


Product Title = Polyethylene Cover Gown - Case of 75 | 23GSM Disposable Gown with Sleeves, Easy Over the Head Feature and Waist Tie | Thumb Loop Enclosure for Increased Coverage | Business Friendly Pricing
Product Price = $89.99
Availability = In Stock.


Product Title = 50 PCS Blue Face Masks, Non Woven Thick 3-Layers Breathable Facial Masks with Adjustable Earloop, Mouth and Nose Cover
Product Price = $9.96
Availability = In Stock.


Product Title = PPE Kit All-in-One Personal Protection Kits to GO,10 Pack
Product Price = 
Availability = In Stock.


Product Title = All-in-One Personal Protection Kits to GO - 10 Pack - With Plastic Gloves, Hand Cleansing Wipes, Disposable Face Mask
Product Price = $24.98
Availability = In Stock.


Product Title = 10pcs Glasses Face Shield Reusable Goggle Shields Replaceable Anti Fog Shields Transparent Face Shield Protect Face and Eyes for Women and Men (10, Transparent)
Product Price = 

Product Title = Zubrex 50 Pcs Disposable 3 Ply Safety Face Mask for Protection - with Nanofiber Lining Elastic Earloops, Lightweight Breathable Protective Anti-Dust Facial Masks Health School Office
Product Price = $12.98
Availability = In Stock.


Product Title = BLScode Disposable Face Protective Masks, 3-Layer Facial Cover Masks with Elastic Ear Loops, Comfortable Universal Design for Adults & Kids, Suitable for Home, Office, Outdoor (Pack of 50)
Product Price = $11.99
Availability = In Stock.


Product Title = TCP Global Salon World Safety - Kids Face Masks 150 Pk 3-Ply Protective PPE (5 Colors, 30 Each)
Product Price = $29.96
Availability = In Stock.


Product Title = TCP Global Salon World Safety - Sealed Dispenser Box of 50 Face Masks Breathable Disposable 3-Ply Protective PPE with Nose Clip and Ear Loops
Product Price = $9.96
Availability = In Stock.


Product Title = Pacific PPE 4Pairs Cleaning Glove Reusable Household Dishwashing Gloves-Latex Free Waterproof PVC Gloves for Ki

In [11]:
# Convert the extracted data to pandas dataframe for ease of use.
amazon_scrape = pd.read_csv("heat.csv") 
# Preview the first 5 lines of the loaded data 
amazon_scrape

Unnamed: 0,Title,Price,In Stock?
0,"Patriot Face Cover, Made in USA, NIOSH Approve...",$84.95,In Stock.
