# Amazon Web Scraping
## Obtaining daily price changes for a product (a refurbished iPad)

In [12]:
# import libraries

import csv
import datetime
import os
import smtplib
import time
from email.message import EmailMessage

import requests
from bs4 import BeautifulSoup


In [22]:
# connect to the product website/link
URL = "https://www.amazon.co.uk/Apple-iPad-WI-FI-32GB-Refurbished/dp/B07NYS898H/ref=zg-bs_amazon-renewed_sccl_3/258-0469945-6067500?pd_rd_w=WxkkY&content-id=amzn1.sym.401f1a3a-5fa9-46fb-9ed2-7c7d241a11cd&pf_rd_p=401f1a3a-5fa9-46fb-9ed2-7c7d241a11cd&pf_rd_r=43YKHB1FR44WPFEHVM75&pd_rd_wg=iNlum&pd_rd_r=d20b281e-2b5c-4285-a827-bcfe4f3319dc&pd_rd_i=B07NYS898H&psc=1"

# browser-like request headers; the User-Agent string was copied from https://httpbin.org/get
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.42",
    "Accept-Encoding": "gzip, deflate",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "DNT": "1",
    "Connection": "close",
    "Upgrade-Insecure-Requests": "1",
}

# a timeout stops a stalled connection from hanging the notebook forever;
# raise_for_status() turns an HTTP error response into an exception instead of
# letting us parse an error page
page = requests.get(URL, headers=headers, timeout=30)
page.raise_for_status()

# pull the content from the page, then re-parse its prettified form
# (this is what produces the indented text shown in the output below)
soup1 = BeautifulSoup(page.content, "html.parser")
soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

# locate the elements we want; find() returns None when an id is not on the page
title_tag = soup2.find(id='productTitle')
price_tag = soup2.find(id='renewedBuyBoxPrice')
if title_tag is None or price_tag is None:
    raise RuntimeError(
        "Could not find 'productTitle' / 'renewedBuyBoxPrice' in the downloaded page"
    )

title = title_tag.get_text()
price = price_tag.get_text()

# view data obtained
print(title)
print(price)


            2018 Apple iPad (9.7-inch, WiFi, 32GB) - Space Grey (Renewed)
           

                    £175.00
                   


In [23]:
# clean output
# strip surrounding whitespace, then remove the leading pound sign from the price.
# lstrip('£') is used instead of slicing off the first character so that this
# cell is idempotent: running it a second time no longer eats the first digit.

price = price.strip().lstrip('£')
title = title.strip()

print(title)
print(price)

2018 Apple iPad (9.7-inch, WiFi, 32GB) - Space Grey (Renewed)
175.00


In [24]:
# Record the calendar date on which the price was extracted
# (the datetime module is imported in the first cell)

today = datetime.datetime.now().date()

# a date prints in ISO format (YYYY-MM-DD)
print(today.isoformat())

2022-12-22


In [25]:
# create the csv file and store the first scraped row in it

import csv

header = ['Title', 'Price', 'Date']
data = [title, price, today]

# note: mode 'w' creates (or overwrites) the file; newline='' is the setting the
# csv module documentation recommends for files handed to csv.writer
with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as csv_file:
    # header row first, then the single data row
    csv.writer(csv_file).writerows([header, data])
 


In [26]:
# view csv data

import pandas as pd

# Read the file through the same relative path the writer cells use. The
# previous absolute path (C:\Users\cheil\...) only worked on one machine and
# only when the notebook's working directory happened to be that folder.
df = pd.read_csv('AmazonWebScraperDataset.csv')

print(df)

                                               Title  Price        Date
0  2018 Apple iPad (9.7-inch, WiFi, 32GB) - Space...  175.0  2022-12-22


In [27]:
# append the scraped row to the existing csv

# note: mode 'a+' opens the file for appending (and reading), so the rows that
# are already in the file are kept
with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as csv_file:
    csv.writer(csv_file).writerow(data)


In [30]:
# create function to automate the price check using the code we used above

def check_price(url=None, headers=None, csv_path='AmazonWebScraperDataset.csv', timeout=30):
    """Scrape the product title and price once and append them to the csv.

    Args:
        url: Product page to download. Defaults to the refurbished iPad
            listing used throughout this notebook.
        headers: HTTP request headers. Defaults to the browser-like headers
            used throughout this notebook.
        csv_path: File the row is appended to. A header row is written first
            when the file is new or empty.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The appended row as ``[title, price, date]``; ``price`` is the scraped
        text with its first character (the currency symbol) removed.

    Raises:
        requests.HTTPError: The server answered with an error status.
        RuntimeError: The title or price element is not in the page.
    """
    if url is None:
        url = "https://www.amazon.co.uk/Apple-iPad-WI-FI-32GB-Refurbished/dp/B07NYS898H/ref=zg-bs_amazon-renewed_sccl_3/258-0469945-6067500?pd_rd_w=WxkkY&content-id=amzn1.sym.401f1a3a-5fa9-46fb-9ed2-7c7d241a11cd&pf_rd_p=401f1a3a-5fa9-46fb-9ed2-7c7d241a11cd&pf_rd_r=43YKHB1FR44WPFEHVM75&pd_rd_wg=iNlum&pd_rd_r=d20b281e-2b5c-4285-a827-bcfe4f3319dc&pd_rd_i=B07NYS898H&psc=1"
    if headers is None:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.42",
            "Accept-Encoding": "gzip, deflate",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "DNT": "1",
            "Connection": "close",
            "Upgrade-Insecure-Requests": "1",
        }

    # get website content; the timeout keeps an unattended run from hanging
    # forever, and raise_for_status() rejects HTTP error responses
    page = requests.get(url, headers=headers, timeout=timeout)
    page.raise_for_status()

    # parse, then re-parse the prettified markup (same two-step parse as the
    # exploratory cell, so the extracted text stays comparable)
    soup1 = BeautifulSoup(page.content, "html.parser")
    soup2 = BeautifulSoup(soup1.prettify(), "html.parser")

    # find() returns None when the id is missing; fail with a clear message
    # instead of an AttributeError on None
    title_tag = soup2.find(id='productTitle')
    price_tag = soup2.find(id='renewedBuyBoxPrice')
    if title_tag is None or price_tag is None:
        raise RuntimeError(
            "Could not find 'productTitle' / 'renewedBuyBoxPrice' in the downloaded page"
        )

    # get information we need: trimmed title, and the price without its
    # leading one-character currency symbol
    title = title_tag.get_text().strip()
    price = price_tag.get_text().strip()[1:]

    # obtain date price is checked
    today = datetime.date.today()

    # append new data into csv
    data = [title, price, today]
    with open(csv_path, 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        # a file opened for appending is positioned at its end, so position 0
        # means the file is new or empty and still needs its header row
        if f.tell() == 0:
            writer.writerow(['Title', 'Price', 'Date'])
        writer.writerow(data)

    # NOTE: the e-mail alert is still disabled. To enable it, compare
    # float(price) with the threshold (price is a string here) and call send_mail().
    return data

In [None]:
# check price daily and automatically update csv
# NOTE: this loop never ends on its own; it occupies the kernel until it is
# interrupted, so the cells below it do not run while it is active.

SECONDS_PER_DAY = 24 * 60 * 60

while True:
    check_price()
    time.sleep(SECONDS_PER_DAY)  # wait one day before the next check

In [None]:
# look at csv data

import pandas as pd

# Use the same relative path that check_price() and the writer cells use.
# The previous absolute path (C:\Users\cheil\...) tied this cell to one machine.
df = pd.read_csv('AmazonWebScraperDataset.csv')

print(df)

In [None]:
# in case I want to get an email when the price goes down

def send_mail(sender=None, password=None, recipient=None, host='smtp.gmail.com', port=465):
    """Send the "price dropped" notification e-mail over SMTP with SSL.

    Args:
        sender: Address used to log in and as the From header. Falls back to
            the PRICE_ALERT_EMAIL environment variable.
        password: Password for ``sender``. Falls back to the
            PRICE_ALERT_PASSWORD environment variable, so no secret has to be
            stored in the notebook.
        recipient: Address the alert is sent to. Defaults to ``sender``.
        host: SMTP server name. NOTE(review): the earlier version of this
            cell paired an outlook.com address with Gmail's server; pass the
            host that matches the sender's provider.
        port: SMTP-over-SSL port.

    Raises:
        ValueError: No sender address or password is available.
    """
    sender = sender or os.environ.get('PRICE_ALERT_EMAIL')
    password = password or os.environ.get('PRICE_ALERT_PASSWORD')
    if not sender or not password:
        raise ValueError(
            "Provide sender/password or set PRICE_ALERT_EMAIL and PRICE_ALERT_PASSWORD"
        )
    recipient = recipient or sender

    # EmailMessage takes care of header formatting and encoding; the subject
    # contains a non-ASCII '£', which a plain str passed to sendmail() cannot carry
    msg = EmailMessage()
    msg['Subject'] = "That ipad you were looking at is now below £150!"
    msg['From'] = sender
    msg['To'] = recipient
    msg.set_content("Cheila, the product is at a reduced price!")

    # the context manager closes the connection even if login or sending fails
    with smtplib.SMTP_SSL(host, port) as server:
        server.login(sender, password)
        server.send_message(msg)