# **Web Scraping - Amazon Customer Reviews**

## Objective: 
* Scrape customer reviews from the Amazon India website (amazon.in) for the product: Apple iPhone 11 Pro (256GB) - Space Grey

## **Import the necessary libraries**

In [47]:
import sys
import time
from bs4 import BeautifulSoup
import requests
import re
import pycountry # ISO country, subdivision, language, currency
from datetime import datetime
import pandas as pd

In [17]:
# Review page to download.
url = 'https://www.amazon.in/Apple-iPhone-11-Pro-256GB/product-reviews/B07XVMJF2D/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews'

try:
    # A timeout (seconds) keeps the cell from hanging forever on a stalled
    # connection; requests does not apply one by default.
    response = requests.get(url, timeout=30)
except Exception:
    # Report which link failed and where, then re-raise: without a response
    # object the rest of the notebook cannot run, and silently continuing
    # would only surface later as a NameError on `response`.
    error_type, _, error_info = sys.exc_info()
    print ('ERROR FOR LINK:',url)
    print (error_type, 'Line:', error_info.tb_lineno)
    raise

# short pause after the request
time.sleep(2)

# store text/html
html = response.text

In [18]:
# show the response's `ok` flag as this cell's output
response.ok

True

### **Parse html code**

In [20]:
# Build the parse tree from the downloaded page using Python's
# built-in 'html.parser' backend.
soup = BeautifulSoup(markup=html, features='html.parser')

In [22]:
# All <span class="a-profile-name"> elements in the parsed page.
# NOTE(review): the same lookup is repeated in the profile-names cell below.
cus_names = soup.find_all(
    'span',
    class_='a-profile-name',
)

### **Customer Profile Names**

In [79]:
# Collect every <span class="a-profile-name"> element from the page.
cus_names = soup.find_all('span', class_='a-profile-name')

# Unique, whitespace-stripped profile names in first-seen order.
# dict.fromkeys keeps the first occurrence of each key and preserves
# insertion order, which gives an order-preserving de-duplication.
# NOTE(review): de-duplicating by name means two different reviewers who
# share a display name collapse into one entry, which would leave this list
# shorter than the other per-review lists — confirm this is acceptable.
profile_names = list(dict.fromkeys(tag.text.strip() for tag in cus_names))

### **Customer Ratings**

In [25]:
# Elements carrying data-hook="review-star-rating".
ratings = soup.find_all('i', attrs={'data-hook':'review-star-rating'})

# Stripped text of every rating element.
cus_ratings = [r.text.strip() for r in ratings]

# Isolate the first "<digit>.<digit>" number in each rating text and convert
# it to float. The pattern is a raw string so the backslashes reach the regex
# engine unchanged (a plain '\d' is an invalid string escape).
rating_pattern = re.compile(r'\d\.\d')

cus_ratings_num = []
for x in cus_ratings:
    match = rating_pattern.search(x)
    # Keep one entry per rating element even when no number is found, so the
    # list stays aligned with cus_ratings instead of raising AttributeError.
    cus_ratings_num.append(float(match.group()) if match else None)

[4.0, 5.0, 5.0, 3.0, 4.0, 5.0, 4.0, 5.0, 5.0, 4.0]

### **Customer Review Country and Date**

In [27]:
# Elements carrying data-hook="review-date"; their text is later searched for
# both a country name and a date.
review_date= soup.find_all('span', attrs={'data-hook':'review-date'})

# Raw text of every review-date element (left unstripped, as other cells read it).
cus_review_ = [i.text for i in review_date]

# All country names known to pycountry.
country_names = [country.name for country in pycountry.countries]

# Exactly one country entry per review text, so this list stays aligned with
# cus_review_:
#   * several names can match when one is a substring of another
#     (e.g. 'Niger' inside 'Nigeria') — the longest match is kept;
#   * when no name matches, None is stored instead of skipping the review.
country_of_review = []
for x in cus_review_:
    matches = [y for y in country_names if y in x]
    country_of_review.append(max(matches, key=len) if matches else None)

In [38]:
# Obtain the date from each entry of the cus_review_ list, as an ISO string.
# Expected date layout: day, full month name, four-digit year.
format_date = '%d %B %Y'

# From the first digit to the last digit at the end of the text.
# Raw string so '\d' is passed to the regex engine verbatim.
date_pattern = re.compile(r'\d.*\d$')

cus_date = []
for d in cus_review_:
    # Strip first: surrounding whitespace would otherwise stop the '$'-anchored
    # pattern from reaching the final digit.
    match = date_pattern.search(d.strip())
    if match is None:
        # Keep one entry per review so the list stays aligned with cus_review_.
        cus_date.append(None)
        continue
    date_data = match.group()
    cus_date.append(datetime.strptime(date_data, format_date).date().isoformat())

### **Customer Purchase Status**

In [40]:
# Elements carrying data-hook="avp-badge".
purchase = soup.find_all('span', attrs={'data-hook':'avp-badge'})

# Text of every badge element, in page order.
# NOTE(review): only badges that exist are collected, so if some reviews have
# no such badge this list will be shorter than the other per-review lists —
# confirm before combining them.
purchase_status = [badge.text for badge in purchase]

### **Customer Review Title**

In [42]:
# Anchor elements carrying data-hook="review-title".
review_t = soup.find_all('a', attrs={'data-hook':'review-title'})

# Whitespace-stripped title text, in page order.
review_title = [title.text.strip() for title in review_t]

### **Customer Review Body**

In [44]:
# Span elements carrying data-hook="review-body".
review_b = soup.find_all('span', attrs={'data-hook':'review-body'})

# Whitespace-stripped body text, in page order.
review_body = [body.text.strip() for body in review_b]

## **Create Data Frame for Amazon Customer Reviews**

In [82]:
# Column name -> list of scraped values (one entry per review expected).
data = {'Name':profile_names, 'Country':country_of_review, 'Date':cus_date, 'Ratings':cus_ratings_num, 'Purchase_Status':purchase_status, 'Title':review_title, 'Body':review_body}

# The lists are scraped independently, so verify they line up before building
# the frame. pandas would also raise ValueError on unequal lengths, but
# without saying which column is the odd one out.
column_lengths = {column: len(values) for column, values in data.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(
        'Scraped columns have different lengths: {}'.format(column_lengths)
    )

# create data frame: dict keys become column names, list items become records
amz_cus_reviews = pd.DataFrame(data)

# inspect dataframe
amz_cus_reviews.head()

Unnamed: 0,Name,Country,Date,Ratings,Purchase_Status,Title,Body
0,Bhagwant Patil,India,2021-04-02,4.0,Verified Purchase,Great iPhone but overpriced.,I'm writing this review after 1 month of use.L...
1,Aradhya.inc,India,2019-10-09,5.0,Verified Purchase,Great camera faster face id,Short: Go for it if you can afford.TL;DR;I've ...
2,Nomadic Nordic,India,2021-06-07,5.0,Verified Purchase,Precise in every dimension.,1. Box cartonComes with the plastic outer cove...
3,Deepak,India,2019-10-04,3.0,Verified Purchase,Upto the Mark but not too much change & Batter...,I am always being fan of iOS & Apple Products....
4,Farhad Tarapore,India,2020-03-13,4.0,Verified Purchase,Superb little package (11 Pro),What I love:1. Amazing battery life - lasts me...
