In [1]:
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup

In [2]:
########### Data Parsing #########

In [3]:
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

In [4]:
reviewlist = []

In [5]:
def get_soup(url):
    
    r = requests.get(url, headers = headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    
    return soup

In [6]:
def get_reviews(soup):
    reviews = soup.find_all('div', {'data-hook': 'review'})
    try:
        for item in reviews:
            review = {
            'product': soup.title.text.replace('Amazon.co.uk:Customer reviews:', '').strip(),
            'title': item.find('a', {'data-hook': 'review-title'}).text.strip(),
            'rating':  float(item.find('i', {'data-hook': 'review-star-rating'}).text.replace('out of 5 stars', '').strip()),
            'v_purchase' : item.find('span', {'data-hook': 'avp-badge'}).text.strip(),
            'body': item.find('span', {'data-hook': 'review-body'}).text.strip(),
            }
            reviewlist.append(review)
    except:
        pass

In [7]:
for x in tqdm(range(1,999)):
    soup = get_soup(f'https://www.amazon.in/New-Apple-iPhone-12-64GB/product-reviews/B08L5TGWD1/ref=cm_cr_arp_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber={x}')
    get_reviews(soup)
    if not soup.find('li', {'class': 'a-disabled a-last'}):
        pass
    else:
        break

 11%|██████████████▉                                                                                                                    | 114/998 [02:03<15:56,  1.08s/it]


In [8]:
reviewlist

[{'product': 'Amazon.in:Customer reviews: Apple iPhone 12 (64GB) - (Product) RED',
  'title': 'Worst battery',
  'rating': 3.0,
  'v_purchase': 'Verified Purchase',
  'body': 'Worst battery performance.Iphone 11 is far better den this..In 4 hour battery will come down from 100 to 15 percent.Please dont buy this product at this price.'},
 {'product': 'Amazon.in:Customer reviews: Apple iPhone 12 (64GB) - (Product) RED',
  'title': 'Kidney as a load balancer',
  'rating': 4.0,
  'v_purchase': 'Verified Purchase',
  'body': 'Sold kidney bought this, now not feeling well but the number of days I am alive with one kidney will enjoy using this phone. Guys be careful if you rich it’s ok else sell something else but not kidney it hurts'},
 {'product': 'Amazon.in:Customer reviews: Apple iPhone 12 (64GB) - (Product) RED',
  'title': 'Another Fabulous IPhone ( IPhone 12, Blue 64GB)',
  'rating': 5.0,
  'v_purchase': 'Verified Purchase',
  'body': 'The media could not be loaded.\n                \n

In [9]:
import pandas as pd

In [10]:
review_df = pd.DataFrame(reviewlist)

In [11]:
review_df.head()

Unnamed: 0,product,title,rating,v_purchase,body
0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...
1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ..."
2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...
3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...
4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...


In [12]:
review_df.to_csv("iphone12_amz_reviews.csv")

In [13]:
######### Data cleaning ########

In [14]:
import nltk

In [15]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\goura\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [16]:
stopwords = nltk.corpus.stopwords.words('english')

In [17]:
review_df_new = pd.read_csv("iphone12_amz_reviews.csv")
review_df_new.head()

Unnamed: 0.1,Unnamed: 0,product,title,rating,v_purchase,body
0,0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...
1,1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ..."
2,2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...
3,3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...
4,4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...


In [18]:
review_df_new = review_df_new.loc[:, ~review_df_new.columns.str.contains('^Unnamed')]

In [19]:
# remove stopwords from review body
review_df_new['body_without_stopwords'] = review_df_new['body'].apply(lambda x: ' '.join([word for word in str(x).split() if word not in (stopwords)]))

In [20]:
review_df_new.head()

Unnamed: 0,product,title,rating,v_purchase,body,body_without_stopwords
0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...,Worst battery performance.Iphone 11 far better...
1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ...","Sold kidney bought this, feeling well number d..."
2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...,The media could loaded. Another beauty Apple. ...
3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...,This first switch ios device using android unt...
4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...,Extremely disappointed phone. It’s seemingly a...


In [21]:
# removing punctuation from review body
review_df_new["body_without_stopwords"] = review_df_new['body_without_stopwords'].str.replace('[^\w\s]','')

  review_df_new["body_without_stopwords"] = review_df_new['body_without_stopwords'].str.replace('[^\w\s]','')


In [22]:
review_df_new.head()

Unnamed: 0,product,title,rating,v_purchase,body,body_without_stopwords
0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...,Worst battery performanceIphone 11 far better ...
1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ...",Sold kidney bought this feeling well number da...
2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...,The media could loaded Another beauty Apple I ...
3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...,This first switch ios device using android unt...
4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...,Extremely disappointed phone Its seemingly ave...


In [23]:
####### Sentiment analysis ######

In [24]:
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [25]:
# getting subjective and objective from reviewers viewpoint, where 
# The polarity score is a float within the range [-1.0, 1.0]. The subjectivity is a float within the range [0.0, 1.0] 
# where 0.0 is very objective and 1.0 is very subjective.

In [26]:
review_df_new[['polarity', 'subjectivity']] = review_df_new['body_without_stopwords'].apply(lambda Text: pd.Series(TextBlob(Text).sentiment))

In [27]:
review_df_new.head()

Unnamed: 0,product,title,rating,v_purchase,body,body_without_stopwords,polarity,subjectivity
0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...,Worst battery performanceIphone 11 far better ...,-0.133333,0.833333
1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ...",Sold kidney bought this feeling well number da...,0.255,0.63
2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...,The media could loaded Another beauty Apple I ...,0.162247,0.539151
3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...,This first switch ios device using android unt...,0.31875,0.601935
4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...,Extremely disappointed phone Its seemingly ave...,-0.120635,0.387393


In [28]:
#load VADER
analyzer = SentimentIntensityAnalyzer()

In [29]:
#Add VADER metrics to dataframe
review_df_new['compound'] = [analyzer.polarity_scores(v)['compound'] for v in review_df_new['body_without_stopwords']]
review_df_new['neg'] = [analyzer.polarity_scores(v)['neg'] for v in review_df_new['body_without_stopwords']]
review_df_new['neu'] = [analyzer.polarity_scores(v)['neu'] for v in review_df_new['body_without_stopwords']]
review_df_new['pos'] = [analyzer.polarity_scores(v)['pos'] for v in review_df_new['body_without_stopwords']]
review_df_new.head(3)

Unnamed: 0,product,title,rating,v_purchase,body,body_without_stopwords,polarity,subjectivity,compound,neg,neu,pos
0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...,Worst battery performanceIphone 11 far better ...,-0.133333,0.833333,-0.296,0.171,0.708,0.121
1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ...",Sold kidney bought this feeling well number da...,0.255,0.63,0.9027,0.081,0.444,0.476
2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...,The media could loaded Another beauty Apple I ...,0.162247,0.539151,0.9945,0.05,0.699,0.251


In [30]:
review_df_new.to_csv("iphone12_amz_reviews_WITH_SENTIMENT.csv")

In [31]:
#### saving the updated df into sqllite db ######

In [32]:
import sqlite3 as sql

In [33]:
conn = sql.connect('amz_reviewsv1.db')
review_df_new.to_sql('iphone12_reviews_v1', conn)

In [34]:
### Checking DB #####

In [36]:
reviews_from_db = pd.read_sql('SELECT * FROM iphone12_reviews_v1', conn)

In [37]:
reviews_from_db.head()

Unnamed: 0,index,product,title,rating,v_purchase,body,body_without_stopwords,polarity,subjectivity,compound,neg,neu,pos
0,0,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Worst battery,3.0,Verified Purchase,Worst battery performance.Iphone 11 is far bet...,Worst battery performanceIphone 11 far better ...,-0.133333,0.833333,-0.296,0.171,0.708,0.121
1,1,Amazon.in:Customer reviews: Apple iPhone 12 (6...,Kidney as a load balancer,4.0,Verified Purchase,"Sold kidney bought this, now not feeling well ...",Sold kidney bought this feeling well number da...,0.255,0.63,0.9027,0.081,0.444,0.476
2,2,Amazon.in:Customer reviews: Apple iPhone 12 (6...,"Another Fabulous IPhone ( IPhone 12, Blue 64GB)",5.0,Verified Purchase,The media could not be loaded.\n ...,The media could loaded Another beauty Apple I ...,0.162247,0.539151,0.9945,0.05,0.699,0.251
3,3,Amazon.in:Customer reviews: Apple iPhone 12 (6...,The Beast,5.0,Verified Purchase,This was my first switch to an ios device afte...,This first switch ios device using android unt...,0.31875,0.601935,0.9153,0.044,0.714,0.241
4,4,Amazon.in:Customer reviews: Apple iPhone 12 (6...,This phone is a joke and the joke is on us!!! ...,1.0,Verified Purchase,Extremely disappointed with this phone. It’s a...,Extremely disappointed phone Its seemingly ave...,-0.120635,0.387393,-0.9174,0.154,0.762,0.084


In [38]:
##########

In [41]:
######## Data insight #######