In [3]:
# Sentiment analysis from the data ibtained from any Website

# Some of the data is not allowed on amazon for webscraping
# we are using IMDB reviews for teh movie 2046 by Wong Kar Wai

In [2]:
# Loading required libraries 

import requests   
from bs4 import BeautifulSoup as bs 
import re 
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from textblob import TextBlob 
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
# defining vader sentiment function .

def get_vader_sentiment(review): 
  
    sia = SentimentIntensityAnalyzer()
    
    analysis = sia.polarity_scores(review)
  
    if analysis["compound"] > 0: 
        return 'positive'
    elif analysis["compound"] < 0: 
        return 'negative'
    else: 
        return 'neutral'

In [4]:
# assigning imdb  website url to url 

url = 'https://www.imdb.com/title/tt0212712/reviews?ref_=tt_ql_3'

In [5]:
# getting htm from website using rquests

response = requests.get(url)
print(response.status_code)

200


In [6]:
# create soup object using bs4 to iterate over the extracted content 

soup = bs(response.content,"html.parser")


In [7]:
# using specific html codes extracting reviews from the soup object 

reviews = soup.findAll("div", attrs={"class","text"})
reviews

[<div class="text show-more__control">2046 was directed by Kar Wai Wong, who also directed In the Mood for Love. This film is also lyrical, deliberately paced, and very romantic.<br/><br/>Without giving too much away, the film takes place in Hong Kong and Singapore in the 60's. The main character, Chow, is a writer and womanizer. Part of the story takes place in his work, a science fiction tale called 2046. <br/><br/>The story is told out of sequence, with past and present jumbled. In a clever use of irony, we gradually understand that the future is being used to tell the past. Some scenes are presented early, in a way that is confusing until the context is presented later.<br/><br/>There are 3 female characters who are in his life, and the story is segmented accordingly.<br/><br/>The cinematography is beautiful. Interestingly, Wong uses 3 colors nearly exclusively: Blood red, sea green, and yellow. Sometimes he will use light to make those colors stand out, other times it is the objec

In [8]:
# now passing the reviwes to the sentiment analyser as the data is clean
# we will iterate through each text blocks (each nlock is a review ) and pass it thorugh the analyser
# and save the result to dataframe

import pandas as pd

# creating empty data frame

df= pd.DataFrame()
rev = [] 
rt=[]  

for i in range(1,25):
    review_dict = {} 
    review_dict['rev_id'] = i
    review_dict['blob_sentiment'] = get_vader_sentiment(reviews[i].text)
    review_dict['vader_sentiment'] = get_vader_sentiment(reviews[i].text)
    
    # appending the review to the rt list
    
    rt.append(reviews[i].text)  
    
    # appending the result dictionary to rev list 
    
    rev.append(review_dict)

    
rev

[{'rev_id': 1, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 2, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 3, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 4, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 5, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 6, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 7, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 8, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 9, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 10, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 11, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 12, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'},
 {'rev_id': 13, 'blob_sentiment': 'positive', 'vader_sentiment': 'positiv

In [9]:
# Retrieving the reviews 

reviews[20].text

'2046 is complete balls. A film created by an idiot for idiots, find Wong Kar Wai\'s latest film exactly the same as all of his others - "beautiful" cinematography (exactly what you expect when you hire three million-dollar cinematographers) alongside a complete lack of humanity and interest. Calling Wong Kar Wai a hack is an insult to all of the real hacks who are churning out garbage and making a mint from dumb audiences - the real difference is, here, that Wong Kar Wai fans actually believe they\'re watching something amazing, smart and groundbreaking.A film for emotionally stunted 15 year olds, the story involves a complete moron whose inability to deal with the women in his life or treat them as actual humans leads him back to a hotel room of emotional significance, in which he proceeds to write a horrible sci-fi story (courtesy of LG!) in attempt to deal with the fact that he\'s so immature and so pathetic that he can\'t have any real relationships, only hollow shells because he 

In [10]:
# Retrieving the review sentiment 

rev[20]

{'rev_id': 21, 'blob_sentiment': 'positive', 'vader_sentiment': 'positive'}

In [11]:
# Converting all the review sentiments to dataframe

df =  pd.DataFrame(rev)
df.head()


Unnamed: 0,rev_id,blob_sentiment,vader_sentiment
0,1,positive,positive
1,2,positive,positive
2,3,positive,positive
3,4,positive,positive
4,5,positive,positive


In [12]:
# Most of the reviews seems good. That means it a good movie

In [38]:
# adding the reviews to data frame 

rev_text= pd.DataFrame()


rev_text['text'] = reviews
rev_text

Unnamed: 0,text
0,"[2046 was directed by Kar Wai Wong, who also d..."
1,"[Review: 2046 (2004) By Ken Lee, [], [], Sever..."
2,"[I love story with impact, new ideas and rich ..."
3,[There is a strong tragic feeling the film has...
4,[I went to watch 2046 after reading millions a...
5,[2046 is more like the sequel to In The Mood f...
6,"[A journalist quits his job, when his novels b..."
7,[Kar Wai Wong is more than a film director (th...
8,[Please note that this film is almost universa...
9,[I read different takes on 2046 and its connec...


In [40]:
sia = SentimentIntensityAnalyzer()
sia.polarity_scores()

{'neg': 0.079, 'neu': 0.817, 'pos': 0.104, 'compound': 0.9795}

In [41]:
# adding each reviewscore to data table
rev_text['scores'] = rev_text['text'].apply(lambda text: sia.polarity_scores(text.text))
rev_text.head()

Unnamed: 0,text,scores
0,"[2046 was directed by Kar Wai Wong, who also d...","{'neg': 0.02, 'neu': 0.879, 'pos': 0.101, 'com..."
1,"[Review: 2046 (2004) By Ken Lee, [], [], Sever...","{'neg': 0.079, 'neu': 0.817, 'pos': 0.104, 'co..."
2,"[I love story with impact, new ideas and rich ...","{'neg': 0.073, 'neu': 0.82, 'pos': 0.108, 'com..."
3,[There is a strong tragic feeling the film has...,"{'neg': 0.087, 'neu': 0.801, 'pos': 0.112, 'co..."
4,[I went to watch 2046 after reading millions a...,"{'neg': 0.054, 'neu': 0.819, 'pos': 0.126, 'co..."


In [42]:
# also adding score
rev_text['Positive']  = rev_text['scores'].apply(lambda scores: scores['pos'])
rev_text['Negstive']  = rev_text['scores'].apply(lambda scores: scores['neg'])
rev_text['compound']  = rev_text['scores'].apply(lambda scores: scores['compound'])
rev_text.head()

Unnamed: 0,text,scores,Positive,Negstive,compound
0,"[2046 was directed by Kar Wai Wong, who also d...","{'neg': 0.02, 'neu': 0.879, 'pos': 0.101, 'com...",0.101,0.02,0.9697
1,"[Review: 2046 (2004) By Ken Lee, [], [], Sever...","{'neg': 0.079, 'neu': 0.817, 'pos': 0.104, 'co...",0.104,0.079,0.9795
2,"[I love story with impact, new ideas and rich ...","{'neg': 0.073, 'neu': 0.82, 'pos': 0.108, 'com...",0.108,0.073,0.9847
3,[There is a strong tragic feeling the film has...,"{'neg': 0.087, 'neu': 0.801, 'pos': 0.112, 'co...",0.112,0.087,0.9505
4,[I went to watch 2046 after reading millions a...,"{'neg': 0.054, 'neu': 0.819, 'pos': 0.126, 'co...",0.126,0.054,0.994


In [46]:
# adding sentiment label
rev_text['Sentiment'] = rev_text['compound'].apply(lambda c: 'Pos' if c >0 else 'Neg')

In [47]:
rev_text.head()

Unnamed: 0,text,scores,Positive,Negstive,compound,Sentiment
0,"[2046 was directed by Kar Wai Wong, who also d...","{'neg': 0.02, 'neu': 0.879, 'pos': 0.101, 'com...",0.101,0.02,0.9697,Pos
1,"[Review: 2046 (2004) By Ken Lee, [], [], Sever...","{'neg': 0.079, 'neu': 0.817, 'pos': 0.104, 'co...",0.104,0.079,0.9795,Pos
2,"[I love story with impact, new ideas and rich ...","{'neg': 0.073, 'neu': 0.82, 'pos': 0.108, 'com...",0.108,0.073,0.9847,Pos
3,[There is a strong tragic feeling the film has...,"{'neg': 0.087, 'neu': 0.801, 'pos': 0.112, 'co...",0.112,0.087,0.9505,Pos
4,[I went to watch 2046 after reading millions a...,"{'neg': 0.054, 'neu': 0.819, 'pos': 0.126, 'co...",0.126,0.054,0.994,Pos
