## Import packages

In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
import re
import string

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

nltk.download('stopwords')

from nltk.corpus import stopwords

stopword = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## Read/Import Data

In [8]:
data = pd.read_csv('/content/drive/MyDrive/machine-learning-data/flipkart_reviews.csv')

print(data.head(10))

print(data.isnull().sum())

                                        Product_name  \
0  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
1  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
2  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
3  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
4  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
5  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
6  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
7  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
8  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
9  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   

                                              Review  Rating  
0  Best under 60k Great performanceI got it for a...       5  
1                                 Good perfomence...       5  
2  Great performance but usually it has also that...       5  
3           My wife is so happy and best product 👌🏻😘       5  
4  Light weight laptop with new amazing features,...       5  
5  Am

In [10]:

stemmer = nltk.SnowballStemmer("english")

def clean(text):
  text=str(text).lower()
  text=re.sub('\[.*?]','',text)
  text=re.sub('https?://\S+|WWW.\S+','',text)
  text=re.sub('<.*?>+','',text)
  text=re.sub('n','',text)
  text=re.sub('\W*d\W*','',text)
  #text=re.sub('[%%S]' %% re.escape(string.string.punctuation), '', text)

  text=[word for word in text.split(' ') ]
  text="".join(text)
  text=[stemmer.stem(word) for word in text.split(' ')]
  text="".join(text)
  return text

data["Review"] = data["Review"].apply(clean)

## **Visualize the Data - Pie Chart**

In [13]:
ratings = data["Rating"].value_counts()
numbers = ratings.index
quantity = ratings.values

import plotly.express as px
figure = px.pie(data, values=quantity, names=numbers, hole=0.5)
figure.show()

## **Sentiment Intensity Analyzer**

In [15]:
nltk.download('vader_lexicon')
sentiments= SentimentIntensityAnalyzer()
data['Positive']= [sentiments.polarity_scores(i)["pos"] for i in data["Review"]]
data['Negative']= [sentiments.polarity_scores(i)["neg"] for i in data["Review"]]
data['Neutral']= [sentiments.polarity_scores(i)["neu"] for i in data["Review"]]

data = data[["Review", "Positive", "Negative", "Neutral"]]

print(data.head(10))

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                              Review  Positive  Negative  \
0  bestuer60kgreatperformaceigotitforarou58500bat...       0.0       0.0   
1                                    gooperfomece...       0.0       0.0   
2  greatperformacebutusuallyithasalsothatgamiglap...       0.0       0.0   
3                      mywifeissohappyabestprouct👌🏻😘       0.0       0.0   
4  lightweightlaptopwithewamazigfeatures,batteryl...       0.0       0.0   
5       amaziglaptop,amsomuchhappy,thaksforflipkart.       0.0       0.0   
6                      overallagoolaptopforpersoalus       0.0       0.0   
7                              thakyousomuchflipkart       0.0       0.0   
8                                       amazigprouct       0.0       0.0   
9  gooforormalwork,stuets,olieclasses,watchigmovi...       0.0       0.0   

   Neutral  
0      1.0  
1      1.0  
2      1.0  
3      1.0  
4      1.0  
5      1.0  
6      1.0  
7      1.0  
8      1.0  
9      1.0  


## **Overall Sentiment Score**

In [17]:
x=sum(data["Positive"])
y=sum(data["Negative"])
z=sum(data["Neutral"])

def sentimentScore(a,b,c):
  if(a >b) and (a > c):
    print("Positive")
  elif (b >1) and (b > c):
    print("Negative")
  else:
    print("Neutral")

sentimentScore(x,y,z)


Neutral


## **Reason**

In [19]:
print("Positve: ", x)
print("Negative: ", y)
print("Neutral: ", z)

Positve:  32.0
Negative:  0.0
Neutral:  2272.0
