## Flipkart Review Sentiment Analsis Using Machine Learning

## Import Packages

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import nltk
import re

import string

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud , STOPWORDS, ImageColorGenerator

nltk.download("stopwords")
from nltk.corpus import stopwords

stopword=set(stopwords.words("english"))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Import Dataset

In [None]:
data=pd.read_csv("flipkart_reviews.csv")

print(data.head(10))

print(data.isnull().sum())

                                        Product_name  \
0  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
1  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
2  Lenovo Ideapad Gaming 3 Ryzen 5 Hexa Core 5600...   
3  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
4  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
5  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
6  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
7  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
8  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   
9  DELL Inspiron Athlon Dual Core 3050U - (4 GB/2...   

                                              Review  Rating  
0  Best under 60k Great performanceI got it for a...       5  
1                                 Good perfomence...       5  
2  Great performance but usually it has also that...       5  
3           My wife is so happy and best product 👌🏻😘       5  
4  Light weight laptop with new amazing features,...       5  
5  Am

## Cleaning the Dataset

In [None]:
import re
import string
import nltk
from nltk.stem import SnowballStemmer

stemmer = SnowballStemmer("english")

def clean(text):
    text = str(text).lower()
    text = re.sub("\[.*?\]", "", text)
    text = re.sub("https?://\S+|www\.\S+", "", text)  # Updated to lowercase www
    text = re.sub("<.*?>", "", text)
    text = re.sub("\n", "", text)
    text = re.sub(r"\W*\d\W*", "", text)  # Added 'r' for raw string
    text = re.sub("[%s]" % re.escape(string.punctuation), "", text)  # Corrected syntax for punctuation removal

    words = text.split()
    stemmed_words = [stemmer.stem(word) for word in words]
    text = " ".join(stemmed_words)

    return text

data["Review"] = data["Review"].apply(clean)


## Visualizing the Ratings on a Piechart

In [None]:
ratings = data["Rating"].value_counts()
numbers=ratings.index
quality=ratings.values

import plotly.express as px

figure=px.pie(data,values=quality,names=numbers, hole=0.4)

figure.show()


## Sentiment Intensity Analyser

In [20]:
nltk.download("vader_lexicon")

sid = SentimentIntensityAnalyzer()

data["Positive"] = [sid.polarity_scores(i)["pos"] for i in data["Review"]]
data["Negative"] = [sid.polarity_scores(i)["neg"] for i in data["Review"]]
data["Neutral"] = [sid.polarity_scores(i)["neu"] for i in data["Review"]]

data=data[["Review", "Positive", "Negative", "Neutral"]]

print(data.head(10))

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


                                              Review  Positive  Negative  \
0  best underk great performancei got it for arou...     0.317     0.076   
1                                        good perfom     0.744     0.000   
2  great perform but usual it has also that game ...     0.142     0.000   
3           my wife is so happi and best product 👌🏻😘     0.358     0.000   
4  light weight laptop with new amaz featur batte...     0.000     0.000   
5    amaz laptop am so much happi thank for flipkart     0.257     0.000   
6              over all a good laptop for person use     0.326     0.000   
7                         thank you so much flipkart     0.385     0.000   
8                                       amaz product     0.000     0.000   
9  good for normal work student onlin class watch...     0.188     0.000   

   Neutral  
0    0.607  
1    0.256  
2    0.858  
3    0.642  
4    1.000  
5    0.743  
6    0.674  
7    0.615  
8    1.000  
9    0.812  


## Overall Sentiment Score

In [23]:
# Summing up the sentiment scores
x = sum(data["Positive"])
y = sum(data["Negative"])
z = sum(data["Neutral"])

def sentiment_score(a, b, c):
    if (a > b) and (a > c):
        print("Overall Sentiment: Positive")
    elif (b > a) and (b > c):
        print("Overall Sentiment: Negative")
    else:
        print("Overall Sentiment: Neutral")

# Call the sentiment_score function
sentiment_score(x, y, z)

Overall Sentiment: Neutral


## Why we got Neutral

In [24]:
print("Positive:",x)
print("Negative:",y)
print("Neutral:",z)

Positive: 719.4610000000008
Negative: 93.31900000000022
Neutral: 1491.1880000000003
