In [1]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import warnings
# Fetch the VADER lexicon used by nltk's SentimentIntensityAnalyzer; the
# recorded output shows nltk reporting the package as already up-to-date
# when it is present locally.
nltk.download('vader_lexicon')
# NOTE(review): this suppresses every warning category for the rest of the
# session, including pandas deprecation/indexing warnings -- consider
# narrowing the filter to the specific warning being silenced.
warnings.filterwarnings('ignore')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ankit\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
#Load the dataset
def load_dataset(path='Reviews.csv'):
    """Read the reviews CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to 'Reviews.csv' in the current
        working directory, so existing calls without arguments keep working.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pd.read_csv``; nothing is
        cleaned or dropped here.
    """
    data = pd.read_csv(path)
    return data

In [3]:
#Drop the rows that contain missing values
def pre_processing():
    """Return the reviews with incomplete rows removed.

    The dataset is loaded through load_dataset() and every row that has a
    missing value in any column is discarded. Surviving rows keep their
    original index labels (the index is not reset).
    """
    raw_reviews = load_dataset()
    complete_reviews = raw_reviews.dropna()
    return complete_reviews


In [4]:
#Finding the distribution of ratings
def distribution_of_ratings():
    """Count how many cleaned reviews carry each 'Score' value.

    Returns the Series produced by ``value_counts()`` on the 'Score' column
    of the frame returned by pre_processing().
    """
    return pre_processing()['Score'].value_counts()

In [5]:
#Computing the positive, negative and neutral sentiment scores of each review and adding them to the dataframe as new columns
def add_new_columns():
    """Score every review text with VADER and attach the scores as columns.

    The cleaned reviews from pre_processing() are passed one by one to
    ``SentimentIntensityAnalyzer.polarity_scores``; the 'pos', 'neg' and
    'neu' entries of each result are stored in the new 'Positive',
    'Negative' and 'Neutral' columns.

    Returns
    -------
    pandas.DataFrame
        The original review columns followed by 'Positive', 'Negative' and
        'Neutral', in that fixed order.
    """
    data = pre_processing()
    analyzer = SentimentIntensityAnalyzer()

    # Iterate over the text values themselves instead of range(len(data)):
    # data['Text'][i] is a label lookup, and dropna() leaves gaps in the
    # index labels, so positional counters raise KeyError as soon as any row
    # has been dropped.
    scores = [analyzer.polarity_scores(text) for text in data['Text']]

    # Plain lists are assigned by position, so they line up with the rows
    # regardless of the index labels.
    data['Positive'] = [score['pos'] for score in scores]
    data['Negative'] = [score['neg'] for score in scores]
    data['Neutral'] = [score['neu'] for score in scores]

    data = data[['Id','ProductId','UserId','ProfileName','HelpfulnessNumerator','HelpfulnessDenominator','Score','Time','Summary','Text','Positive','Negative','Neutral']]
    return data


In [6]:
#Finding the sum of positive, negative and neutral values
def sentiment_scores():
    """Total the per-review sentiment scores and name the largest total.

    Returns
    -------
    tuple
        ``(positive_total, negative_total, neutral_total, sentiment)`` where
        the totals are the column sums of 'Positive', 'Negative' and
        'Neutral' from add_new_columns(), and ``sentiment`` is the label of
        the largest total. Ties resolve to "Positive" first, then
        "Negative", otherwise "Neutral".
    """
    scored = add_new_columns()

    pos_total, neg_total, neu_total = (
        scored[column].sum() for column in ('Positive', 'Negative', 'Neutral')
    )
    peak = max(pos_total, neg_total, neu_total)

    # Check the candidates in priority order; anything that matches neither
    # the positive nor the negative total is reported as neutral.
    candidates = (("Positive", pos_total), ("Negative", neg_total))
    sentiment = next(
        (label for label, total in candidates if peak == total),
        "Neutral",
    )
    return pos_total, neg_total, neu_total, sentiment

In [7]:
#Calling all functions
# Show the raw table as read from the CSV, before any rows are dropped.
load_dataset()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...
...,...,...,...,...,...,...,...,...,...,...
14995,14996,B000EGZ99M,AH720G9X1MIQ8,KJT,1,1,5,1255824000,Rice Select Whole Wheat Orzo,Could no longer find Rice Select Whole Wheat O...
14996,14997,B000EGZ99M,A1MW2HEG4LF56B,S. Mariconda,3,4,5,1170633600,Absolutely wonderful comfort food!,I discovered this in our local QFC grocery sto...
14997,14998,B000EGZ99M,A1GY0FE07QFFLF,Arlington Cory,0,0,5,1326585600,Whole Wheat Orzo Tasty and Versatile,"Hard to find whole wheat orzo in supermarket, ..."
14998,14999,B000EGZ99M,A2N8ZFDXI5T6BW,Beverleaf,0,0,5,1322179200,Whole grain deliciousness,"This product is delicious, and healthier than ..."


In [8]:
# Reviews after pre_processing() has removed rows with missing values.
data = pre_processing()
# NOTE(review): print() gives the plain-text repr; ending the cell with
# data.head() would use the notebook's rich table display instead.
print(data)

          Id   ProductId          UserId                      ProfileName  \
0          1  B001E4KFG0  A3SGXH7AUHU8GW                       delmartian   
1          2  B00813GRG4  A1D87F6ZCVE5NK                           dll pa   
2          3  B000LQOCH0   ABXLMWJIXXAIN  Natalia Corres "Natalia Corres"   
3          4  B000UA0QIQ  A395BORC6FGVXV                             Karl   
4          5  B006K2ZZ7K  A1UQRSCLF8GW1T    Michael D. Bigham "M. Wassir"   
...      ...         ...             ...                              ...   
14995  14996  B000EGZ99M   AH720G9X1MIQ8                              KJT   
14996  14997  B000EGZ99M  A1MW2HEG4LF56B                     S. Mariconda   
14997  14998  B000EGZ99M  A1GY0FE07QFFLF                   Arlington Cory   
14998  14999  B000EGZ99M  A2N8ZFDXI5T6BW                        Beverleaf   
14999  15000  B000EGZ99M   ACLTZ4KSPHG1N                jodimae "jodimae"   

       HelpfulnessNumerator  HelpfulnessDenominator  Score        Time  \
0

In [9]:
# Number of reviews per 'Score' value, as returned by value_counts().
ratings = distribution_of_ratings()
print('The rating distribution is\n',ratings)

The rating distribution is
 5    9350
4    2132
1    1425
3    1253
2     840
Name: Score, dtype: int64


In [10]:
# x, y, z are the summed 'Positive', 'Negative' and 'Neutral' scores;
# sentiment is the label of the largest of the three sums.
x,y,z,sentiment = sentiment_scores()
print('The sentiment with maximum score is\n',sentiment)

The sentiment with maximum score is
 Neutral


In [11]:
# Report the three column totals unpacked from sentiment_scores() in the
# previous cell.
print(f'Positive Score: {x}, Negative Score: {y}, Neutral Score: {z}')

Positive Score: 2879.8810000000003, Negative Score: 639.613, Neutral Score: 11480.474
