In [1]:
!pip install textblob



### Importing packages and loading data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import re
import string

import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob

from sklearn.metrics import accuracy_score,classification_report, confusion_matrix

In [2]:
df = pd.read_csv('C:\Python Scripts\Datasets\Womens Clothing Reviews.csv', header=0,index_col=0)
df.head()

Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates
1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses
2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses


### Data preparation

In [37]:
text = df[['Rating', 'Recommended IND', 'Review Text']]
text.head()

Unnamed: 0,Rating,Recommended IND,Review Text
0,4,1,Absolutely wonderful - silky and sexy and comf...
1,5,1,Love this dress! it's sooo pretty. i happene...
2,3,0,I had such high hopes for this dress and reall...
3,5,1,"I love, love, love this jumpsuit. it's fun, fl..."
4,5,1,This shirt is very flattering to all due to th...


In [38]:
text = text.rename(columns={"Review Text": "Review", "Recommended IND":"Recommended"})
text.head()

Unnamed: 0,Rating,Recommended,Review
0,4,1,Absolutely wonderful - silky and sexy and comf...
1,5,1,Love this dress! it's sooo pretty. i happene...
2,3,0,I had such high hopes for this dress and reall...
3,5,1,"I love, love, love this jumpsuit. it's fun, fl..."
4,5,1,This shirt is very flattering to all due to th...


In [39]:
text.shape

(23486, 3)

In [40]:
text.isnull().sum()

Rating           0
Recommended      0
Review         845
dtype: int64

In [41]:
text.dropna(subset=['Review'], inplace=True)

In [42]:
text['Review'] = text['Review'].astype(str)

We will create a new column that will consider the ratings as follows:

We assign the values from the ranking to the positive, neutral and negative values. Rating 4 or higher to Positive, equal to 3 Neutral and rating 2 or less to Negative.

In [43]:
text.head()

Unnamed: 0,Rating,Recommended,Review
0,4,1,Absolutely wonderful - silky and sexy and comf...
1,5,1,Love this dress! it's sooo pretty. i happene...
2,3,0,I had such high hopes for this dress and reall...
3,5,1,"I love, love, love this jumpsuit. it's fun, fl..."
4,5,1,This shirt is very flattering to all due to th...


In [44]:
def rating(row):
    if row >= 4:
        return 'Positive'
    if row == 3:
        return 'Neutral'
    if row <= 2:
        return 'Negative'

In [45]:
text['Class'] = text['Rating'].apply(rating)
text.head(10)

Unnamed: 0,Rating,Recommended,Review,Class
0,4,1,Absolutely wonderful - silky and sexy and comf...,Positive
1,5,1,Love this dress! it's sooo pretty. i happene...,Positive
2,3,0,I had such high hopes for this dress and reall...,Neutral
3,5,1,"I love, love, love this jumpsuit. it's fun, fl...",Positive
4,5,1,This shirt is very flattering to all due to th...,Positive
5,2,0,"I love tracy reese dresses, but this one is no...",Negative
6,5,1,I aded this in my basket at hte last mintue to...,Positive
7,4,1,"I ordered this in carbon for store pick up, an...",Positive
8,5,1,I love this dress. i usually get an xs but it ...,Positive
9,5,1,"I'm 5""5' and 125 lbs. i ordered the s petite t...",Positive


In [46]:
text['Class'].value_counts()

Positive    17448
Neutral      2823
Negative     2370
Name: Class, dtype: int64

### Vader sentiment

We create the sentiment classifier:

In [47]:
analyser = SentimentIntensityAnalyzer()

def sentiment_scores(text):
    score = analyser.polarity_scores(text)
    print("{:-<40} {}".format(text, str(score)))

In [48]:
sentiment_scores(text['Review'].iloc[2])

I had such high hopes for this dress and really wanted it to work for me. i initially ordered the petite small (my usual size) but i found this to be outrageously small. so small in fact that i could not zip it up! i reordered it in petite medium, which was just ok. overall, the top half was comfortable and fit nicely, but the bottom half had a very tight under layer and several somewhat cheap (net) over layers. imo, a major design flaw was the net over layer sewn directly into the zipper - it c {'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'compound': 0.9427}


Positive, negative and neutral results represent the proportion of the text that falls in these categories. This means that our opinion was rated as 79% neutral, 18% positive and 0.3% negative. All results should add up to 1. The Compound score is a metric that calculates the sum of all lexicon scores that have been normalized between -1 (most extreme negative) and +1 (most extreme positive). Compound is 0.94, which means very high positive sentiment.

We add score to the all reviews:

In [49]:
text['Scores'] = text['Review'].apply(lambda Review: analyser.polarity_scores(Review))
text.head()

Unnamed: 0,Rating,Recommended,Review,Class,Scores
0,4,1,Absolutely wonderful - silky and sexy and comf...,Positive,"{'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp..."
1,5,1,Love this dress! it's sooo pretty. i happene...,Positive,"{'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp..."
2,3,0,I had such high hopes for this dress and reall...,Neutral,"{'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co..."
3,5,1,"I love, love, love this jumpsuit. it's fun, fl...",Positive,"{'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com..."
4,5,1,This shirt is very flattering to all due to th...,Positive,"{'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound..."


We calculate "Compound" to all sentences:

In [51]:
text['Compound'] = text['Scores'].apply(lambda score_dict: score_dict['compound'])
text.head()

Unnamed: 0,Rating,Recommended,Review,Class,Scores,Compound
0,4,1,Absolutely wonderful - silky and sexy and comf...,Positive,"{'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp...",0.8932
1,5,1,Love this dress! it's sooo pretty. i happene...,Positive,"{'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...",0.9729
2,3,0,I had such high hopes for this dress and reall...,Neutral,"{'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co...",0.9427
3,5,1,"I love, love, love this jumpsuit. it's fun, fl...",Positive,"{'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com...",0.5727
4,5,1,This shirt is very flattering to all due to th...,Positive,"{'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound...",0.9291


Each review has a negative, neutral, positive and complex result. The complex result is a comprehensive assessment of the first three points and this score is in the range of -1 to 1. Based on these results, we can determine the sentiment. We set score threshold ourselves and here we can set it to ± 0.5. If Compound is bigger than 0.5 the review is positive, from 0 to 0.5 is neutral and below than 0 is negative.

In [52]:
def sentiment (score):
    if score >= 0.5:
        return 'Positive'
    if (score > 0) and (score < 0.5):
        return 'Neutral'
    if score <= 0:
        return 'Negative'


We add "Sentiment" to all sentences:

In [53]:
text['Sentiment'] = text['Compound'].apply(sentiment)
text.head(10)

Unnamed: 0,Rating,Recommended,Review,Class,Scores,Compound,Sentiment
0,4,1,Absolutely wonderful - silky and sexy and comf...,Positive,"{'neg': 0.0, 'neu': 0.272, 'pos': 0.728, 'comp...",0.8932,Positive
1,5,1,Love this dress! it's sooo pretty. i happene...,Positive,"{'neg': 0.0, 'neu': 0.664, 'pos': 0.336, 'comp...",0.9729,Positive
2,3,0,I had such high hopes for this dress and reall...,Neutral,"{'neg': 0.027, 'neu': 0.792, 'pos': 0.181, 'co...",0.9427,Positive
3,5,1,"I love, love, love this jumpsuit. it's fun, fl...",Positive,"{'neg': 0.226, 'neu': 0.34, 'pos': 0.434, 'com...",0.5727,Positive
4,5,1,This shirt is very flattering to all due to th...,Positive,"{'neg': 0.0, 'neu': 0.7, 'pos': 0.3, 'compound...",0.9291,Positive
5,2,0,"I love tracy reese dresses, but this one is no...",Negative,"{'neg': 0.0, 'neu': 0.853, 'pos': 0.147, 'comp...",0.9419,Positive
6,5,1,I aded this in my basket at hte last mintue to...,Positive,"{'neg': 0.023, 'neu': 0.881, 'pos': 0.096, 'co...",0.8004,Positive
7,4,1,"I ordered this in carbon for store pick up, an...",Positive,"{'neg': 0.046, 'neu': 0.901, 'pos': 0.053, 'co...",-0.0909,Negative
8,5,1,I love this dress. i usually get an xs but it ...,Positive,"{'neg': 0.0, 'neu': 0.818, 'pos': 0.182, 'comp...",0.7175,Positive
9,5,1,"I'm 5""5' and 125 lbs. i ordered the s petite t...",Positive,"{'neg': 0.12, 'neu': 0.753, 'pos': 0.126, 'com...",-0.3724,Negative


Let's see how sentiments are distributed:

In [54]:
text['Sentiment'].value_counts()

Positive    19119
Neutral      1947
Negative     1575
Name: Sentiment, dtype: int64

**Assessment of sentiment analysis**

We will compare them with those we originally assigned by ranking:

In [55]:
accuracy_score(text['Class'],text['Sentiment'])

0.7622454838567201

In [56]:
print(classification_report(text['Class'],text['Sentiment']))

              precision    recall  f1-score   support

    Negative       0.43      0.28      0.34      2370
     Neutral       0.27      0.19      0.22      2823
    Positive       0.84      0.92      0.88     17448

    accuracy                           0.76     22641
   macro avg       0.51      0.46      0.48     22641
weighted avg       0.73      0.76      0.74     22641



In [57]:
print(confusion_matrix(text['Class'],text['Sentiment']))

[[  675   492  1203]
 [  440   525  1858]
 [  460   930 16058]]


Based on the above report, we can see that our fit has achieved 76% accuracy. One can see that received results show different sentiment distribution. The Vader method assigned more positive reviews than original rating so the amount of neutral and negative reviews reduced. There are also differences in the designation of individual sentiments, for example one of them was marked as positive, where it was negative in the original ranking.


### Cleaning data

Now we wiil check how the results look like after cleaning the data. We remove punctuation and Stopwords. 

In [58]:
reviews = text[['Class', 'Review']]
reviews.head()

Unnamed: 0,Class,Review
0,Positive,Absolutely wonderful - silky and sexy and comf...
1,Positive,Love this dress! it's sooo pretty. i happene...
2,Neutral,I had such high hopes for this dress and reall...
3,Positive,"I love, love, love this jumpsuit. it's fun, fl..."
4,Positive,This shirt is very flattering to all due to th...


Remove punctuation:

In [59]:
def clean_text(words):
    words = re.sub("[^a-zA-Z]"," ", words)
    text = words.lower().split()                   
    return " ".join(text)

In [60]:
reviews['Review'] = reviews['Review'].apply(clean_text)
reviews.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Class,Review
0,Positive,absolutely wonderful silky and sexy and comfor...
1,Positive,love this dress it s sooo pretty i happened to...
2,Neutral,i had such high hopes for this dress and reall...
3,Positive,i love love love this jumpsuit it s fun flirty...
4,Positive,this shirt is very flattering to all due to th...


Stopwords:

In [61]:
# Show some stop words
stop_words = stopwords.words('english')
print(stop_words[::10])

['i', "you've", 'himself', 'they', 'that', 'been', 'a', 'while', 'through', 'in', 'here', 'few', 'own', 'just', 're', 'doesn', 'ma', "shouldn't"]


In [62]:
def remove_stopwords(rows):
    rows = [word.lower() for word in rows.split() if word.lower() not in stop_words]
    return " ".join(rows)

In [63]:
reviews['Review'] = reviews['Review'].apply(remove_stopwords)
reviews.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Class,Review
0,Positive,absolutely wonderful silky sexy comfortable
1,Positive,love dress sooo pretty happened find store gla...
2,Neutral,high hopes dress really wanted work initially ...
3,Positive,love love love jumpsuit fun flirty fabulous ev...
4,Positive,shirt flattering due adjustable front tie perf...


Now we are using Vader sentiment:

In [64]:
reviews['Scores'] = reviews['Review'].apply(lambda Review: analyser.polarity_scores(Review))
reviews.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Class,Review,Scores
0,Positive,absolutely wonderful silky sexy comfortable,"{'neg': 0.0, 'neu': 0.154, 'pos': 0.846, 'comp..."
1,Positive,love dress sooo pretty happened find store gla...,"{'neg': 0.0, 'neu': 0.492, 'pos': 0.508, 'comp..."
2,Neutral,high hopes dress really wanted work initially ...,"{'neg': 0.038, 'neu': 0.693, 'pos': 0.269, 'co..."
3,Positive,love love love jumpsuit fun flirty fabulous ev...,"{'neg': 0.171, 'neu': 0.185, 'pos': 0.644, 'co..."
4,Positive,shirt flattering due adjustable front tie perf...,"{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'comp..."


In [65]:
reviews['Compound'] = reviews['Scores'].apply(lambda score_dict: score_dict['compound'])
reviews['Sentiment'] = reviews['Compound'].apply(sentiment)
reviews.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Class,Review,Scores,Compound,Sentiment
0,Positive,absolutely wonderful silky sexy comfortable,"{'neg': 0.0, 'neu': 0.154, 'pos': 0.846, 'comp...",0.8991,Positive
1,Positive,love dress sooo pretty happened find store gla...,"{'neg': 0.0, 'neu': 0.492, 'pos': 0.508, 'comp...",0.971,Positive
2,Neutral,high hopes dress really wanted work initially ...,"{'neg': 0.038, 'neu': 0.693, 'pos': 0.269, 'co...",0.9062,Positive
3,Positive,love love love jumpsuit fun flirty fabulous ev...,"{'neg': 0.171, 'neu': 0.185, 'pos': 0.644, 'co...",0.9464,Positive
4,Positive,shirt flattering due adjustable front tie perf...,"{'neg': 0.0, 'neu': 0.494, 'pos': 0.506, 'comp...",0.9062,Positive
5,Negative,love tracy reese dresses one petite feet tall ...,"{'neg': 0.0, 'neu': 0.732, 'pos': 0.268, 'comp...",0.9153,Positive
6,Positive,aded basket hte last mintue see would look lik...,"{'neg': 0.048, 'neu': 0.827, 'pos': 0.125, 'co...",0.6361,Positive
7,Positive,ordered carbon store pick ton stuff always try...,"{'neg': 0.042, 'neu': 0.832, 'pos': 0.126, 'co...",0.5709,Positive
8,Positive,love dress usually get xs runs little snug bus...,"{'neg': 0.0, 'neu': 0.698, 'pos': 0.302, 'comp...",0.7579,Positive
9,Positive,lbs ordered petite make sure length long typic...,"{'neg': 0.0, 'neu': 0.564, 'pos': 0.436, 'comp...",0.9643,Positive


Let's check the distribution of variables:

In [66]:
reviews['Sentiment'].value_counts()

Positive    20407
Neutral      1471
Negative      763
Name: Sentiment, dtype: int64

In [67]:
reviews.loc[reviews['Sentiment'] == 'Negative'].head()

Unnamed: 0,Class,Review,Scores,Compound,Sentiment
77,Negative,zipper broke piece first time wore disappointi...,"{'neg': 0.295, 'neu': 0.475, 'pos': 0.231, 'co...",-0.2263,Negative
104,Negative,runs big looked unflattering petite might work...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Negative
110,Negative,thin poor quality especially price felt like t...,"{'neg': 0.25, 'neu': 0.565, 'pos': 0.184, 'com...",-0.3892,Negative
191,Negative,minimally torn whether return ultimately going...,"{'neg': 0.184, 'neu': 0.816, 'pos': 0.0, 'comp...",-0.5456,Negative
214,Negative,skirt received little blue green mostly white ...,"{'neg': 0.225, 'neu': 0.602, 'pos': 0.173, 'co...",-0.3182,Negative


In [68]:
reviews.loc[reviews['Sentiment'] == 'Neutral'].head()

Unnamed: 0,Class,Review,Scores,Compound,Sentiment
10,Neutral,dress runs small esp zipper area runs ordered ...,"{'neg': 0.084, 'neu': 0.782, 'pos': 0.135, 'co...",0.2263,Neutral
22,Negative,first pullover styling side zipper purchased k...,"{'neg': 0.112, 'neu': 0.765, 'pos': 0.122, 'co...",0.1027,Neutral
32,Positive,pants even better person downside need dry cle...,"{'neg': 0.183, 'neu': 0.55, 'pos': 0.266, 'com...",0.2263,Neutral
103,Negative,fabric felt cheap find flattering top referenc...,"{'neg': 0.0, 'neu': 0.687, 'pos': 0.313, 'comp...",0.4767,Neutral
116,Positive,prior reviewer nailed summary dress definitely...,"{'neg': 0.0, 'neu': 0.828, 'pos': 0.172, 'comp...",0.4019,Neutral


Comparing with ranking:

In [69]:
accuracy_score(reviews['Class'],reviews['Sentiment'])

0.7609204540435494

In [70]:
print(classification_report(reviews['Class'],reviews['Sentiment']))

              precision    recall  f1-score   support

    Negative       0.48      0.15      0.23      2370
     Neutral       0.23      0.12      0.16      2823
    Positive       0.81      0.95      0.87     17448

    accuracy                           0.76     22641
   macro avg       0.51      0.41      0.42     22641
weighted avg       0.70      0.76      0.72     22641



### TextBlob

In [71]:
data = text[['Review', 'Class']]
data.head()

Unnamed: 0,Review,Class
0,Absolutely wonderful - silky and sexy and comf...,Positive
1,Love this dress! it's sooo pretty. i happene...,Positive
2,I had such high hopes for this dress and reall...,Neutral
3,"I love, love, love this jumpsuit. it's fun, fl...",Positive
4,This shirt is very flattering to all due to th...,Positive


In [72]:
blob_text = data['Review'][0]
blob_text

'Absolutely wonderful - silky and sexy and comfortable'

In [73]:
blob = TextBlob(blob_text)
print(format(blob.sentiment))

Sentiment(polarity=0.6333333333333333, subjectivity=0.9333333333333332)


The above result is a tuple representing the polarization and subjectivity of review. The polarity of the sentence is 0.63, indicating that the sentiment is positive. The subjectivity of the text which is 0.93 in our example. The value closer to 1 indicates that the sentence is mostly a public opinion and not a factual piece of information and vice versa. 

Since we are interested in the sentiment, we will only extract the polarity and apply it to all the observations:

In [74]:
data['Polarity'] = data['Review'].apply(lambda x: TextBlob(x).sentiment[0])
data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Review,Class,Polarity
0,Absolutely wonderful - silky and sexy and comf...,Positive,0.633333
1,Love this dress! it's sooo pretty. i happene...,Positive,0.339583
2,I had such high hopes for this dress and reall...,Neutral,0.073675
3,"I love, love, love this jumpsuit. it's fun, fl...",Positive,0.55
4,This shirt is very flattering to all due to th...,Positive,0.512891
5,"I love tracy reese dresses, but this one is no...",Negative,0.17875
6,I aded this in my basket at hte last mintue to...,Positive,0.13375
7,"I ordered this in carbon for store pick up, an...",Positive,0.171635
8,I love this dress. i usually get an xs but it ...,Positive,0.0025
9,"I'm 5""5' and 125 lbs. i ordered the s petite t...",Positive,0.2042


In [76]:
def sentiment_score(polarity):
    if polarity < 0:
        return "Negative"
    elif polarity > 0:
        return "Positive"
    else:
        return "Neutral"

We add "Sentiment" to all sentences:

In [77]:
data['Sentiment'] = data['Polarity'].apply(sentiment_score)
data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Review,Class,Polarity,Sentiment
0,Absolutely wonderful - silky and sexy and comf...,Positive,0.633333,Positive
1,Love this dress! it's sooo pretty. i happene...,Positive,0.339583,Positive
2,I had such high hopes for this dress and reall...,Neutral,0.073675,Positive
3,"I love, love, love this jumpsuit. it's fun, fl...",Positive,0.55,Positive
4,This shirt is very flattering to all due to th...,Positive,0.512891,Positive
5,"I love tracy reese dresses, but this one is no...",Negative,0.17875,Positive
6,I aded this in my basket at hte last mintue to...,Positive,0.13375,Positive
7,"I ordered this in carbon for store pick up, an...",Positive,0.171635,Positive
8,I love this dress. i usually get an xs but it ...,Positive,0.0025,Positive
9,"I'm 5""5' and 125 lbs. i ordered the s petite t...",Positive,0.2042,Positive


Let's see how sentiments are distributed:

In [78]:
data['Sentiment'].value_counts()

Positive    21227
Negative     1322
Neutral        92
Name: Sentiment, dtype: int64

In [79]:
data.loc[data['Sentiment'] == 'Negative'].head()

Unnamed: 0,Review,Class,Polarity,Sentiment
10,Dress runs small esp where the zipper area run...,Neutral,-0.097149,Negative
14,This is a nice choice for holiday gatherings. ...,Neutral,-0.057143,Negative
22,"First of all, this is not pullover styling. th...",Negative,-0.045595,Negative
44,Tried this on today at my local retailer and h...,Positive,-0.046374,Negative
72,I have a short torso and this works well for m...,Positive,-0.079365,Negative


In [80]:
data.loc[data['Sentiment'] == 'Neutral'].head()

Unnamed: 0,Review,Class,Polarity,Sentiment
28,I have several of goodhyouman shirts and i get...,Positive,0.0,Neutral
104,"Runs big and looked unflattering. i am petite,...",Negative,0.0,Neutral
751,"Can be a bit itchy sometimes, i guess dependin...",Positive,0.0,Neutral
897,I like the skirt. that said it is going to the...,Positive,0.0,Neutral
976,I was initially attracted to the colors,Positive,0.0,Neutral


We will compare them with originally ranking:

In [81]:
accuracy_score(data['Class'],data['Sentiment'])

0.7731990636455986

In [82]:
print(classification_report(data['Class'],data['Sentiment']))

              precision    recall  f1-score   support

    Negative       0.41      0.23      0.29      2370
     Neutral       0.16      0.01      0.01      2823
    Positive       0.80      0.97      0.88     17448

    accuracy                           0.77     22641
   macro avg       0.46      0.40      0.39     22641
weighted avg       0.68      0.77      0.71     22641

