# Aspect Based Sentiment Analysis 
Ice Cream Review Dataset

## Library

In [2]:
#!pip3 install spacy
#!python3 -m spacy download en
import pandas as pd
import spacy
import numpy as np

nlp = spacy.load('en')

df = pd.read_csv("reviews.csv")
df["full_text"] = df.title + ' ' + df.text
df.head()

Unnamed: 0,key,author,date,stars,title,helpful_yes,helpful_no,text,full_text
0,0_bj,Ilovebennjerry,2017-04-15,3,Not enough brownies!,10,3,"Super good, don't get me wrong. But I came for...","Not enough brownies! Super good, don't get me ..."
1,0_bj,Sweettooth909,2020-01-05,5,I’m OBSESSED with this pint!,3,0,I decided to try it out although I’m not a hug...,I’m OBSESSED with this pint! I decided to try ...
2,0_bj,LaTanga71,2018-04-26,3,My favorite...More Caramel Please,5,2,My caramel core begins to disappear about half...,My favorite...More Caramel Please My caramel c...
3,0_bj,chicago220,2018-01-14,5,Obsessed!!!,24,1,Why are people complaining about the blonde br...,Obsessed!!! Why are people complaining about t...
4,0_bj,Kassidyk,2020-07-24,1,Worst Ice Cream Ever!,1,5,This ice cream is worst ice cream I’ve ever ta...,Worst Ice Cream Ever! This ice cream is worst ...


## Preprocess

In [3]:
df.isnull().values.ravel().sum()

5506

In [4]:
df.loc[df.isnull().sum(1).astype(bool)].iloc[2000]

key                                                        28_bj
author                                                 krisa2337
date                                                  2019-02-01
stars                                                          5
title                                                        NaN
helpful_yes                                                    0
helpful_no                                                     0
text           By far the most addicting flavor EVER I love t...
full_text                                                    NaN
Name: 4899, dtype: object

In [5]:
df.dropna(inplace=True)

In [6]:
df.full_text = df.full_text.str.lower()

In [7]:
df['full_text'] = df['full_text'].astype(str)

In [8]:
df.dtypes

key            object
author         object
date           object
stars           int64
title          object
helpful_yes     int64
helpful_no      int64
text           object
full_text      object
dtype: object

In [9]:
df.describe()

Unnamed: 0,stars,helpful_yes,helpful_no
count,5204.0,5204.0,5204.0
mean,4.061875,1.45638,0.770753
std,1.505839,4.454936,3.157031
min,1.0,0.0,0.0
25%,3.0,0.0,0.0
50%,5.0,0.0,0.0
75%,5.0,1.0,1.0
max,5.0,105.0,86.0


In [10]:
sentiment_result = []
l=len(df)

for i in range(l):
    if df['stars'].iloc[i] >3:
        sentiment_result.append('positive')
    elif df['stars'].iloc[i] == 3:
        sentiment_result.append('neutral')
    else:
        sentiment_result.append('negative')
df['sentiment_result'] = sentiment_result
df.head()

Unnamed: 0,key,author,date,stars,title,helpful_yes,helpful_no,text,full_text,sentiment_result
0,0_bj,Ilovebennjerry,2017-04-15,3,Not enough brownies!,10,3,"Super good, don't get me wrong. But I came for...","not enough brownies! super good, don't get me ...",neutral
1,0_bj,Sweettooth909,2020-01-05,5,I’m OBSESSED with this pint!,3,0,I decided to try it out although I’m not a hug...,i’m obsessed with this pint! i decided to try ...,positive
2,0_bj,LaTanga71,2018-04-26,3,My favorite...More Caramel Please,5,2,My caramel core begins to disappear about half...,my favorite...more caramel please my caramel c...,neutral
3,0_bj,chicago220,2018-01-14,5,Obsessed!!!,24,1,Why are people complaining about the blonde br...,obsessed!!! why are people complaining about t...,positive
4,0_bj,Kassidyk,2020-07-24,1,Worst Ice Cream Ever!,1,5,This ice cream is worst ice cream I’ve ever ta...,worst ice cream ever! this ice cream is worst ...,negative


In [11]:
df.groupby('sentiment_result').count()

Unnamed: 0_level_0,key,author,date,stars,title,helpful_yes,helpful_no,text,full_text
sentiment_result,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
negative,1094,1094,1094,1094,1094,1094,1094,1094,1094
neutral,315,315,315,315,315,315,315,315,315
positive,3795,3795,3795,3795,3795,3795,3795,3795,3795


## Sentiment Extraction

In [12]:
sentiment_terms = []

for review in nlp.pipe(df['full_text']):
    if review.is_parsed:
        sentiment_terms.append(' '.join([token.lemma_ for token in review if (not token.is_stop and not token.is_punct and (token.pos_ == "ADJ" or token.pos_ == "VERB"))]))
    else:
        sentiment_terms.append('')
df['sentiment_terms'] = sentiment_terms
df.head(7)

Unnamed: 0,key,author,date,stars,title,helpful_yes,helpful_no,text,full_text,sentiment_result,sentiment_terms
0,0_bj,Ilovebennjerry,2017-04-15,3,Not enough brownies!,10,3,"Super good, don't get me wrong. But I came for...","not enough brownies! super good, don't get me ...",neutral,good wrong come sweet pack bountiful cry amazi...
1,0_bj,Sweettooth909,2020-01-05,5,I’m OBSESSED with this pint!,3,0,I decided to try it out although I’m not a hug...,i’m obsessed with this pint! i decided to try ...,positive,obsess decide try huge ok specific disappointi...
2,0_bj,LaTanga71,2018-04-26,3,My favorite...More Caramel Please,5,2,My caramel core begins to disappear about half...,my favorite...more caramel please my caramel c...,neutral,favorite caramel begin disappear half buy purc...
3,0_bj,chicago220,2018-01-14,5,Obsessed!!!,24,1,Why are people complaining about the blonde br...,obsessed!!! why are people complaining about t...,positive,obsess complain blonde blonde love blonde favo...
4,0_bj,Kassidyk,2020-07-24,1,Worst Ice Cream Ever!,1,5,This ice cream is worst ice cream I’ve ever ta...,worst ice cream ever! this ice cream is worst ...,negative,bad bad taste excited pick wait try sweet fine...
5,0_bj,Nikiera,2020-07-23,2,Way Too Salty,3,1,I bought this last night to go with Louisiana ...,way too salty i bought this last night to go w...,negative,salty buy crave salty sweet thrill salt carame...
6,0_bj,Mmelvin,2017-05-28,3,"Love this flavor, but...",3,3,"This is definitely my favorite flavor, but rec...","love this flavor, but... this is definitely my...",neutral,love favorite blonde look plain salt happen di...


In [13]:
df.sentiment_terms[0]

'good wrong come sweet pack bountiful cry amazing sweet disappointing like will buy live promising find well wayyy yummy chewy overall good great sweet brownie add'

In [14]:
df.sentiment_terms[1]

'obsess decide try huge ok specific disappointing buy believe second glorious big bother take fifth go daily need'

In [15]:
df.sentiment_terms[4]

'bad bad taste excited pick wait try sweet fine salt wrong disgusting salty inedible enjoy stomach love salt caramel come read think bad'

## Modeling

In [19]:
from keras.models import Sequential
from keras.layers import Dense

sentiment_model = Sequential()
sentiment_model.add(Dense(512, input_shape=(7808,), activation='relu'))
sentiment_model.add(Dense(3, activation='softmax'))
sentiment_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [20]:
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df.full_text)
sentiment_tokenized = pd.DataFrame(tokenizer.texts_to_matrix(df.sentiment_terms))

In [21]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

label_encoder_2 = LabelEncoder()
integer_sentiment = label_encoder_2.fit_transform(df.sentiment_result)
sentiment_res = to_categorical(integer_sentiment)

In [22]:
sentiment_tokenized.shape
sentiment_tokenized.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7798,7799,7800,7801,7802,7803,7804,7805,7806,7807
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
sentiment_res.shape
sentiment_res

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)

In [24]:
sentiment_model.fit(sentiment_tokenized, sentiment_res, epochs=5, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1633dd9d0>

## Predict

In [31]:
new_positive_review = "This ice cream has good taste and is nice and sweet"

In [32]:
chunks_p = [(chunk.root.text) for chunk in nlp(new_positive_review).noun_chunks if chunk.root.pos_ == 'NOUN']
new_review_aspect_terms_p = ' '.join(chunks_p)
new_review_aspect_tokenized_p = tokenizer.texts_to_matrix([new_review_aspect_terms_p])
# print(new_review_aspect_terms_p)

In [33]:
new_review_category_p = label_encoder_2.inverse_transform(sentiment_model.predict_classes(new_review_aspect_tokenized_p))
print(new_review_category_p)

['positive']


In [44]:
new_negative_review = "the salted caramel core is what’s really wrong."
# new_negative_review = " worst Ice cream ever! I don't like it, disgusting."

In [45]:
chunks_n = [(chunk.root.text) for chunk in nlp(new_negative_review).noun_chunks if chunk.root.pos_ == 'NOUN']
new_review_aspect_terms_n = ' '.join(chunks_n)
new_review_aspect_tokenized_n = tokenizer.texts_to_matrix([new_review_aspect_terms_n])
# print(new_review_aspect_terms_n)

In [46]:
new_review_category_n = label_encoder_2.inverse_transform(sentiment_model.predict_classes(new_review_aspect_tokenized_n))
print(new_review_category_n)

['negative']
