# Review Sentiment 

In [1]:
import random
import json
import pickle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn import svm

In [2]:
class Sentiment:
    NEGATIVE = "NEGATIVE"
    NEUTRAL = "NEUTRAL"
    POSITIVE = "POSITIVE"


class Review:
    def __init__(self, text, score):
        self.text = text
        self.score = score
        self.sentiment = self.get_sentiment()
        
    def get_sentiment(self):
        if self.score <= 2:
            return Sentiment.NEGATIVE
        elif self.score == 3:
            return Sentiment.NEUTRAL
        else:
            return Sentiment.POSITIVE
        
        
class ReviewContainer:
    def __init__(self, reviews):
        self.reviews = reviews   
        
    def get_text(self):
        return [x.text for x in self.reviews]
        
    def get_sentiment(self):
        return [x.sentiment for x in self.reviews]
        
    
    def evenly_distribute(self):
        negative = list(filter(lambda x: x.sentiment == Sentiment.NEGATIVE, self.reviews))
        positive = list(filter(lambda x: x.sentiment == Sentiment.POSITIVE, self.reviews))
        positive_shrunk = positive[:len(negative)]
        self.reviews = negative + positive_shrunk
        random.shuffle(self.reviews)
        
        

In [3]:
file_name = 'Books_small_10000.json'

reviews = [] 
with open(file_name) as f:
    for line in f:
        review = json.loads(line)
        reviews.append(Review(review['reviewText'], review['overall']))
        

In [4]:
reviews[84].text

"I'm very happy thus far with this purchase.Have made a total re-do of my eating habits and needed to know some OTHER recipes for the way I want to eat!Kudos for your efforts!Thanks againHAPPY"

In [5]:
reviews[74].text



In [6]:
reviews[4].text

'It was a decent read.. typical story line. Nothing unsavory as so many are. Just a slice of life, plausible.'

### Prep Data

In [7]:
training, test = train_test_split(reviews, test_size=0.33, random_state=42)

train_container = ReviewContainer(training)
test_container = ReviewContainer(test)


In [8]:
train_container.evenly_distribute()
train_x = train_container.get_text()
train_y = train_container.get_sentiment()

test_container.evenly_distribute()
test_x = test_container.get_text()
test_y = test_container.get_sentiment()

print(train_y.count(Sentiment.POSITIVE))
print(train_y.count(Sentiment.NEGATIVE))

436
436


### Bag of words vectorization

In [9]:
vectorizer = TfidfVectorizer()
# vectorizer = CountVectorizer()
vectorizer.fit_transform(train_x)

train_x_vectors = vectorizer.fit_transform(train_x)

test_x_vectors = vectorizer.transform(test_x)

print(train_x[0])
print(train_x_vectors[0])

Great premise for story line but too much rambling about nonsense.  Skimmed thru most of the story.  Had no attachment with characters.
  (0, 1362)	0.1378237621814384
  (0, 8760)	0.10068934343857351
  (0, 620)	0.38073932091137047
  (0, 5384)	0.15378911233339787
  (0, 3577)	0.14572443984562364
  (0, 7929)	0.0642992675940265
  (0, 5478)	0.07824622568378045
  (0, 5196)	0.18580071949878224
  (0, 8013)	0.31335828102133484
  (0, 7201)	0.34345774490895203
  (0, 5393)	0.33145575817209505
  (0, 149)	0.13067840165318165
  (0, 6350)	0.35893099698216996
  (0, 5224)	0.15539478216193722
  (0, 8079)	0.1574717176069545
  (0, 1168)	0.09824083215603922
  (0, 4701)	0.22380426533203865
  (0, 7533)	0.22538266035086094
  (0, 3177)	0.0928845746853722
  (0, 6079)	0.26889459290411516
  (0, 3495)	0.15458595571987008


# Classification

### Linear SVM

In [10]:
clf_svm = svm.SVC(kernel='linear')
clf_svm.fit(train_x_vectors, train_y)

test_x[0]

clf_svm.predict(test_x_vectors[0])

array(['POSITIVE'], dtype='<U8')

#### Decision Tree


In [11]:
clf_dec = DecisionTreeClassifier()
clf_dec.fit(train_x_vectors, train_y)

clf_dec.predict(test_x_vectors[0])

array(['NEGATIVE'], dtype='<U8')

#### Naive Bayes

In [12]:
clf_gnb = DecisionTreeClassifier()
clf_gnb.fit(train_x_vectors, train_y)

clf_gnb.predict(test_x_vectors[0])

array(['NEGATIVE'], dtype='<U8')

#### Logistic Regression

In [13]:
clf_log = LogisticRegression()
clf_log.fit(train_x_vectors, train_y)

clf_log.predict(test_x_vectors[0])

array(['POSITIVE'], dtype='<U8')

## Evaluation

In [14]:
print(clf_svm.score(test_x_vectors, test_y))
print(clf_dec.score(test_x_vectors, test_y))
print(clf_gnb.score(test_x_vectors, test_y))
print(clf_log.score(test_x_vectors, test_y))

0.8076923076923077
0.6442307692307693
0.6346153846153846
0.8052884615384616


In [15]:
f1_score(test_y, clf_svm.predict(test_x_vectors), average=None, labels=[Sentiment.POSITIVE, Sentiment.NEGATIVE])

array([0.80582524, 0.80952381])

In [16]:
test_y.count(Sentiment.POSITIVE)

208

In [17]:
test_set = ['did not enjoy', 'bad book, do not buy', 'good']
new_test = vectorizer.transform(test_set)

clf_svm.predict(new_test)

array(['NEGATIVE', 'NEGATIVE', 'POSITIVE'], dtype='<U8')

## Tuning model with Grid Search

In [18]:
parameters = {'kernel': ('linear','rbf'), 'C': (1,4,8,16,32)}

svc = svm.SVC()
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(train_x_vectors, train_y)


GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': (1, 4, 8, 16, 32), 'kernel': ('linear', 'rbf')})

In [19]:
print(clf.score(test_x_vectors, test_y))

0.8076923076923077


## Saving Model


In [20]:
with open('sentiment_classifier.pkl', 'wb') as f:
    pickle.dump(clf, f)

## Loading Model

In [21]:
with open('sentiment_classifier.pkl', 'rb') as f:
    loaded_clf = pickle.load(f)

In [22]:
print(test_x[84])

loaded_clf.predict(test_x_vectors[0])

Fantastic, simply fantastic!!  In previous books of the Blood Curse series, we have read of how the Jaegar vampires (Dark) treated innocents and even their own people, it is beyond cruel, horrendous, or atrocious, but this is how Saber was raised and has lived for 800 years; now he has been told he was stolen and raised in the wrong house, that he is really a Jadon vampire (Light), how can he just change overnight to become someone else and Vanya (the destiny of Saber and our heroine) what she had been through, how she had been coping with becoming the person she thought she should be for herself and her people, but inside her heart wishing she could become the person she wanted to be.  The continual angst that Saber mind was always going through was a deep-down heartfelt thing to read along with the continual battle of Vanya struggling with who she wanted to become.  Ms Dawn wrote Saber's character and story so you can actually feel his torment and anger coming off the pages and could

array(['POSITIVE'], dtype='<U8')