# Carrying out Sentiment Analysis:¶
Two options: Build your own, or use a handy python package!
We will try both

### 1: Build your own Sentiment Analysis model using Keras

In [2]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras.preprocessing import sequence
import numpy as np

Using TensorFlow backend.


### 2: Create the data

In [3]:
reviews = ['I really didnt like it',
'it was pure shite',
'it was great',
'as great as talking to nedra',
'waste of time',
'well worth it',
'awesome']

In [4]:
labels = [0,0,1,1,0,1,1]

### 3: One hot encode the text

In [5]:
vocab = []
max_length = 0


for review in reviews:
    review = review.lower().split()
    for word in review:
        vocab.append(word)
        if len(review) > max_length:
            max_length = len(review)

vocab = list(set(vocab))
vocab_size = len(vocab) +1

In [6]:
vocab_to_keys = {}
key_to_vocab = {}

embedded_reviews = []

for i in range(len(vocab)):
    vocab_to_keys[vocab[i]] = i+1
    key_to_vocab[i+1] = vocab[i]

In [7]:
embedded_docs = [[vocab_to_keys[x] for x in review.lower().split()] for review in reviews]

In [8]:
padded_docs = sequence.pad_sequences(embedded_docs, maxlen=max_length, padding='post')

In [10]:
padded_docs

array([[ 3,  2,  8,  9, 11,  0],
       [11, 18, 17,  6,  0,  0],
       [11, 18,  7,  0,  0,  0],
       [10,  7, 10, 16,  5,  4],
       [14, 15, 13,  0,  0,  0],
       [ 1, 12, 11,  0,  0,  0],
       [19,  0,  0,  0,  0,  0]], dtype=int32)

In [122]:
model = Sequential()
model.add(Embedding(vocab_size, 16, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy')

In [123]:
model.compile(optimizer='adam', loss='binary_crossentropy')

In [124]:
X = padded_docs
y = labels

model.fit(X,y, epochs=50,verbose=0)

<keras.callbacks.History at 0x12d17a978>

In [125]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 6, 16)             320       
_________________________________________________________________
flatten_7 (Flatten)          (None, 96)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 97        
Total params: 417
Trainable params: 417
Non-trainable params: 0
_________________________________________________________________


### 5: Predict new phrases

In [129]:
max_length = 6
new_reviews = ['it was really awesome','it was great like', 'awesome waste of time']
embedded_docs = [[vocab_to_keys[x] for x in review.lower().split()] for review in new_reviews]
padded_docs = sequence.pad_sequences(embedded_docs, maxlen=max_length, padding='post')

In [130]:
X = padded_docs
ypred = []
test_labels = [1,1,0]

ypred.append(model.predict(X))

In [131]:
ypred

[array([[0.538326 ],
        [0.4979284],
        [0.5366051]], dtype=float32)]

### 6: Use Vader to derive sentiment

In [134]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()

In [135]:
def print_sentiment_scores(sentence):
    snt = analyser.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, str(snt)))

In [137]:
print_sentiment_scores('it was really awesome')

it was really awesome------------------- {'neg': 0.0, 'neu': 0.406, 'pos': 0.594, 'compound': 0.659}


In [138]:
print_sentiment_scores('awesome waste of time')

awesome waste of time------------------- {'neg': 0.315, 'neu': 0.225, 'pos': 0.461, 'compound': 0.3182}
