# Sentiment Classification


## Loading the dataset

In [107]:
from keras.datasets import imdb

# Size of the vocabulary kept from the dataset (words are ranked by frequency).
vocab_size = 10000
# Load the reviews; each review arrives already encoded as a list of integer word ids.
# `num_words=vocab_size` restricts the data to the `vocab_size` most frequent words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

In [108]:
# Word -> integer id lookup table shipped with the dataset.
vocabulary = imdb.get_word_index()

# Shift every id up by 3 so that ids 0-3 are free for the special tokens below.
vocabulary = {word: word_id + 3 for word, word_id in vocabulary.items()}
vocabulary.update({
    "<PAD>": 0,
    "<START>": 1,   # the decoded sample review further down starts with this token
    "<UNK>": 2,     # unknown word
    "<UNUSED>": 3,
})

# Inverse lookup table: the key is the integer id and the value is the word.
# It is what decode_review() reads to turn ids back into text.
index = {word_id: word for word, word_id in vocabulary.items()}

def decode_review(text):
    '''Turn an integer-encoded review back into a human readable string.

    Every id in `text` is looked up in the module-level `index` mapping
    (id -> word); ids that have no entry are rendered as '?'. The resulting
    words are joined with single spaces.
    '''
    words = (index.get(token_id, '?') for token_id in text)
    return ' '.join(words)

In [109]:
# Sanity check that the two lookup tables are inverses of each other:
# `vocabulary` maps word -> id and `index` maps that id back to the word.
vocabulary.get('fawn',' '), index.get(34704)

(34704, 'fawn')

In [110]:
# Decode the first training review back into words to eyeball the id -> word mapping.
decode_review(x_train[0])

"<START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also <UNK> to the two little boy's that played the <UNK> of norman and paul they were just brilliant children are often left out of the <UNK> list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for wh

In [111]:
import keras
from keras.preprocessing.sequence import pad_sequences
# Fixed sequence length: pad_sequences() is later called with this value so that
# every review ends up with exactly this many word ids.
maxlen = 300

In [112]:
# Number of training reviews and labels (sequences are not padded yet, hence 1-D).
x_train.shape,y_train.shape

((25000,), (25000,))

In [113]:
# Number of test reviews and labels (sequences are not padded yet, hence 1-D).
x_test.shape,y_test.shape

((25000,), (25000,))

## Train/test split: pad sequences to a fixed length

In [114]:
# Bring every review to exactly `maxlen` ids so that both splits become 2-D arrays.
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

In [115]:
# After padding, both splits should be 2-D with `maxlen` columns.
x_train.shape, x_test.shape

((25000, 300), (25000, 300))

## Build Keras Embedding Layer Model
We can think of the Embedding layer as a dictionary that maps the integer index assigned to a word to a word vector. This layer is very flexible and can be used in a few ways:

* The embedding layer can be used at the start of a larger deep learning model. 
* We could also load pre-trained word embeddings into the embedding layer when we create our model.
* Use the embedding layer to train our own word2vec models.

The Keras embedding layer doesn't require us to one-hot encode our words; instead we have to give each word a unique integer number as an id. For the IMDB dataset we've loaded this has already been done, but if this wasn't the case we could use sklearn [LabelEncoder](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html).

In [116]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

In [117]:
# Simple baseline: a trainable embedding layer followed by a small dense classifier.
model = Sequential()
# Map each of the `vocab_size` word ids to a 32-dimensional vector -> (batch, maxlen, 32).
model.add(Embedding(vocab_size, 32, input_length=maxlen))
# Concatenate the per-word vectors into one feature vector of length maxlen * 32.
model.add(Flatten())
model.add(Dense(250, activation='relu'))
# Single sigmoid unit for the binary sentiment label.
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be wrapped in
# print() -- doing so appends a stray "None" line to the cell output.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 300, 32)           320000    
_________________________________________________________________
flatten_2 (Flatten)          (None, 9600)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 250)               2400250   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 251       
Total params: 2,720,501
Trainable params: 2,720,501
Non-trainable params: 0
_________________________________________________________________
None


In [118]:
# Fit the model
# NOTE(review): the test split doubles as validation data here, so the val_* numbers
# logged per epoch and the final evaluation below are computed on the same samples.
# Consider holding out part of the training data for validation instead.
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=2, batch_size=128, verbose=2)
# Final evaluation of the model
# `scores` holds the loss first, followed by the compiled metrics, so scores[1] is accuracy.
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Train on 25000 samples, validate on 25000 samples
Epoch 1/2
 - 21s - loss: 0.4645 - acc: 0.7517 - val_loss: 0.2929 - val_acc: 0.8750
Epoch 2/2
 - 22s - loss: 0.1367 - acc: 0.9531 - val_loss: 0.3394 - val_acc: 0.8633
Accuracy: 86.33%


## Retrieve the output of each layer in Keras for a given single test sample from the trained model you built

In [121]:
# Select the first example from the test set and verify that it is a padded
# 1-D sequence of `maxlen` word ids.
print(x_test[0].shape)

(300,)


In [83]:
# Print the symbolic input and output shape of every layer, in model order.
# Iterating over model.layers replaces four copy-pasted print calls with hard-coded
# indices and keeps working if layers are added to or removed from the model.
for layer in model.layers:
    print(layer.input.shape, layer.output.shape)

(?, 300) (?, 300, 32)
(?, 300, 32) (?, ?)
(?, ?) (?, 250)
(?, 250) (?, 1)


In [129]:
# The model expects a batch of shape (n_samples, sequence_length), so turn the single
# review into a batch of one. reshape(1, -1) infers the sequence length from the data
# instead of repeating the hard-coded 300, which would break or mis-shape the batch
# if `maxlen` were ever changed.
a = x_test[0].reshape(1, -1)
a.shape

(1, 300)

In [20]:
# List the layer objects of the model, in the order they were added.
model.layers

[<keras.layers.embeddings.Embedding at 0x24c511fa588>,
 <keras.layers.core.Flatten at 0x24c416d9d68>,
 <keras.layers.core.Dense at 0x24c3b4a0748>,
 <keras.layers.core.Dense at 0x24c511fa9b0>]

In [153]:
from keras import backend as K
import numpy as np

inputs = model.input  # input placeholder of the whole model

# Symbolic output tensor of every layer, in model order.
outputs = [lyr.output for lyr in model.layers]

# K.learning_phase() is not fed as an extra input here. It matters for layers such as
# Dropout/BatchNormalization, which change behavior between training and test time;
# this model was built from Embedding, Flatten and Dense layers only.

# One backend function per layer, mapping the model input to that layer's output.
funct = [K.function([inputs], [tensor]) for tensor in outputs]

# Run the single test sample `a` through each function; every call returns a
# one-element list holding that layer's output array.
layer_outputs = [fetch([a]) for fetch in funct]

# Report how many outputs were collected and the shape of each one.
print('Total Outputs: ',len(layer_outputs))
print('layer0 output shape: ',layer_outputs[0][0].shape)
print('layer1 output shape: ',layer_outputs[1][0].shape)
print('layer2 output shape: ',layer_outputs[2][0].shape)
print('layer3 output shape: ',layer_outputs[3][0].shape)

Total Outputs:  4
layer0 output shape:  (1, 300, 32)
layer1 output shape:  (1, 9600)
layer2 output shape:  (1, 250)
layer3 output shape:  (1, 1)


In [160]:
# Predict the class (0 or 1) for the selected example.
# Sequential.predict_classes() was deprecated and later removed from Keras; for a
# model with a single sigmoid output it is equivalent to thresholding predict() at 0.5,
# which works on both old and new Keras versions.
(model.predict(a) > 0.5).astype("int32")

array([[0]])