In [1]:
import numpy as np
from utils import *
import matplotlib.pyplot as plt
np.random.seed(0)
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
np.random.seed(1)
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the training and test splits from CSV.
# presumably read_csv comes from the `utils` star-import and returns (sentences, labels) — confirm in utils
X_train, Y_train = read_csv('my_data/myTrainDataset.csv')
X_test, Y_test = read_csv('my_data/myTestDataset.csv')

In [3]:
# Sequence length fed to the model: the largest word count among the training
# sentences, plus one (the +1 is kept from the original cell).
# The original picked the sentence with the most *characters* via
# max(..., key=len), which is not necessarily the one with the most words.
maxLen = max(len(sentence.split()) for sentence in X_train) + 1

In [4]:
# Inspect the shape of the training inputs (a 1-D array, one entry per sentence).
X_train.shape

(223,)

In [5]:
# One-hot encode the training labels into C=3 classes.
# NOTE(review): Y_oh_train is not read by any later cell visible here; the
# training cell builds its own Y_train_oh.
Y_oh_train = convert_to_one_hot(Y_train, C=3)
# Test-label counterpart, left disabled (it must encode Y_test, not Y_train):
# Y_oh_test = convert_to_one_hot(Y_test, C=3)

In [6]:
# Load the pre-trained GloVe vectors. word_to_index and word_to_embedding_map
# are used below to encode sentences and to fill the embedding matrix.
word_to_index, index_to_word, word_to_embedding_map = read_glove_embedding('my_data/glove.6B.50d.txt')

Read glove embedding


In [26]:
# Number of words in the loaded vocabulary.
len(word_to_index)

400000

In [7]:
# Spot-check the loaded vectors: dot product between the embeddings of two words.
np.dot(word_to_embedding_map['did'], word_to_embedding_map['was'])

17.977195264457002

In [8]:
def sentences_to_indices(X, word_to_index, max_len):
    """
    Convert an array of sentences into a zero-padded matrix of word indices.

    Each sentence is lower-cased and split on whitespace, and every word is
    looked up in `word_to_index`. Sentences with fewer than `max_len` words
    are right-padded with zeros; sentences with more are truncated to their
    first `max_len` words (previously this case raised IndexError).

    Arguments:
    X -- array of sentence strings, of shape (N,)
    word_to_index -- a dictionary mapping each lower-cased word to its index
    max_len -- maximum number of words kept per sentence

    Returns:
    X_indices -- array of word indices, of shape (N, max_len)

    Raises:
    KeyError -- if one of the kept words is missing from word_to_index
    """
    N = X.shape[0]
    X_indices = np.zeros((N, max_len))

    for i in range(N):
        # Keep at most max_len words so the column index can never overflow.
        words = X[i].lower().split()[:max_len]
        for j, w in enumerate(words):
            X_indices[i, j] = word_to_index[w]

    return X_indices

In [9]:
def glove_embedding_layer(word_to_embedding_map, word_to_index):
    """
    Build a Keras Embedding layer whose weights are the pre-trained word vectors.

    Arguments:
    word_to_embedding_map -- dictionary mapping each word to its embedding vector
                             (an array exposing .shape)
    word_to_index -- dictionary mapping each word to its row index in the matrix

    Returns:
    embedding_layer -- built Embedding layer holding the pre-trained matrix;
                       created with trainable=True, so the vectors are updated
                       during training

    Raises:
    ValueError -- if word_to_embedding_map is empty
    KeyError -- if a word in word_to_index has no entry in word_to_embedding_map
    """
    if not word_to_embedding_map:
        raise ValueError("word_to_embedding_map is empty; cannot infer the embedding dimension")

    # One row more than the vocabulary size.
    # presumably indices start at 1 and row 0 stays all-zero for padding — confirm in read_glove_embedding
    vocab_corpus_len = len(word_to_index) + 1
    # Read the dimensionality from any vector instead of relying on a
    # specific word ('bus') being present in the vocabulary.
    emb_dim = next(iter(word_to_embedding_map.values())).shape[0]
    emb_matrix = np.zeros((vocab_corpus_len, emb_dim))

    for word, index in word_to_index.items():
        emb_matrix[index] = word_to_embedding_map[word]

    embedding_layer = Embedding(vocab_corpus_len, emb_dim, trainable=True)
    # The layer must be built before its weights can be replaced.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

In [10]:
# Sanity check: build a stand-alone embedding layer and print one of its weights.
# NOTE(review): Sentence_type_classification builds its own layer, so this
# instance is not the one used by the model.
embedding_layer = glove_embedding_layer(word_to_embedding_map, word_to_index)
print("weights[0][1][3] =", embedding_layer.get_weights()[0][1][3])

weights[0][1][3] = -0.3403


In [11]:
def Sentence_type_classification(input_shape, word_to_embedding_map, word_to_index):
    """
    Assemble the (uncompiled) sentence classifier.

    Architecture: word indices -> pre-trained embedding -> LSTM(128, full
    sequence) -> Dropout(0.5) -> LSTM(128, last state) -> Dropout(0.5)
    -> Dense(3) -> softmax.

    Arguments:
    input_shape -- shape of one input sample, passed straight to Input
    word_to_embedding_map -- dictionary mapping each word to its embedding vector
    word_to_index -- dictionary mapping each word to its index

    Returns:
    model -- Keras Model mapping index sequences to 3 class probabilities
    """
    # Integer word indices go in; the embedding turns each into a dense vector.
    inputs = Input(input_shape, dtype='int32')
    glove_layer = glove_embedding_layer(word_to_embedding_map, word_to_index)
    hidden = glove_layer(inputs)

    # The first recurrent layer returns every time step so it can feed the second.
    hidden = LSTM(128, return_sequences=True)(hidden)
    hidden = Dropout(0.5)(hidden)

    # The second recurrent layer returns only its final output.
    hidden = LSTM(128, return_sequences=False)(hidden)
    hidden = Dropout(0.5)(hidden)

    # Three-way classification head.
    scores = Dense(3)(hidden)
    probabilities = Activation('softmax')(scores)

    return Model(inputs, probabilities)

In [12]:
# Build the classifier for sequences of maxLen word indices and show its layers.
model = Sentence_type_classification((maxLen,), word_to_embedding_map, word_to_index)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 13)                0         
_________________________________________________________________
embedding_2 (Embedding)      (None, 13, 50)            20000050  
_________________________________________________________________
lstm_1 (LSTM)                (None, 13, 128)           91648     
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 387       
__________

In [13]:
# Categorical cross-entropy pairs with the softmax output and one-hot targets.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [14]:
# Encode the training sentences as padded index rows and the labels as one-hot vectors.
X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y_train, C = 3)

In [15]:
# Train for 60 epochs in mini-batches of 32, reshuffling the data each epoch.
model.fit(X_train_indices, Y_train_oh, epochs=60, batch_size=32, shuffle=True)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<keras.callbacks.History at 0x11ff06d68>

In [16]:
# Encode the test split the same way as the training data and score the model.
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len = maxLen)
Y_test_oh = convert_to_one_hot(Y_test, C = 3)
# evaluate returns the loss followed by the compiled metric (accuracy).
loss, acc = model.evaluate(X_test_indices, Y_test_oh)
print()
print("Test accuracy = ", acc)


Test accuracy =  0.8666666746139526


In [17]:
# Class probabilities for every test sentence.
# NOTE(review): this repeats the encoding already done in the evaluation cell.
X_test_indices = sentences_to_indices(X_test, word_to_index, maxLen)
pred = model.predict(X_test_indices)

In [18]:
# List the test sentences the model classifies incorrectly.
C = 3

# Re-encode and re-predict so this cell does not depend on the previous one.
X_test_indices = sentences_to_indices(X_test, word_to_index, maxLen)
pred = model.predict(X_test_indices)


for i in range(len(X_test)):
    # The predicted class is the index of the largest softmax probability.
    num = np.argmax(pred[i])
    if num != Y_test[i]:
        # The predicted class now follows its own label; previously the sentence
        # was printed after "prediction:" and the class came last, unlabelled.
        print('Expected :', Y_test[i], ' prediction: ', num, ' sentence: ', X_test[i])

Expected : 1  prediction:  Water the plant once a week 0
Expected : 0  prediction:  To do or not to do is a question 2
Expected : 1  prediction:  Run for your life 2
Expected : 0  prediction:  Tomorrow is my birthday 2


In [19]:
# Classify one hand-written sentence.
# Note: this overwrites X_test_indices from the cells above.
x_test = np.array(['You can read the book'])
X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
# argmax over the softmax output gives the predicted class id.
print(x_test[0], np.argmax(model.predict(X_test_indices)))

You can read the book 1


In [34]:
# Hand-check of the second LSTM's parameter count from model.summary():
# 4 gates x (128x128 recurrent weights + 128 x (128 inputs + 1 bias)).
4*(128*128+128*129)

131584