### Step 0: Preprocess Data

In [4]:
from keras.preprocessing import sequence
from keras.datasets import imdb

# https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb/load_data
# num_words: max number of words to include. 
# Words are ranked by how often they occur (in the training set) and only the most frequent words are kept
max_words= 20000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

Using TensorFlow backend.


In [5]:
X_train

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]),
       list([1, 194, 1153, 194, 82

In [6]:
X_train.shape

(25000,)

In [7]:
len(X_train[0])

218

In [8]:
len(X_train[1])

189

In [0]:
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences
# maxlen: maximum length of all sequences.
# Sequences that are shorter than maxlen are padded with value 0 by default
# Sequences longer than maxlen are truncated so that they fit the desired length.

max_len = 80
X_train = sequence.pad_sequences(X_train,maxlen=max_len,padding='pre', truncating='pre')
X_test = sequence.pad_sequences(X_test,maxlen=max_len)

In [10]:
X_train.shape

(25000, 80)

In [11]:
X_train[:1,:]

array([[   15,   256,     4,     2,     7,  3766,     5,   723,    36,
           71,    43,   530,   476,    26,   400,   317,    46,     7,
            4, 12118,  1029,    13,   104,    88,     4,   381,    15,
          297,    98,    32,  2071,    56,    26,   141,     6,   194,
         7486,    18,     4,   226,    22,    21,   134,   476,    26,
          480,     5,   144,    30,  5535,    18,    51,    36,    28,
          224,    92,    25,   104,     4,   226,    65,    16,    38,
         1334,    88,    12,    16,   283,     5,    16,  4472,   113,
          103,    32,    15,    16,  5345,    19,   178,    32]],
      dtype=int32)

In [12]:
y_train.shape

(25000,)

In [13]:
y_train[:10]

array([1, 0, 0, 1, 0, 0, 1, 0, 1, 0])

In [0]:
# One-hot encode label
import numpy as np
n_classes = len(np.unique(y_train)) # n_classes = 2
y_train = np.eye(n_classes)[y_train]
y_test = np.eye(n_classes)[y_test]

In [15]:
y_train[:10]

array([[0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.]])

### Embedding

In [16]:
vocab = ['the','like','between','did','just','national','day','country','under','such','second']

emb = np.array([[0.418, 0.24968, -0.41242, 0.1217, 0.34527, -0.044457, -0.49688, -0.17862],
   [0.36808, 0.20834, -0.22319, 0.046283, 0.20098, 0.27515, -0.77127, -0.76804],
   [0.7503, 0.71623, -0.27033, 0.20059, -0.17008, 0.68568, -0.061672, -0.054638],
   [0.042523, -0.21172, 0.044739, -0.19248, 0.26224, 0.0043991, -0.88195, 0.55184],
   [0.17698, 0.065221, 0.28548, -0.4243, 0.7499, -0.14892, -0.66786, 0.11788],
   [-1.1105, 0.94945, -0.17078, 0.93037, -0.2477, -0.70633, -0.8649, -0.56118],
   [0.11626, 0.53897, -0.39514, -0.26027, 0.57706, -0.79198, -0.88374, 0.30119],
   [-0.13531, 0.15485, -0.07309, 0.034013, -0.054457, -0.20541, -0.60086, -0.22407],
   [ 0.13721, -0.295, -0.05916, -0.59235, 0.02301, 0.21884, -0.34254, -0.70213],
   [ 0.61012, 0.33512, -0.53499, 0.36139, -0.39866, 0.70627, -0.18699, -0.77246 ],
   [ -0.29809, 0.28069, 0.087102, 0.54455, 0.70003, 0.44778, -0.72565, 0.62309 ]])


emb.shape

(11, 8)

In [19]:
# embedding_lookup
import tensorflow as tf
a = tf.constant([[1,2], [3,4], [5,6]])
ids = tf.constant([0,2,1])
result = tf.nn.embedding_lookup(a, ids)
tf.Session().run(result)

array([[1, 2],
       [5, 6],
       [3, 4]], dtype=int32)

In [0]:
from collections import OrderedDict

In [0]:
# embedding as TF tensor (for now constant; could be tf.Variable() during training)
tf_embedding = tf.constant(emb, dtype=tf.float32)

In [0]:
# input for which we need the embedding
input_str = "like the country"

In [23]:
word_to_idx = OrderedDict({w:vocab.index(w) for w in input_str.split() if w in vocab})
word_to_idx

OrderedDict([('like', 1), ('the', 0), ('country', 7)])

In [24]:
# lookup in embedding matrix & return the vectors for the input words
tf.Session().run(tf.nn.embedding_lookup(tf_embedding, list(word_to_idx.values())))

array([[ 0.36808 ,  0.20834 , -0.22319 ,  0.046283,  0.20098 ,  0.27515 ,
        -0.77127 , -0.76804 ],
       [ 0.418   ,  0.24968 , -0.41242 ,  0.1217  ,  0.34527 , -0.044457,
        -0.49688 , -0.17862 ],
       [-0.13531 ,  0.15485 , -0.07309 ,  0.034013, -0.054457, -0.20541 ,
        -0.60086 , -0.22407 ]], dtype=float32)

In [0]:
# Step 1: Initial Setup
import tensorflow as tf
X = tf.placeholder(tf.int32,[None,max_len])
y = tf.placeholder(tf.int32)

In [0]:
rnn_size = 32
embedding_size = 128
n_classes = 2
W = tf.Variable(tf.truncated_normal([rnn_size,n_classes],stddev=0.1))
B = tf.Variable(tf.truncated_normal([n_classes],stddev=0.1))
embedding = tf.Variable(tf.random_uniform([max_words, embedding_size], -1.0, 1.0))

In [0]:
# Step 2 Model
x_embedded = tf.nn.embedding_lookup(embedding,X)
cell = tf.nn.rnn_cell.LSTMCell(rnn_size, name='LSTM5')

outputs, final_output_state = tf.nn.dynamic_rnn(cell,x_embedded,dtype=tf.float32)

Ylogits= tf.matmul(outputs[:, -1],W)+B
yhat = tf.nn.softmax(Ylogits)

In [53]:
# for an input with max sequence length of T time steps 
# outputs is of the shape [Batch_size, time_step (max_words), num_inputs (rnn_size)] 
# and it contains the output state at each timestep h1, h2.....hT.
outputs.shape

TensorShape([Dimension(None), Dimension(80), Dimension(32)])

In [57]:
# final_output_state is of the shape [Batch_size,num_inputs] and 
# has the final cell state cT and output state hT of each batch sequence.
final_output_state

LSTMStateTuple(c=<tf.Tensor 'rnn_7/while/Exit_3:0' shape=(?, 32) dtype=float32>, h=<tf.Tensor 'rnn_7/while/Exit_4:0' shape=(?, 32) dtype=float32>)

In [0]:
# Step 3 Loss Function
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y,logits=Ylogits))

In [0]:
# Step 4 Optimizer
lr = 0.01
train = tf.train.AdamOptimizer(lr).minimize(loss)

In [47]:
is_correct = tf.equal(tf.argmax(y,1),tf.argmax(yhat,1))
accuracy = tf.reduce_mean(tf.cast(is_correct,tf.float32))

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

training_epochs = 5
batch_size = 100
# Step 5: Training Loop
for epoch in range(training_epochs):
    for i in range(int(X_train.shape[0] / batch_size)):
        batch_X = X_train[(i*batch_size):((i+1)*batch_size)]
        batch_y = y_train[(i*batch_size):((i+1)*batch_size)]
        train_data = {X: batch_X, y: batch_y}
        sess.run(train, feed_dict=train_data)
        if i%100 == 0:
            print(epoch, "/", i,"Training Accuracy = ", sess.run(accuracy, feed_dict=train_data))

0 / 0 Training Accuracy =  0.81
0 / 100 Training Accuracy =  0.74
0 / 200 Training Accuracy =  0.87
1 / 0 Training Accuracy =  0.92
1 / 100 Training Accuracy =  0.92
1 / 200 Training Accuracy =  0.93
2 / 0 Training Accuracy =  0.96
2 / 100 Training Accuracy =  0.96
2 / 200 Training Accuracy =  0.97
3 / 0 Training Accuracy =  0.98
3 / 100 Training Accuracy =  0.97
3 / 200 Training Accuracy =  0.99
4 / 0 Training Accuracy =  1.0
4 / 100 Training Accuracy =  0.97
4 / 200 Training Accuracy =  0.99


In [0]:
test_predict = sess.run(yhat, feed_dict = {X: X_test})

In [49]:
test_predict

array([[0.5306051 , 0.46939498],
       [0.00583245, 0.99416757],
       [0.17719038, 0.8228096 ],
       ...,
       [0.71862876, 0.28137124],
       [0.99175996, 0.00824007],
       [0.971881  , 0.02811901]], dtype=float32)

In [50]:
sess.run(tf.argmax(test_predict,1))

array([0, 1, 1, ..., 0, 0, 0])

In [51]:
sess.run(accuracy, feed_dict = {X: X_test, y: y_test})

0.80888

In [0]:
acc = []
for i in range(int(X_test.shape[0] / batch_size)):
    batch_X = X_test[(i*batch_size):((i+1)*batch_size)]
    batch_y = y_test[(i*batch_size):((i+1)*batch_size)]
    test_data = {X: batch_X, y: batch_y}
    sess.run(train, feed_dict = test_data)
    acc.append(sess.run(accuracy, feed_dict = test_data))

In [0]:
acc

[0.89,
 0.85,
 0.88,
 0.83,
 0.92,
 0.9,
 0.83,
 0.84,
 0.81,
 0.8,
 0.85,
 0.79,
 0.82,
 0.78,
 0.87,
 0.86,
 0.87,
 0.83,
 0.89,
 0.82,
 0.84,
 0.86,
 0.83,
 0.81,
 0.86,
 0.83,
 0.87,
 0.87,
 0.87,
 0.86,
 0.83,
 0.86,
 0.88,
 0.91,
 0.84,
 0.81,
 0.86,
 0.89,
 0.83,
 0.81,
 0.93,
 0.83,
 0.84,
 0.89,
 0.91,
 0.91,
 0.93,
 0.88,
 0.85,
 0.86,
 0.83,
 0.88,
 0.9,
 0.9,
 0.82,
 0.92,
 0.88,
 0.89,
 0.93,
 0.82,
 0.88,
 0.81,
 0.82,
 0.84,
 0.85,
 0.91,
 0.93,
 0.87,
 0.87,
 0.93,
 0.9,
 0.91,
 0.89,
 0.91,
 0.85,
 0.87,
 0.9,
 0.92,
 0.88,
 0.88,
 0.89,
 0.85,
 0.94,
 0.91,
 0.84,
 0.83,
 0.89,
 0.88,
 0.89,
 0.87,
 0.86,
 0.78,
 0.81,
 0.9,
 0.78,
 0.88,
 0.85,
 0.87,
 0.88,
 0.85,
 0.87,
 0.85,
 0.86,
 0.8,
 0.89,
 0.87,
 0.88,
 0.85,
 0.89,
 0.91,
 0.81,
 0.83,
 0.82,
 0.87,
 0.83,
 0.92,
 0.86,
 0.88,
 0.87,
 0.8,
 0.93,
 0.89,
 0.89,
 0.87,
 0.93,
 0.85,
 0.82,
 0.88,
 0.89,
 0.92,
 0.91,
 0.86,
 0.84,
 0.9,
 0.9,
 0.82,
 0.9,
 0.89,
 0.83,
 0.88,
 0.95,
 0.89,
 0.89,
 0.9,
 0.86