# High-level LSTM CNTK Example

In [4]:
# Known-issue banner for anyone opening this notebook.
# NOTE(review): the recorded run at the bottom of the notebook ends at ~0.51
# test accuracy on a two-class problem, i.e. no better than guessing. The cause
# is not confirmed; one suspect is that every review reaches the recurrent layer
# as a single time step rather than as a sequence of tokens - TODO confirm,
# fix, and then remove this banner.
print("Not working for some reason ...")

Not working for some reason ...


In [5]:
import numpy as np
import os
import sys
import cntk
from cntk.layers import Embedding, LSTM, Dense, Recurrence
from cntk import sequence
from common.params_lstm import *
from common.utils import *

In [6]:
# Print the OS, Python, NumPy and CNTK versions and the result of
# get_gpu_name(), one "label value" pair per line.
print(
    *(
        "{} {}".format(tag, detail)
        for tag, detail in (
            ("OS: ", sys.platform),
            ("Python: ", sys.version),
            ("Numpy: ", np.__version__),
            ("CNTK: ", cntk.__version__),
            ("GPU: ", get_gpu_name()),
        )
    ),
    sep="\n"
)

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.13.1
CNTK:  2.0
GPU:  ['Tesla M60', 'Tesla M60', 'Tesla M60', 'Tesla M60']


In [34]:
def create_symbol():
    """Build the network graph on top of the notebook-level `features` input.

    Layers, in order: Embedding(EMBEDSIZE) -> Recurrence(LSTM(NUMHIDDEN)) ->
    sequence.last -> Dense(2). No activation argument is passed to the final
    Dense layer.

    Reads the globals `features`, `EMBEDSIZE` and `NUMHIDDEN`, so `features`
    must already be defined when this function is called.

    Returns:
        The output of the final Dense(2) layer.
    """
    # Glorot-uniform is set as the default weight initialiser for the layers
    # created in this scope; activation (unless stated) is None.
    with cntk.layers.default_options(init=cntk.glorot_uniform()):
        embedded = Embedding(EMBEDSIZE)(features)
        # Fold would be the same as Recurrence but return only the final state;
        # here Recurrence is used and sequence.last picks the final step.
        hidden_states = Recurrence(LSTM(NUMHIDDEN))(embedded)
        final_state = sequence.last(hidden_states)
        return Dense(2)(final_state)

In [35]:
def init_model(m):
    """Create a `cntk.Trainer` for model `m` using an Adam learner.

    The loss is `cross_entropy_with_softmax(m, labels)` and the evaluation
    metric is `classification_error(m, labels)`, where `labels` is the
    notebook-level label input. Hyper-parameters come from the globals
    `LR`, `BETA_1`, `BETA_2` and `EPS`.

    Args:
        m: The model output node to train (as returned by `create_symbol`).

    Returns:
        A `cntk.Trainer` wrapping `m`, the (loss, metric) pair and the learner.
    """
    # Loss (dense labels); TODO check whether sparse labels are supported
    criterion = cntk.cross_entropy_with_softmax(m, labels)

    # Adam hyper-parameters, in the usual notation:
    #   alpha -> learning rate (LR, applied per minibatch)
    #   beta1 -> momentum (BETA_1)
    #   beta2 -> variance momentum (BETA_2)
    step_size = cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch)
    first_moment = cntk.momentum_schedule(BETA_1)
    second_moment = cntk.momentum_schedule(BETA_2)
    optimiser = cntk.adam(
        m.parameters,
        lr=step_size,
        momentum=first_moment,
        variance_momentum=second_moment,
        epsilon=EPS,
        unit_gain=False,
    )

    metric = cntk.classification_error(m, labels)
    return cntk.Trainer(m, (criterion, metric), [optimiser])

In [36]:
%%time
# Data into format for library: the shared helper is asked for sequences of
# length MAXLEN, a vocabulary capped at MAXFEATURES and one-hot labels.
# CNTK format: every returned array is cast to float32.
x_train, x_test, y_train, y_test = (
    split.astype(np.float32)
    for split in imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)
)
# Sanity check: shapes on one line, dtypes on the next.
print(*(split.shape for split in (x_train, x_test, y_train, y_test)))
print(*(split.dtype for split in (x_train, x_test, y_train, y_test)))

Downloading https://s3.amazonaws.com/text-datasets/imdb.npz
Done.
Extracting files...
Done.
Trimming to 20000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000, 2) (25000, 2)
float32 float32 float32 float32
CPU times: user 5.97 s, sys: 232 ms, total: 6.2 s
Wall time: 7.31 s


In [37]:
%%time
# Placeholders: a dense sequence input whose per-step shape is MAXLEN, and a
# 2-wide label input.
# NOTE(review): because the per-step shape is MAXLEN, a whole padded review
# occupies ONE step of the sequence, so the recurrent layer presumably never
# sees more than one time step per sample. This looks like the reason the
# recorded accuracy stays near chance. A per-token input (for example a sparse
# one-hot of vocabulary size, fed one row per token) is probably what was
# intended - TODO confirm; the training and prediction feeds would have to
# change together with this line.
features = sequence.input_variable(shape=MAXLEN, is_sparse=False)
labels = cntk.input_variable(2)

# Load symbol (create_symbol reads the `features` placeholder defined above)
sym = create_symbol()

# Show both placeholders, one per line
print(features, labels, sep="\n")

Input('Input2465', [#, *], [150])
Input('Input2466', [#], [2])
CPU times: user 12 ms, sys: 0 ns, total: 12 ms
Wall time: 11.4 ms


In [38]:
%%time
# Wrap the network output `sym` in a Trainer (loss, metric and learner are
# set up inside init_model, which also reads the `labels` placeholder).
trainer = init_model(sym)

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 4.13 ms


In [44]:
%%time
# Train model: EPOCHS passes over the shuffled training set, one
# trainer.train_minibatch call per batch produced by yield_mb.
for j in range(EPOCHS):
    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # Insert an axis of length 1 after the batch axis. The batch axis is
        # sized from the batch itself instead of assuming BATCHSIZE, so a
        # batch with fewer rows would not make reshape raise.
        # NOTE(review): with this layout every sample is a single step of
        # MAXLEN values - confirm that this is the intended input format.
        data = data.reshape(len(data), 1, MAXLEN)
        label = label.reshape(len(label), 1, 2)
        trainer.train_minibatch({features: data, labels: label})

    # Log (this is just last batch in epoch, not average of batches)
    eval_error = trainer.previous_minibatch_evaluation_average
    print("Epoch %d  |  Accuracy: %.6f" % (j+1, (1-eval_error)))

Epoch 1  |  Accuracy: 0.546875
Epoch 2  |  Accuracy: 0.656250
Epoch 3  |  Accuracy: 0.531250
CPU times: user 2.29 s, sys: 64 ms, total: 2.35 s
Wall time: 2.35 s


In [45]:
%%time
# Predict and then score accuracy
# Apply softmax since that is only applied at training
# with cross-entropy loss
z = cntk.softmax(sym)
# Only whole batches are scored: the tail that does not fill a batch is
# dropped from both the predictions and the ground truth.
# NOTE(review): this assumes yield_mb yields only full batches - TODO confirm.
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# `np.int` was just an alias of the builtin `int` and no longer exists in
# recent NumPy releases; the builtin gives the same dtype on every version.
y_guess = np.zeros(n_samples, dtype=int)
y_truth = np.argmax(y_test[:n_samples], axis=-1)
# The labels yielded here are not needed (y_truth is computed above).
for batch_idx, (data, _label) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    start = batch_idx * BATCHSIZE
    predicted_label_probs = z.eval({features : data})
    # Predicted class = index of the largest probability in each row
    y_guess[start:start + BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)

CPU times: user 3.18 s, sys: 40 ms, total: 3.22 s
Wall time: 3.22 s


In [46]:
# Fraction of scored test samples whose predicted class equals the true class.
# np.mean keeps the reduction inside NumPy instead of iterating over the
# boolean array element by element with the builtin sum().
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.511057692308
