# Version III

## Model
This model is a bidirectional LSTM with the following structure:

```        
                          *Recurrence*
                      ---------------------
IN --> Embedding --> | Bidirectional LSTM  | --> Softmax
                      ---------------------
```
Results:

25 Epochs

Train:
     error: 0.000000
     loss: 0.003679

Validation:
    Finished Evaluation [1]: Minibatch[1-358]: metric = 0.14% * 400;
    
    
Results:

With adadelta:

25 epochs

Train:
    error: 0.000000
     loss: 0.000199
     
Validation:
    Finished Evaluation [1]: Minibatch[1-358]: metric = 0.00% * 400;

In [4]:
%load_ext autoreload
%autoreload 1
import cntk as C
import random
import sys
import os
from cntk import Trainer, Axis
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,\
        INFINITELY_REPEAT
from cntk.learners import adam, sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
from cntk import input_variable, cross_entropy_with_softmax, \
        classification_error, sequence
from cntk.logging import ProgressPrinter
from cntk.layers import Sequential, Embedding, Recurrence, LSTM, Dense, BatchNormalization

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import math
import numpy as np
import cntk.tests.test_utils

# Device selection hook (only needed for our build system).
cntk.tests.test_utils.set_device_from_pytest_env()
# Fix a random seed for CNTK components.
C.cntk_py.set_fixed_random_seed(1)

In [9]:
# Dimensions shared by the reader and the model inputs.
input_dim = 38482
num_output_classes = 3

# Network inputs: a sparse sequence of feature vectors and one label vector.
features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
label = C.input_variable(num_output_classes)

In [7]:
# Creates the reader
def create_reader(path, is_training, input_dim, label_dim):
    """
        Builds a MinibatchSource over a CTF data file.

        Args:
            path: is the relative path to the data file.
            is_training: when true the data is randomized and repeated
                indefinitely; otherwise it is not randomized and is
                swept exactly once.
            input_dim: shape of the sparse 'S0' feature stream (vocab size)
            label_dim: shape of the dense 'S1' label stream (number of classes)

        Returns:
            The configured MinibatchSource.
    """
    feature_stream = StreamDef(field='S0', shape=input_dim, is_sparse=True)
    label_stream = StreamDef(field='S1', shape=label_dim, is_sparse=False)
    deserializer = CTFDeserializer(
        path, StreamDefs(features=feature_stream, labels=label_stream))

    if is_training:
        sweeps = INFINITELY_REPEAT
    else:
        sweeps = 1
    return MinibatchSource(deserializer, randomize=is_training, max_sweeps=sweeps)

def create_lstm_v_1():
    """
        Wrapper function that pins the chosen hyperparameters.

        Returns:
            The result of create_model for the global num_output_classes
            and the fixed embedding, hidden and cell sizes below.
    """
    return create_model(num_output_classes,
                        embedding_dim=100, hidden_dim=150, cell_dim=50)


def BiRecurrence(fwd, bwd):
    """
        Combines a forward and a backward recurrence over the same input.

        Args:
            fwd: step function wrapped in a Recurrence.
            bwd: step function wrapped in a Recurrence with go_backwards=True.

        Returns:
            The splice of both recurrences applied to one shared placeholder.
    """
    forward = C.layers.Recurrence(fwd)
    backward = C.layers.Recurrence(bwd, go_backwards=True)
    shared_input = C.placeholder()
    return C.splice(forward(shared_input), backward(shared_input))


# Defines the LSTM model for classifying sequences
def create_model(num_output_classes, embedding_dim,
                                hidden_dim, cell_dim):
    """
        Defines the LSTM model for classifying sequences.

        Args:
            num_output_classes: output size of the final Dense layer.
            embedding_dim: output size of the Embedding layer.
            hidden_dim: total recurrent size; each direction gets
                hidden_dim // 2 units.
            cell_dim: accepted but not used by this implementation.

        Returns:
            A Sequential of embedding, bidirectional recurrence,
            sequence.last and a Dense layer.
    """
    # Layers are created inside the options scope, in the same order as they
    # are applied, so they all pick up initial_state=0.1.
    with C.layers.default_options(initial_state=0.1):
        units_per_direction = hidden_dim // 2
        embed = Embedding(embedding_dim, name='embed')
        recurrence = BiRecurrence(C.layers.LSTM(units_per_direction),
                                  C.layers.LSTM(units_per_direction))
        classify = Dense(num_output_classes, name='classify')
        return Sequential([embed, recurrence, sequence.last, classify])
    


# Creates and trains a LSTM sequence classification model
def train_sequence_classifier(reader, model_func, max_epochs=25):
    """
        Trains a model on sequences.

        Args:
            reader - the data source that yields data from a source file.
            model_func - defined model instance; it is applied to the
                global `features` input to obtain the network to train.
            max_epochs - number of epochs to train

        Returns:
            Tuple (evaluation_average, loss_average) of the trainer's
            previous minibatch, both as floats.
    """
    # Init model
    model = model_func(features)

    # Constants
    minibatch_size = 80
    samples = 182584
    # NOTE(review): this value is a minibatch count, yet the loop below
    # compares it against `t`, which accumulates label *samples*. An epoch
    # therefore ends after about samples / minibatch_size label samples.
    # Kept unchanged to preserve the existing training length - confirm intent.
    minibatch_per_epoch = samples / minibatch_size

    # Cross Entropy and Classification Error
    ce = cross_entropy_with_softmax(model, label)
    pe = classification_error(model, label)

    input_map = {
        features: reader.streams.features,
        label:    reader.streams.labels
    }

    lr_schedule = C.learning_parameter_schedule(1, minibatch_size=C.learners.IGNORE)
    # Take the parameters from the model being trained rather than from the
    # notebook-global `z`, so the function does not depend on hidden state.
    learner = C.adadelta(model.parameters, lr_schedule, 0.999, 1e-6)

    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs, metric_is_pct=False)
    trainer = Trainer(model, (ce, pe), learner, progress_printer)

    # Get minibatches of sequences to train with and perform model training
    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * minibatch_per_epoch
        while t < epoch_end:
            mb = reader.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(mb)
            t += mb[label].num_samples
        trainer.summarize_training_progress()

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average

In [29]:
def do_train():
    """
        Trains a freshly created model on the IMDB training file.

        The model is stored in the global `z`; the error and loss returned
        by train_sequence_classifier are printed.
    """
    global z
    train_path = "data/rotten_imdb/imdb.train.ctf"
    reader = create_reader(train_path, True, input_dim, num_output_classes)
    z = create_lstm_v_1()
    final_error, final_loss = train_sequence_classifier(reader, z)
    print(" error: %f" % final_error)
    print(" loss: %f" % final_loss)

def m_do_train(config):
    """
        Trains one model on several data files, one after another.

        Args:
            config: iterable of dicts, each holding a 'path' (training
                file) and 'iterations' (epochs to train on that file).

        Returns:
            The trained model, which is also stored in the global `z`.
    """
    global z
    z = create_lstm_v_1()

    for stage in config:
        path, iterations = stage['path'], stage['iterations']

        # A new reader per stage; the same model keeps training.
        stage_reader = create_reader(path, True, input_dim, num_output_classes)
        stage_error, stage_loss = train_sequence_classifier(stage_reader, z, iterations)

        print(" error: %f" % stage_error)
        print(" loss: %f" % stage_loss)
    return z
# Single-stage schedule: 25 iterations on the IMDB training file.
config = [{"path": "data/rotten_imdb/imdb.train.ctf", "iterations": 25}]
m_do_train(config)

Learning rate per minibatch: 1.0


KeyboardInterrupt: 

RuntimeError: SWIG director method error.

In [30]:
def create_criterion_function_preferred(model, labels):
    """
        Builds the loss and error criteria for a model.

        Args:
            model - that is being trained.
            labels - labels to compare with (cntk Variable)
        Returns:
            Tuple (loss, error metric): cross entropy with softmax and
            classification error, each computed on (model, labels).
    """
    loss = C.cross_entropy_with_softmax(model, labels)
    error_metric = C.classification_error(model, labels)
    return loss, error_metric

def evaluate(reader, model_func):
    """
        Evaluates the model given a reader (data source) and
        the trained model.

        Args:
            reader - data source that reads from files.
            model_func - the trained model; it is applied to the global
                `features` input.

        Prints a test summary through a ProgressPrinter. Nothing is
        summarized when the reader yields no data.
    """
    # Instantiate the model function on the input (feature) variable
    model = model_func(features)

    # Create the loss and error functions
    loss, label_error = create_criterion_function_preferred(model, label)

    progress_printer = C.logging.ProgressPrinter(tag='Evaluation', num_epochs=0)

    # Loop invariants: none of these depend on the minibatch being processed.
    minibatch_size = 32
    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }
    # One evaluator for the whole pass. Creating it per minibatch left
    # `evaluator` unbound at the summary when the reader was empty.
    # NOTE(review): the evaluator is given `loss`, so the reported "metric" is
    # the cross-entropy loss and `label_error` is unused - confirm intent.
    evaluator = C.eval.Evaluator(loss, progress_printer)

    # process minibatches and perform evaluation
    evaluated_any = False
    while True:
        data = reader.next_minibatch(minibatch_size, input_map=input_map)  # fetch minibatch
        if not data:                                 # until we hit the end
            break
        evaluator.test_minibatch(data)
        evaluated_any = True

    if evaluated_any:
        evaluator.summarize_test_progress()

In [31]:
def do_test(path):
    """
        Tests the network on a data file.

        Builds a non-training reader for `path` and evaluates the global
        model `z` on it.

        Args:
            path: CTF file to evaluate on.
    """
    # Reader dimensions; these shadow the notebook-level values of the same name.
    input_dim = 38482
    num_output_classes = 3

    reader = create_reader(path, False, input_dim, num_output_classes)
    evaluate(reader, z)

In [35]:
# Two-stage schedule: 25 iterations on IMDB, then 5 on the BBC training file.
config = [
    {"path": "data/rotten_imdb/imdb.train.ctf", "iterations": 25},
    {"path": "data/bbc/aggregated/bbc.250.init.train.ctf", "iterations": 5},
]

z = m_do_train(config)

# Evaluate the trained model on the validation and test files.
do_test("data/rotten_imdb/imdb.val.ctf")
do_test("data/bbc/aggregated/bbc.250.init.val.ctf")
do_test("data/bbc/aggregated/bbc.250.init.test.ctf")


Learning rate per minibatch: 1.0
Finished Epoch[1 of 25]: [Training] loss = 0.709728 * 2282, metric = 0.464505 * 2282 16.882s (135.2 samples/s);
Finished Epoch[2 of 25]: [Training] loss = 0.570168 * 2284, metric = 0.278459 * 2284 16.169s (141.3 samples/s);
Finished Epoch[3 of 25]: [Training] loss = 0.353434 * 2280, metric = 0.143860 * 2280 14.595s (156.2 samples/s);
Finished Epoch[4 of 25]: [Training] loss = 0.239553 * 2284, metric = 0.092382 * 2284 14.523s (157.3 samples/s);
Finished Epoch[5 of 25]: [Training] loss = 0.164662 * 2281, metric = 0.056993 * 2281 14.900s (153.1 samples/s);
Finished Epoch[6 of 25]: [Training] loss = 0.179889 * 2282, metric = 0.065294 * 2282 14.839s (153.8 samples/s);
Finished Epoch[7 of 25]: [Training] loss = 0.135258 * 2281, metric = 0.046909 * 2281 14.952s (152.6 samples/s);
Finished Epoch[8 of 25]: [Training] loss = 0.049729 * 2283, metric = 0.013579 * 2283 14.762s (154.7 samples/s);
Finished Epoch[9 of 25]: [Training] loss = 0.038919 * 2282, metric = 0.

Finished Evaluation [1]: Minibatch[1-104]: metric = 0.00% * 124;


In [44]:
# Write the model held in the global `z` to a file.
z.save("imbd.v3-1.cntk.mdl")

In [45]:
# Load the model file written by z.save(...) into `loaded_model`.
from cntk import load_model
loaded_model = load_model("imbd.v3-1.cntk.mdl")