In [1]:
import cntk as C
import numpy as np

from IndRNN import IndRNNStep

In [2]:
vocab_size = 943 
num_intents = 26    

emb_dim    = 150
hidden_dim = 300

In [3]:
def create_reader(path, is_training):
    return C.io.MinibatchSource(C.io.CTFDeserializer(path, C.io.StreamDefs(
         query         = C.io.StreamDef(field='S0', shape=vocab_size,  is_sparse=True),
         intent        = C.io.StreamDef(field='S1', shape=num_intents, is_sparse=True)
     )), randomize=is_training, max_sweeps = C.io.INFINITELY_REPEAT if is_training else 1)

def create_criterion_function(model, labels):
    ce   = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error      (model, labels)
    return ce, errs # (model, labels) -> (loss, error metric)

def create_model():
    with C.layers.default_options(initial_state=0.1):
        return C.layers.Sequential([
            C.layers.Embedding(emb_dim, name='embed'),
            C.layers.Stabilizer(),
            C.layers.Fold(IndRNNStep(hidden_dim), go_backwards=False),# IndRNN
            C.layers.Dense(num_intents, name='classify')
        ])

In [4]:
def train(reader, model_func, max_epochs=10):
    
    model = model_func(x)
    
    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)

    # training config
    epoch_size = 18000        # 18000 samples is half the dataset size 
    minibatch_size = 100
    
    # LR schedule over epochs 
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    lr_per_sample = [3e-4]*4+[1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)
    
    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)
    
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums, # gradient_clipping_threshold_per_sample=15, 
                    )

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    data_map={x: reader.streams.query, y: reader.streams.intent } 

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map= data_map)  # fetch minibatch
            # print(data) # to figure out the dynamic axis
            trainer.train_minibatch(data)
            t += data[y].num_samples      
            if t % 6000 == 0:
                training_loss = trainer.previous_minibatch_loss_average
                error = trainer.previous_minibatch_evaluation_average
                print("epoch: {}, step: {}, loss: {:.5f}, error {:.5f}".format(epoch, t, training_loss, error))
        trainer.summarize_training_progress()

In [5]:
x = C.sequence.input_variable(vocab_size, name='x_input')
y = C.input_variable(num_intents, name='y_label')

In [6]:
def do_train():
    global z
    z = create_model()
    reader = create_reader('atis.train.ctf', is_training=True)
    train(reader, z, 5)
do_train()

Training 194877 parameters in 7 parameter tensors.
Learning rate per minibatch: 0.03
epoch: 0, step: 18000, loss: 0.03331, error 0.00000
Finished Epoch[1 of 5]: [Training] loss = 0.644701 * 18000, metric = 15.75% * 18000 12.699s (1417.4 samples/s);
Finished Epoch[2 of 5]: [Training] loss = 0.175354 * 18006, metric = 4.86% * 18006 12.270s (1467.5 samples/s);
Finished Epoch[3 of 5]: [Training] loss = 0.080272 * 17995, metric = 2.38% * 17995 13.042s (1379.8 samples/s);
Finished Epoch[4 of 5]: [Training] loss = 0.070479 * 18004, metric = 1.97% * 18004 12.262s (1468.3 samples/s);
Learning rate per minibatch: 0.015
epoch: 4, step: 78000, loss: 0.03660, error 0.00000
Finished Epoch[5 of 5]: [Training] loss = 0.013461 * 17999, metric = 0.39% * 17999 12.373s (1454.7 samples/s);


### RNN Result
```
Training 690477 parameters in 7 parameter tensors.
Learning rate per minibatch: 0.020999999999999998
Finished Epoch[1 of 5]: [Training] loss = 0.493163 * 18004, metric = 12.06% * 18004 57.973s (310.6 samples/s);
Finished Epoch[2 of 5]: [Training] loss = 0.102100 * 17998, metric = 2.68% * 17998 58.436s (308.0 samples/s);
Finished Epoch[3 of 5]: [Training] loss = 0.049302 * 18000, metric = 1.36% * 18000 57.371s (313.7 samples/s);
Finished Epoch[4 of 5]: [Training] loss = 0.034251 * 18000, metric = 1.00% * 18000 56.663s (317.7 samples/s);
Learning rate per minibatch: 0.010499999999999999
Finished Epoch[5 of 5]: [Training] loss = 0.005382 * 17998, metric = 0.13% * 17998 56.685s (317.5 samples/s);
```