In [1]:
import os
import json
import itertools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import cntk
from cntk.io import StreamDef, StreamDefs, MinibatchSource, CBFDeserializer

In [2]:
# Geometry of one input frame. The reader delivers frames flattened to
# num_channels * frame_height * frame_width values.
frame_height, frame_width = 120, 120
num_channels = 1

# Number of frames per input sequence (presumably fixed by the dataset -- confirm).
sequence_length = 20

# Size of the one-hot label vector, including the BOS (64) and EOS (65) markers.
num_classes = 66

In [3]:
# Dynamic axis and sequence type for the input (frame) stream
input_axis = cntk.Axis('inputAxis')
input_sequence = cntk.layers.SequenceOver[input_axis]

# Dynamic axis and sequence type for the label stream
label_axis = cntk.Axis('labelAxis')
label_sequence = cntk.layers.SequenceOver[label_axis]

In [4]:
# Vocabulary indices of the sequence markers (they match the 'BOS' / 'EOS'
# entries added to label_dict).
sentence_start_index = 64
sentence_end_index = 65

# One-hot BOS vector, sized from num_classes rather than a second hard-coded 66
# so the two cannot drift apart. It is wrapped in a CNTK constant and later
# passed to create_model_greedy as the decoder's initial state.
bos_one_hot = np.zeros(num_classes)
bos_one_hot[sentence_start_index] = 1
sentence_start = cntk.Constant(bos_one_hot, dtype=np.float32)

In [5]:
# Load the word -> class-index mapping produced during preprocessing
with open('./output/label.json', 'r') as jfile:
    label_dict = json.load(jfile)

# Add the sequence markers (presumably the JSON itself only uses indices
# below 64 -- confirm against the preprocessing step).
label_dict['BOS'] = 64
label_dict['EOS'] = 65

# Reverse lookup: class index -> word
i2w = {index: word for word, index in label_dict.items()}

# (word, index) pairs ordered by index. The last two entries are dropped from
# `classes`; these are BOS/EOS as long as 64 and 65 are the highest indices.
pairs = sorted(label_dict.items(), key=lambda item: item[1])
classes = [word for word, _ in pairs[:-2]]

In [6]:
def cbf_reader(path, is_training, max_samples):
    """
    Build a MinibatchSource that reads the `label` and `pixels` streams of a CBF file.

    :param path: Path to a CBF file
    :param is_training: Forwarded as ``randomize``; pass True for the training set, False otherwise
    :param max_samples: Max no. of samples to read
    :return: MinibatchSource exposing a sparse `label` stream and a dense `pixels` stream
    """
    # Labels are sparse vectors of length num_classes
    label_stream = StreamDef(field='label', shape=num_classes, is_sparse=True)

    # Each frame arrives flattened to channels * height * width values
    pixel_stream = StreamDef(
        field='pixels',
        shape=num_channels * frame_height * frame_width,
        is_sparse=False
    )

    streams = StreamDefs(label=label_stream, pixels=pixel_stream)
    deserializer = CBFDeserializer(path, streams)

    return MinibatchSource(deserializer, randomize=is_training, max_samples=max_samples)

In [7]:
def create_model_greedy(s2smodel, input_sequence, sentence_start):
    """
    Wrap a trained sequence-to-sequence model in a greedy decoder.

    :param s2smodel: Callable taking (history, encoder_input) and returning per-step scores
    :param input_sequence: Sequence type (``SequenceOver[...]``) used to declare the input signature
    :param sentence_start: Initial state fed to the unfold, i.e. the start-of-sequence vector
    :return: CNTK Function mapping a frame sequence to the greedily decoded output sequence
    """
    @cntk.Function
    @cntk.layers.Signature(input_sequence[cntk.layers.Tensor[num_channels, frame_height, frame_width]])
    def model_greedy(input_var):
        # Subtract previous frame from next frame. The slice bounds come from
        # sequence_length instead of literal 20/19 so they follow the
        # configured sequence size.
        later_frames = cntk.sequence.slice(input_var, 1, sequence_length)
        earlier_frames = cntk.sequence.slice(input_var, 0, sequence_length - 1)
        layer_input = later_frames - earlier_frames

        # Greedy decoding: at every step the model output is collapsed to a
        # one-hot vector (hardmax) and fed back in as history.
        # NOTE(review): length_increase=0.1 bounds the output length relative
        # to input_var -- confirm the intended maximum against the CNTK docs.
        unfold = cntk.layers.UnfoldFrom(
            lambda history: s2smodel(history, layer_input) >> cntk.hardmax,
            length_increase=0.1)
        return unfold(initial_state=sentence_start, dynamic_axes_like=input_var)

    return model_greedy

In [8]:
def create_sparse_to_dense(vocab_dim, input_sequence):
    """
    Build a CNTK function that multiplies a sparse input sequence by the
    identity matrix, so the values are unchanged. It is used to get label
    sequences into a form that can be printed or inspected.

    :param vocab_dim: Length of each sparse input vector
    :param input_sequence: Sequence type (``SequenceOver[...]``) used to declare the input signature
    :return: CNTK Function computing ``times(input, I)``
    """
    identity = cntk.Constant(np.eye(vocab_dim))

    @cntk.Function
    @cntk.layers.Signature(input_sequence[cntk.layers.SparseTensor[vocab_dim]])
    def no_op(input_var):
        # Multiplying by the identity leaves the values untouched
        return cntk.times(input_var, identity)

    return no_op

In [9]:
def format_sequences(sequences, i2w):
    """
    Translate score / one-hot vectors into vocabulary words.

    :param sequences: Iterable of vectors; the argmax of each one selects a word
    :param i2w: Mapping from class index to word
    :return: List with one word (string) per input vector
    """
    words = []
    for vector in sequences:
        best_index = np.argmax(vector)
        # Joining a one-element list mirrors the sentence-building form and
        # yields the word itself.
        words.append(' '.join([i2w[best_index]]))
    return words

In [10]:
def display(frame):
    """
    Show a single 2-D frame as a grayscale image.

    NOTE(review): this name shadows IPython's built-in ``display`` helper
    inside the notebook -- confirm that is intended.

    :param frame: Image data accepted by ``plt.imshow``
    """
    render_options = {'cmap': 'gray'}
    plt.imshow(frame, **render_options)
    plt.show()

In [11]:
def stitch(output):
    """
    Tile a 4-D stack of 2-D maps into a single image grid.

    Block-row ``i``, block-column ``j`` of the result holds ``output[i][j]``.

    :param output: Array of shape (filters, count, height, width)
    :return: 2-D float array of shape (filters * height, count * width)
    """
    # The last two axes are rows (height) then columns (width). Unpacking them
    # in that order keeps the tiling correct for non-square maps too.
    filters, count, height, width = output.shape
    img = np.empty((filters * height, count * width))

    for i in range(filters):
        for j in range(count):
            top, left = i * height, j * width
            img[top:top + height, left:left + width] = output[i][j]

    return img

In [12]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix on the current matplotlib figure.

    :param cm: 2-D numpy array; rows are true labels, columns are predicted labels
    :param classes: Tick labels, one per class, used on both axes
    :param normalize: If True, scale each row to sum to 1 before plotting
    :param title: Plot title
    :param cmap: Matplotlib colormap used for the cells
    """
    if normalize:
        # Rows for classes with no true samples sum to zero. Leave them at 0
        # instead of dividing by zero, which would fill the row (and the
        # colour threshold below) with NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value in a colour that contrasts with its background
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment='center',
                 color='white' if cm[i, j] > thresh else 'black')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are included in the fit
    plt.tight_layout()

In [13]:
def test(model_greedy, reader, sparse_to_dense, i2w):
    # List of (target, prediction)
    result = []
    while True:
        mb = reader.next_minibatch(1)
        if not mb:
            break

        label = sparse_to_dense(mb[reader.streams.label])[0][1]
        outputs = model_greedy(mb[reader.streams.pixels])       
        result.append((label, outputs[-1]))
        
    return result

In [14]:
# Output locations: JSON prediction files and saved layer images
result_dir = './results'
img_dir = './images'

# Create each output directory if it doesn't already exist
for directory in (result_dir, img_dir):
    if not os.path.exists(directory):
        os.makedirs(directory)

In [15]:
# Run identifiers: '' selects the un-numbered files (test.cbf / run),
# 0-9 select the numbered ones (test0.cbf / run0, ...).
prefixes = [''] + list(range(10))

In [16]:
# Converter handed to test(), which applies it to the reader's sparse label
# stream before indexing out the target vector.
sparse_to_dense = create_sparse_to_dense(num_classes, input_sequence)

In [21]:
# Evaluate every trained run on its matching test set and store the
# (target, prediction) class indices as JSON.
# NOTE: later cells reuse `test_file`, `greedy_model` and `prefix` as left by
# the final iteration of this loop.
for prefix in prefixes:
    # os.path.join keeps the paths valid on non-Windows systems as well
    # (on Windows it yields the same '.\dataset\...' strings as before).
    test_file = os.path.join('.', 'dataset', 'sequential', 'test{}.cbf'.format(prefix))
    model_path = os.path.join('.', 'models', 'run{}'.format(prefix), 'final_model.dnn')

    # Load the trained model
    model = cntk.load_model(model_path)
    greedy_model = create_model_greedy(model, input_sequence, sentence_start)

    # Create a reader (test set)
    reader = cbf_reader(test_file, is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)

    # Evaluate the model on the test set
    result = test(greedy_model, reader, sparse_to_dense, i2w)

    # Reduce each (target, prediction) vector pair to class indices; they are
    # stored as strings in the JSON file.
    indices = [(str(t.argmax()), str(p.argmax())) for t, p in result]

    # Save the results for later
    with open(os.path.join(result_dir, 'result{}.json'.format(prefix)), 'w') as out_file:
        json.dump(indices, out_file)

In [16]:
# Create a fresh reader over the test set for single-sample inspection.
# NOTE: `test_file` is the value left behind by the evaluation loop (its last
# iteration), so this cell only works after that loop has been run.
reader = cbf_reader(test_file, is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)

In [17]:
# Pull one sample (frame sequence plus target label) from the reader
mb = reader.next_minibatch(1)

# Restore every flattened frame to (channels, height, width)
seq = mb[reader.streams.pixels].as_sequences()[0]
frame_shape = (num_channels, frame_height, frame_width)
frames = [frame.reshape(frame_shape) for frame in seq]

# Class index of element 1 of the label sequence
# (presumably element 0 is the BOS marker -- confirm against the data).
label = mb[reader.streams.label].asarray()[0][1].argmax()

  ' Returning dense data.' % str(dense_data.shape))


In [18]:
# Decode the sample greedily; the last emitted step is taken as the prediction
out = greedy_model.eval([frames])
predicted = out[-1].argmax()
print('Expected {}, got {}'.format(label, predicted))

Expected 1, got 1


In [19]:
# Collect the node outputs of the greedy model's graph. The next cell filters
# them by name to find the convolution and pooling layers.
nodes = cntk.logging.get_node_outputs(greedy_model)

In [20]:
# Save an image of what each convolution / pooling layer produces for the sample.
# NOTE: `prefix` is the value left over from the evaluation loop.
for node in nodes:
    is_visual_layer = 'conv' in node.name or 'pool' in node.name
    if is_visual_layer:
        # Evaluate the graph only up to this named node
        layer = cntk.combine([greedy_model.find_by_name(node.name)])
        out = layer.eval([frames])

        # Take element 0 of every entry in the result and tile them into one image
        one = np.array([step[0] for step in out])
        img = stitch(one)

        image_name = '{}_{}.png'.format(prefix, node.name)
        plt.imsave(os.path.join(img_dir, image_name), img, cmap='gray')