# Lab 2 - Fully Connected Feedforward Network with MNIST
# Model Overview

In this lab, we will train a fully connected feedforward network on MNIST data. 

The lab comprises two parts. During the first part, the instructor will walk you through the code to define, train, and evaluate the initial version of the FCNN model. In the second part, you will compete with other students to improve the performance of the model.


Our fully connected feedforward network - a.k.a. multi-layer perceptron - will be relatively simple, with 2 hidden layers (`num_hidden_layers`). The number of nodes in each hidden layer is a parameter specified by `hidden_layer_dim`. The figure below illustrates the entire model we will use in this tutorial in the context of MNIST data.

![model-mlp](http://cntk.ai/jup/cntk103c_MNIST_MLP.png)

In this and the following labs we will demonstrate the use of the Functional API. 

# Code Walkthrough
## Initialize environment

In [None]:
import sys
import os
import time
import numpy as np
import cntk as C
from IPython.display import Image

# Seed NumPy's global random number generator so that any NumPy-based
# randomness is repeatable from run to run.
# NOTE(review): this seeds NumPy only; whether CNTK's own randomness (reader
# shuffling, weight initialisation) is covered is not visible here - confirm.
np.random.seed(0)


## Data reading

In this lab we are using the MNIST data pre-processed into the CNTK Text Format (CTF).


    |labels 0 0 0 0 0 0 0 1 0 0 |features 0 0 0 0 ... 
                                                  (784 integers each representing a pixel)
                                                 

Each line in the file contains two key-value pairs, also referred to as streams. The `labels` stream is the one-hot encoded representation of a digit 0-9. The `features` stream is a 784-element vector of 0-255 integers representing a 28 x 28 pixel grayscale image.

Our dataset includes three files: the training file with 50,000 images, the validation file with 10,000 images, and the testing file with 10,000 images.

To read/sample the files, we define a `create_reader` function that configures and returns the CNTK MinibatchSource object.
    

In [None]:
# Locations of the CTF-formatted MNIST splits, relative to the notebook's
# working directory.
# Used to fit the model.
training_file = "../Data/MNIST/MNIST_train.txt"
# Used for cross-validation during training.
validation_file = "../Data/MNIST/MNIST_validate.txt"
# Held out for the final evaluation only.
test_file = '../Data/MNIST/MNIST_test.txt'

# Build a minibatch source over a CTF formatted text file (format described above)
def create_reader(path, is_training, input_dim, num_label_classes):
    """Create a CNTK MinibatchSource that reads a CTF file.

    Args:
        path: Path of the CTF text file to read.
        is_training: When true, the source randomizes the data and repeats it
            indefinitely; otherwise it is read in order for a single sweep.
        input_dim: Shape of the `features` stream.
        num_label_classes: Shape of the `labels` stream.

    Returns:
        The configured C.io.MinibatchSource exposing `labels` and `features`
        streams.
    """
    # Describe the two fields found on every line of the file.
    stream_defs = C.io.StreamDefs(
        labels = C.io.StreamDef(field='labels', shape=num_label_classes),
        features = C.io.StreamDef(field='features', shape=input_dim)
    )
    deserializer = C.io.CTFDeserializer(path, stream_defs)

    # Training keeps cycling through the data; evaluation stops after one pass.
    if is_training:
        sweeps = C.io.INFINITELY_REPEAT
    else:
        sweeps = 1

    return C.io.MinibatchSource(deserializer, randomize = is_training, max_sweeps = sweeps)

## Network definition and training

### Define the network


In [None]:
# Factory for a fully connected feedforward classifier whose hidden layers use sigmoid neurons
def create_fcnn_network(input_dim, num_hidden_layers, hidden_layer_dim, num_output_classes):
    """Build the fully connected feedforward network.

    Args:
        input_dim: Shape of the network's input variable.
        num_hidden_layers: Number of hidden Dense layers to stack.
        hidden_layer_dim: Number of units in every hidden layer.
        num_output_classes: Number of units in the final `classify` layer.

    Returns:
        The CNTK function obtained by applying the layer stack to the scaled
        input. The output layer has no activation.
    """
    # The network's only input.
    features = C.input_variable(input_dim)

    # Multiply the raw input by 1/256 before it reaches the first layer.
    scaled_features = C.element_times(1.0/256.0, features)

    def hidden_layer(index):
        # Hidden layers are named hidden0, hidden1, ...
        return C.layers.Dense(hidden_layer_dim, name = 'hidden' + str(index))

    # Layers constructed inside this scope default to Glorot-uniform
    # initialisation and sigmoid activation unless they override it.
    with C.layers.default_options(init = C.layers.glorot_uniform(), activation = C.ops.sigmoid):
        hidden_stack = C.layers.For(range(num_hidden_layers), hidden_layer)
        output_layer = C.layers.Dense(num_output_classes, activation = None, name='classify')
        model = C.layers.Sequential([hidden_stack, output_layer])

    return model(scaled_features)
    


### Create and visualize the network

In [None]:
# Size of the `features` stream: one value per pixel of a 28 x 28 image.
input_dim = 784
# One output per digit class (0-9).
num_output_classes = 10
# Architecture of the hidden part of the network.
num_hidden_layers = 2
hidden_layer_dim = 400

# Build the network; `z` is reused by the training and evaluation cells below.
z = create_fcnn_network(input_dim, num_hidden_layers, hidden_layer_dim, num_output_classes)
# Plot the network graph to graph.png ...
C.logging.graph.plot(z, "graph.png")
# ... and show it as the cell's output (must remain the last expression).
Image("graph.png")

### Configure training 
#### Configure a trainer

In [None]:
def create_trainer(network, labels):
    """Create a CNTK Trainer for `network` using plain SGD.

    Args:
        network: The model function whose parameters are to be trained.
        labels: Input variable holding the ground-truth labels.

    Returns:
        A C.Trainer configured with a cross-entropy-with-softmax loss, a
        classification-error metric, a single SGD learner, and a progress
        printer.
    """
    # Training criterion: (loss to minimise, metric to report).
    criterion = (
        C.cross_entropy_with_softmax(network, labels),
        C.classification_error(network, labels),
    )

    # SGD learner with a constant learning rate of 0.2, specified per minibatch.
    schedule = C.learning_rate_schedule(0.2, C.UnitType.minibatch)
    sgd_learner = C.sgd(network.parameters, schedule)

    # Progress is reported through a default ProgressPrinter.
    writers = [C.logging.ProgressPrinter()]

    return C.Trainer(network, criterion, [sgd_learner], writers)


#### Configure a training session

In [None]:
def train_and_validate(network, training_file, validation_file, mb_schedule, epoch_size, num_epochs):
    """Train `network` with a CNTK training session and cross-validation.

    Args:
        network: The model function to train. Its first argument is treated as
            the feature input and its first output as the class scores.
        training_file: Path of the CTF file used for training.
        validation_file: Path of the CTF file used for cross-validation.
        mb_schedule: Minibatch size (or schedule) passed to the session as
            `mb_size`.
        epoch_size: Number of samples between progress reports.
        num_epochs: Training stops after `epoch_size * num_epochs` samples.
    """
    # Derive the data dimensions from the network itself.
    features = network.arguments[0]
    input_dim = features.shape[0]
    num_output_classes = network.outputs[0].shape[0]

    # Readers for the training data and the validation data.
    train_source = create_reader(training_file, True, input_dim, num_output_classes)
    validation_source = create_reader(validation_file, False, input_dim, num_output_classes)

    # Ground-truth input, declared as sparse.
    labels = C.input_variable(num_output_classes, is_sparse=True)

    def stream_map(source):
        # Bind the network/label inputs to the matching streams of `source`.
        return {
            features: source.streams.features,
            labels: source.streams.labels
        }

    training_map = stream_map(train_source)
    validation_map = stream_map(validation_source)

    trainer = create_trainer(network, labels)

    # Cross-validation on the validation reader.
    # NOTE(review): frequency=None leaves the validation frequency to CNTK's
    # default - confirm when validation actually runs.
    validation_config = C.CrossValidationConfig(
        minibatch_source=validation_source,
        model_inputs_to_streams=validation_map,
        frequency=None,
    )

    # Assemble the session and run it to completion.
    session = C.training_session(
        trainer=trainer,
        mb_source=train_source,
        mb_size=mb_schedule,
        model_inputs_to_streams=training_map,
        progress_frequency=epoch_size,
        max_samples=epoch_size * num_epochs,
        cv_config=validation_config,
    )
    session.train()

### Execute training

In [None]:

# Number of samples between progress reports; it is also multiplied by
# num_epochs to obtain the total number of training samples.
epoch_size = 6400
# Training stops after epoch_size * num_epochs = 640,000 samples.
num_epochs = 100
# Minibatch size handed to the training session.
mb_schedule = 64

# Train the network `z` built above and cross-validate it on the validation file.
train_and_validate(z, training_file, validation_file, mb_schedule, epoch_size, num_epochs)

# Hackathon

Try to improve the performance of the model. 

Hints:
- Try different activation functions in hidden layers
- Play with the learning rate, minibatch size and the number of sweeps
- You can look at regularization - check the `l1_regularization_weight` and `l2_regularization_weight` hyperparameters of the `sgd` learner
- Try different optimization algorithms

## Final testing


DON'T CHEAT. DON'T USE MNIST_test.txt FOR MODEL TRAINING AND SELECTION. DON'T EXECUTE THE BELOW CELL TILL YOU ARE READY FOR THE FINAL TEST



In [None]:
def final_evaluation(network, test_file):
    """Evaluate `network` on a CTF file and report the classification error.

    Args:
        network: The trained model function. Its first argument is treated as
            the feature input and its first output as the class scores.
        test_file: Path of the CTF file to evaluate on.

    The file is read for a single sweep in minibatches of 1024 samples, and
    the summary is emitted through a ProgressPrinter.
    """
    # Derive the data dimensions from the network itself.
    input_dim = network.arguments[0].shape[0]
    num_output_classes = network.outputs[0].shape[0]

    # Non-training reader: no randomization, one pass over the file.
    test_source = create_reader(test_file, False,  input_dim, num_output_classes)

    # Bind the network input and the ground-truth input to the reader streams.
    features = network.arguments[0]
    labels = C.input_variable(num_output_classes, is_sparse=True)
    stream_map = {
        features: test_source.streams.features,
        labels: test_source.streams.labels
    }

    error_metric = C.classification_error(network, labels)
    evaluator = C.Evaluator(error_metric, [C.logging.ProgressPrinter()])

    batch_size = 1024
    while True:
        batch = test_source.next_minibatch(batch_size, input_map=stream_map)
        # An empty result means the single sweep over the file is finished.
        if not batch:
            break
        evaluator.test_minibatch(batch)

    evaluator.summarize_test_progress()
    

In [None]:

# Evaluate the trained network `z` on the held-out test file. Run this only
# once model selection is finished.
final_evaluation(z, test_file)