# Lab 3 - Fully Connected Feedforward Network with MNIST
# Model Overview

In this lab, we will train a fully connected feedforward network on MNIST data. 

In [None]:
from IPython.display import Image
# Render an illustrative MNIST image from the URL below at 200 x 200.
Image(
    url="http://3.bp.blogspot.com/_UpN7DfJA0j4/TJtUBWPk0SI/AAAAAAAAABY/oWPMtmqJn3k/s1600/mnist_originals.png",
    width=200,
    height=200,
)


Our fully connected feedforward network - a.k.a. multi-layer perceptron - will be relatively simple, with 2 hidden layers (`num_hidden_layers`). The number of nodes in each hidden layer is a parameter specified by `hidden_layers_dim`. The figure below illustrates the entire model we will use in this tutorial in the context of MNIST data.

![model-mlp](http://cntk.ai/jup/cntk103c_MNIST_MLP.png)

# Code Walkthrough
## Initialize environment

In [None]:
import sys
import os
import time
import numpy as np
import cntk as C
from cntk.logging.progress_print import ProgressPrinter

# Select the right target device 
# C.device.try_set_default_device(C.device.cpu())
# C.device.try_set_default_device(C.device.gpu(0))



## Data reading

In this tutorial we are using the MNIST data. The dataset has 50,000 training images, 10,000 validation images and 10,000 test images, with each image being 28 x 28 pixels. Thus the number of features is equal to 784 (= 28 x 28 pixels), 1 per pixel. The variable `num_output_classes` is set to 10, corresponding to the number of digits (0-9) in the dataset.

The data is in the following format:

    |labels 0 0 0 0 0 0 0 1 0 0 |features 0 0 0 0 ... 
                                                  (784 integers each representing a pixel)
    


In [None]:
# Fix the seed of NumPy's global random number generator so that any
# NumPy-generated random numbers are the same on every run.
# NOTE(review): this seeds NumPy only; whether CNTK's parameter initializers
# and reader shuffling are affected is not visible here - confirm if fully
# reproducible training runs are required.
np.random.seed(0)

# Read a CTF formatted text (as mentioned above) using the CTF deserializer from a file
def create_reader(path, is_training, input_dim, num_label_classes):
    """Build a CNTK minibatch source over a CTF-formatted text file.

    Args:
        path: Location of the CTF text file.
        is_training: When true, the source is randomized and repeats
            indefinitely; otherwise it is not randomized and is limited
            to a single sweep.
        input_dim: Shape of the dense ``features`` stream.
        num_label_classes: Shape of the dense ``labels`` stream.

    Returns:
        A ``C.io.MinibatchSource`` exposing ``labels`` and ``features`` streams.
    """
    label_stream = C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False)
    feature_stream = C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)
    deserializer = C.io.CTFDeserializer(
        path, C.io.StreamDefs(labels=label_stream, features=feature_stream))

    # Training data is streamed without end; evaluation data is read once.
    if is_training:
        sweep_limit = C.io.INFINITELY_REPEAT
    else:
        sweep_limit = 1

    return C.io.MinibatchSource(deserializer, randomize=is_training, max_sweeps=sweep_limit)

## Model training

### Set up a computational network


In [None]:
# Define a fully connected feedforward classification network with sigmoid neurons in the hidden layers
def create_fcnn_model(features, num_hidden_layers, hidden_layers_dim, num_output_classes):
    """Stack dense layers into a fully connected feedforward classifier.

    Args:
        features: Input node (or expression) fed into the first hidden layer.
        num_hidden_layers: Number of sigmoid hidden layers to stack.
        hidden_layers_dim: Number of units in each hidden layer.
        num_output_classes: Number of units in the output layer.

    Returns:
        The output of the final dense layer, which is created with
        ``activation=None`` (no softmax is applied inside this function).
    """
    # Every Dense layer created inside this scope defaults to Glorot-uniform
    # initialization and a sigmoid activation unless overridden.
    with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.ops.sigmoid):
        activations = features
        for _layer in range(num_hidden_layers):
            hidden_layer = C.layers.Dense(hidden_layers_dim)
            activations = hidden_layer(activations)

        # The output layer overrides the default activation with none.
        output_layer = C.layers.Dense(num_output_classes, activation=None)
        return output_layer(activations)

    
# Network hyper-parameters: 784 input values per sample, two hidden layers
# of 400 units each, and one output unit per class.
input_dim = 784
num_hidden_layers = 2
hidden_layers_dim = 400
num_output_classes = 10

# Input nodes that the readers' feature and label streams are mapped onto.
features = C.input(input_dim)
labels = C.input(num_output_classes)

# Divide the raw feature values by 255 before they enter the network.
z = create_fcnn_model(
    features / 255.0,
    num_hidden_layers=num_hidden_layers,
    hidden_layers_dim=hidden_layers_dim,
    num_output_classes=num_output_classes,
)


### Define a trainer using the SGD learner

In [None]:
# Define a trainer using a given reader and the SGD learner 
def train_model_with_SGD(model, features, labels, reader, num_samples_per_sweep, num_sweeps,
                         learning_rate=0.2, minibatch_size=64):
    """Train ``model`` with plain SGD on minibatches drawn from ``reader``.

    The loss is softmax cross-entropy and the reported metric is the
    classification error, both computed between ``model`` and ``labels``.
    The elapsed wall-clock training time (in seconds) is printed at the end.

    Args:
        model: Network output to train.
        features: Input node the reader's ``features`` stream is mapped to.
        labels: Input node the reader's ``labels`` stream is mapped to.
        reader: Minibatch source exposing ``streams.features`` and
            ``streams.labels``.
        num_samples_per_sweep: Number of samples counted as one sweep.
        num_sweeps: Number of sweeps to train for.
        learning_rate: Per-minibatch learning rate for the SGD learner.
            Defaults to 0.2.
        minibatch_size: Number of samples requested per training step.
            Defaults to 64.

    Raises:
        ValueError: If ``minibatch_size`` is not positive.
    """
    if minibatch_size <= 0:
        raise ValueError("minibatch_size must be positive, got {0}".format(minibatch_size))

    # Define loss and error functions
    loss = C.cross_entropy_with_softmax(model, labels)
    error = C.classification_error(model, labels)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(model.parameters, lr_schedule)
    # NOTE(review): 500 is presumably the progress-reporting frequency in
    # minibatches - confirm against the ProgressPrinter documentation.
    progress_printer = ProgressPrinter(500)
    trainer = C.Trainer(model, (loss, error), [learner], [progress_printer])

    # Number of whole minibatches needed to cover the requested sweeps;
    # a trailing partial minibatch is dropped.
    num_minibatches_to_train = int((num_samples_per_sweep * num_sweeps) // minibatch_size)

    # Map the data streams to the input and labels.
    input_map = {
        labels: reader.streams.labels,
        features: reader.streams.features
    }

    # Run the trainer and perform model training
    start_time = time.time()
    for _ in range(num_minibatches_to_train):
        data = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)

    print(time.time() - start_time)


### Run the trainer



In [None]:
# Training schedule: ten sweeps, each counted as 50,000 samples.
num_samples_per_sweep = 50000
num_sweeps = 10

# Reader over the training file, created in training mode.
train_file = "../../Data/MNIST_train.txt"
reader = create_reader(train_file, is_training=True, input_dim=input_dim,
                       num_label_classes=num_output_classes)

train_model_with_SGD(z, features, labels, reader,
                     num_samples_per_sweep=num_samples_per_sweep, num_sweeps=num_sweeps)

# Model evaluation
## Define the helper test function

In [None]:
# Define the test function 
def test_model(model, features, labels, reader, minibatch_size=2000):
    """Compute the classification error of ``model`` over all data in ``reader``.

    Minibatches are pulled from ``reader`` until it returns an empty result.
    Each minibatch's error is weighted by the number of samples it contains,
    so a smaller final minibatch does not distort the overall average.

    Args:
        model: Network output to evaluate.
        features: Input node the reader's ``features`` stream is mapped to.
        labels: Input node the reader's ``labels`` stream is mapped to.
        reader: Minibatch source exposing ``streams.features`` and
            ``streams.labels``; it must eventually run out of data.
        minibatch_size: Number of samples requested per evaluation step.
            Defaults to 2000.

    Returns:
        The classification error as a percentage, or ``None`` when the
        reader produced no samples.
    """
    evaluator = C.Evaluator(C.classification_error(model, labels))
    input_map = {
        features: reader.streams.features,
        labels: reader.streams.labels
    }

    weighted_error_sum = 0.0
    num_samples_seen = 0
    data = reader.next_minibatch(minibatch_size, input_map=input_map)
    while bool(data):
        # test_minibatch reports the per-sample average for this minibatch,
        # so scale it back up by the minibatch's sample count.
        num_samples = data[labels].num_samples
        weighted_error_sum += evaluator.test_minibatch(data) * num_samples
        num_samples_seen += num_samples
        data = reader.next_minibatch(minibatch_size, input_map=input_map)

    if num_samples_seen == 0:
        return None
    return weighted_error_sum * 100 / num_samples_seen

## Run the test

In [None]:
# Evaluate the trained model on the held-out validation set.
validation_file = "../../Data/MNIST_validate.txt"
reader = create_reader(validation_file, False, input_dim, num_output_classes)
error_rate = test_model(z, features, labels, reader)

# test_model returns None when the reader yields no data; formatting None
# with a float format spec would raise a TypeError.
if error_rate is None:
    print("No validation samples were read from {0}".format(validation_file))
else:
    print("Average validation error: {0:.2f}%".format(error_rate))

# Hackathon

Try to improve the performance of the model. 

Hints:
- Try different activation functions in hidden layers
- Play with the learning rate, minibatch size and the number of sweeps
- You can look at regularization - check `l1_regularization` and `l2_regularization` hyper parameters of the `sgd` learner
- Try different optimization algorithms

## Final testing


DON'T CHEAT. DON'T USE MNIST_test.txt FOR MODEL TRAINING AND SELECTION



In [None]:
# Final evaluation on the test set; run this only after model selection is done.
test_file = '../../Data/MNIST_test.txt'
reader = create_reader(test_file, False, input_dim, num_output_classes)
error_rate = test_model(z, features, labels, reader)

# test_model returns None when the reader yields no data; formatting None
# with a float format spec would raise a TypeError.
if error_rate is None:
    print("No test samples were read from {0}".format(test_file))
else:
    print("Average test error: {0:.2f}%".format(error_rate))