# MNIST multilayer network

## Exercise - Load and preprocess data

> **Exercise**: Load the MNIST data. Split it into train, validation and test sets. Standardize the images. Define a `get_batches(X, y, batch_size)` function to generate random X/y batches of size `batch_size` using a Python generator.

In [1]:
import numpy as np
import os

# Load data
with np.load(os.path.join('data', 'mnist-10k.npz')) as npz_file:
      # Load items into a dictionary
    X = npz_file['data']
    y = npz_file['labels']
    
print('X:', X.shape, X.dtype)
print('y:', y.shape, y.dtype)

X: (10000, 784) uint8
y: (10000,) int32


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Create train/test sets
X_train, X_test, y_train, y_test = train_test_split(
    # Convert uint8 pixel values to float
    X.astype(np.float32),
    y,
    test_size=1000, random_state=0
)

# Split test into validation/test sets
X_valid, X_test, y_valid, y_test = train_test_split(
    X_test, y_test,
    test_size=500, random_state=0
)

# Standardize
#scaler = StandardScaler()
#X_train = scaler.fit_transform(X_train)
#X_valid = scaler.transform(X_valid)
#X_test = scaler.transform(X_test)

# Function to get batches of data
def get_batches(X, y, batch_size):
    # Enumerate indexes by steps of batch_size
    # i: 0, b, 2b, 3b, 4b, .. where b is the batch size
    for i in range(0, len(y), batch_size):
        # "yield" data between index i and i+b (not included)
        yield X[i:i+batch_size], y[i:i+batch_size]

## Exercise - Create and train a multilayer network

> **Exercise:** Create a multilayer neural network and train it using your batch generator. Evaluate the accuracy on the validation set after each epoch. Test different architectures and parameters. Evaluate your best network on the test set. Save the trained weights of the first fully connected layer in a variable.

###SetUp TensorFlow Graph (followin explanation from here: https://www.youtube.com/watch?v=XxFYGey6YxM)

In [3]:
import tensorflow as tf
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 10])

####Neural Network Architecture

In [4]:
#Hyperparameters
nr_epochs = 5
learning_rate = 0.0001
#2 hidden layers and number of neurons
n_hidden1 = 512
n_hidden2 = 64

#setting of initial weight 1 (784 is the total amount of inital inputs)
initial_w1 = tf.truncated_normal(shape=[784, n_hidden1], stddev=0.1, seed=42)
#Creation a variable for weight where all weights for layer 1 will be kept
w1 = tf.Variable(initial_value=initial_w1)

#setting initial bias (what shifts the activation function from left to the right) and creation the relavnt variable
initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
b1 = tf.Variable(initial_value=initial_b1)

In [5]:
#Setting the first hidden layer
layer1_in = tf.matmul(X, w1) + b1
layer1_out = tf.nn.relu(layer1_in)

In [6]:
#setting of initial weight 2 
initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)
w2 = tf.Variable(initial_value=initial_w2)

initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
b2 = tf.Variable(initial_value=initial_b2)

#Setting the second hidden layer
layer2_in = tf.matmul(layer1_out, w2) + b2
layer2_out = tf.nn.relu(layer2_in)

In [7]:
#setting of final weight 3 (considering the final output (number of classes = 10))
initial_w3 = tf.truncated_normal(shape=[n_hidden2, 10], stddev=0.1, seed=42)
w3 = tf.Variable(initial_value=initial_w3)

initial_b3 = tf.constant(value=0.0, shape=[10])
b3 = tf.Variable(initial_value=initial_b3)

#Setting the final output layer
layer3_in = tf.matmul(layer2_out, w3) + b3
output = tf.nn.softmax(layer3_in)

### Loss, Optimization and Metrics

####Defining loss function

In [8]:
#using avarage of losses among bathes using tf.reduce_mean
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

####Defining optimizer

In [9]:
#including minimazation of our loss and updating our variables (weights and biases)
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

#### Accuracy metric

In [10]:
#getting index of the maximum value from the output using argmax
correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

### Run Session

In [11]:
sess = tf.Session()

#to initialise all the variables set above
init = tf.global_variables_initializer()
sess.run(init)

In [12]:
#### bathching the Data with bacth size = 1000
num_examples = y_train.shape[0]
nr_iterations = int(num_examples/1000)

index_in_epoch = 0

In [13]:
def next_batch(batch_size, data, labels):
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    #index_in_epoch = index_in_epoch + batch size
    index_in_epoch += batch_size
    
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

####Training loop

In [14]:
for epoch in range (nr_epochs):
    
    for i in range(nr_iterations):
        
        batch_x, batch_y = next_batch(batch_size=1000, data=X_train, labels=y_train)
        
        #to feed the session, which will run all calculations
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        sess.run(train_step, feed_dict=feed_dictionary)
        
        batch_accuracy = sess.run(fetches=[accuracy], feed_dict=feed_dictionary)
    
    print(f'Epoch {epoch} \t| Training Accuracy = {batch_accuracy}')
    
print('Done training!')

ValueError: Cannot feed value of shape (1000,) for Tensor 'Placeholder_1:0', which has shape '(?, 10)'

## Exercise - Visualize weights

> **Exercise**: Plot the weights from the first fully connected layer (the templates) with the `imshow()` function.

In [None]:
???