# Imports

In [1]:
from numpy.random import seed
seed(888)
from tensorflow import set_random_seed
set_random_seed(404)

In [2]:
import os
import numpy as np
import tensorflow as tf

from time import strftime
from PIL import Image

# Constants

In [3]:
X_TRAIN_PATH = 'MNIST/digit_xtrain.csv'
X_TEST_PATH = 'MNIST/digit_xtest.csv'
Y_TRAIN_PATH = 'MNIST/digit_ytrain.csv'
Y_TEST_PATH = 'MNIST/digit_ytest.csv'

LOGGING_PATH = 'tensorboard_mnist_digit_logs/'

NR_CLASSES = 10
VALIDATION_SIZE = 10000

IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_HEIGHT*IMAGE_WIDTH*CHANNELS

# Get the Data

In [4]:
# Loading the data into an array
y_train_all = np.loadtxt(Y_TRAIN_PATH, delimiter=',', dtype=int)

y_test = np.loadtxt(Y_TEST_PATH, delimiter=',', dtype=int)

In [5]:
x_train_all = np.loadtxt(X_TRAIN_PATH, delimiter=',', dtype=int)

x_test = np.loadtxt(X_TEST_PATH, delimiter=',', dtype=int)

# Explore the Data

In [6]:
# Shape of the training dataset
x_train_all.shape

(60000, 784)

In [7]:
# 1st array in the training dataset
x_train_all[0]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   3,  18,  18,  18,
       126, 136, 175,  26, 166, 255, 247, 127,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,  30,  36,  94, 154, 17

In [8]:
y_train_all.shape

(60000,)

In [9]:
# Shape of the testing dataset
x_test.shape

(10000, 784)

# Data Preprocessing

In [10]:
# Rescale the features
x_train_all, x_test = x_train_all / 255.0 , x_test / 255.0

#### Convert target valuse into one-hot encoding

In [11]:
# this is a sparse matrix
values = y_train_all[:5]

# full matrix
np.eye(10)[values]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [12]:
y_train_all = np.eye(NR_CLASSES)[y_train_all]

In [13]:
y_test = np.eye(NR_CLASSES)[y_test]

In [14]:
y_test.shape

(10000, 10)

### Create a Validation from trainig dataset

**Challege:** Split the trainig data into smaller trainig dataset and a validation dataset for the features and the labels. Create 4 arrays ```x_val``` , ```y_val``` , ```x_train``` and ```y_train``` from ```x_train_all``` and ```y_train_all```. Use the validation size of 10k.

In [15]:
x_val = x_train_all[:VALIDATION_SIZE]

y_val = y_train_all[:VALIDATION_SIZE]

In [16]:
x_train = x_train_all[VALIDATION_SIZE:]

y_train = y_train_all[VALIDATION_SIZE:]

In [17]:
x_train.shape

(50000, 784)

In [18]:
x_val.shape

(10000, 784)

# Setup TensorFlow Graph

In [19]:
# Add/ Define tf placeholder(for features)
X = tf.placeholder(tf.float32, shape=[None, TOTAL_INPUTS], name='X')

# placeholder for labels
Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES], name='labels')

### Neural Network Architecture

#### Hyperparameters

In [20]:
nr_epochs = 50
learning_rate = 1e-3          # 0.0001

# 1st hidden layer(contains 512 neurons)
n_hidden1=512
n_hidden2=64

In [21]:
def setup_layer(input, weight_dim, bias_dim, name):
    
    with tf.name_scope(name):
        initial_w = tf.truncated_normal(shape=weight_dim, stddev=0.1, seed=42)
        w = tf.Variable(initial_value=initial_w, name='W')

        initial_b = tf.constant(value=0.0, shape=bias_dim)
        b = tf.Variable(initial_value=initial_b, name='B')

        layer_in = tf.matmul(input, w) + b
        
        if name=='out':
            layer_out = tf.nn.softmax(layer_in)
            
        else:
            layer_out = tf.nn.relu(layer_in)
            
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)
            
        return layer_out


In [22]:
layer_1 = setup_layer(X, weight_dim=[TOTAL_INPUTS, n_hidden1], bias_dim=[n_hidden1], name='Layer_1')

layer_2 = setup_layer(layer_1, weight_dim=[n_hidden1, n_hidden2], bias_dim=[n_hidden2], name='Layer_2')

output = setup_layer(layer_2, weight_dim=[n_hidden2, NR_CLASSES], bias_dim=[NR_CLASSES], name='out')

model_name = f'{n_hidden1} - {n_hidden2} LR {learning_rate} E {nr_epochs}'

# TensorBoard Setup

In [23]:
# Folder for tensorboard
folder_name = f'{model_name} at {strftime("%H %M")}'
directory = os.path.join(LOGGING_PATH, folder_name)

try:
    os.makedirs(directory)
except OSError as exception:
    print(exception.strerror) 
else:
    print('Successefully Created Directory!')

Successefully Created Directory!


# Loss, Optimization & Metrics

#### Define Loss Funtion

In [24]:
with tf.name_scope('loss_calc'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output))

#### Define Optimizer

In [25]:
with tf.name_scope('optimizer'):
    
    optimizer = tf.train.AdamOptimizer(learning_rate)

    train_step = optimizer.minimize(loss)

#### Accuracy Matric

In [26]:
with tf.name_scope('accuracy_calc'):
    
    correct_pred = tf.equal(tf.argmax(output, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [27]:
with tf.name_scope('performance'):
    
    # for the tensorboard.
    tf.summary.scalar('accuracy', accuracy)

    tf.summary.scalar('cost', loss)

# Run Session

In [28]:
sess = tf.Session()

#### Setup filewriter and Merge Summeries

In [29]:
merged_summary = tf.summary.merge_all()

# filewriter
train_writer = tf.summary.FileWriter(directory + '/train')

# filewriter will look for the calculation to write them
train_writer.add_graph(sess.graph)

# filewriter for the validation
validation_writer = tf.summary.FileWriter(directory + '/validation')

#### Initialize all the variables

In [30]:
init = tf.global_variables_initializer()

# feed the variables in the session
sess.run(init)

## Batching the data(split)

In [31]:
size_of_batch = 1000

In [32]:
num_examples = y_train.shape[0]
nr_iterations = int(num_examples / size_of_batch)

index_in_epoch = 0 

In [33]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch
    
    start = index_in_epoch
    index_in_epoch += batch_size
    
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    
    end = index_in_epoch
    
    return data[start:end], labels[start:end]

### Training Loop

In [34]:
for epoch in range(nr_epochs):
    
    # ======================= Training Dataset ======================= #
    for i in range(nr_iterations):
        
        batch_x, batch_y = next_batch(batch_size=size_of_batch, data=x_train, labels=y_train)
        
        # Feed dictionary(to be fed into session for calculation)
        feed_dictionary = {X:batch_x, Y:batch_y}
        
        sess.run(train_step, feed_dict=feed_dictionary)
        
        
    s, batch_accuracy = sess.run(fetches=[merged_summary, accuracy], feed_dict=feed_dictionary)
    
    # filewriter
    train_writer.add_summary(s, epoch)
        
    print(f'Epoch {epoch} \t | Training Accuracy = {batch_accuracy}')
    
    # ===================== Validation Dataset ================== #
    summary = sess.run(fetches=merged_summary, feed_dict={X:x_val, Y:y_val})
    validation_writer.add_summary(summary, epoch)
    
print('Done Training!')

Epoch 0 	 | Training Accuracy = 0.8700000047683716
Epoch 1 	 | Training Accuracy = 0.8859999775886536
Epoch 2 	 | Training Accuracy = 0.9100000262260437
Epoch 3 	 | Training Accuracy = 0.9729999899864197
Epoch 4 	 | Training Accuracy = 0.9769999980926514
Epoch 5 	 | Training Accuracy = 0.9789999723434448
Epoch 6 	 | Training Accuracy = 0.9779999852180481
Epoch 7 	 | Training Accuracy = 0.9779999852180481
Epoch 8 	 | Training Accuracy = 0.9860000014305115
Epoch 9 	 | Training Accuracy = 0.9860000014305115
Epoch 10 	 | Training Accuracy = 0.9869999885559082
Epoch 11 	 | Training Accuracy = 0.9869999885559082
Epoch 12 	 | Training Accuracy = 0.9890000224113464
Epoch 13 	 | Training Accuracy = 0.9890000224113464
Epoch 14 	 | Training Accuracy = 0.9890000224113464
Epoch 15 	 | Training Accuracy = 0.9890000224113464
Epoch 16 	 | Training Accuracy = 0.9900000095367432
Epoch 17 	 | Training Accuracy = 0.9900000095367432
Epoch 18 	 | Training Accuracy = 0.9890000224113464
Epoch 19 	 | Training 

# Make a Prediction


In [35]:
img = Image.open('MNIST/test_img.png')

In [36]:
# Convert the image into grey scale(black and white)
bw = img.convert('L')

In [37]:
# convert the image into an array
img_array = np.invert(bw)

In [38]:
# Flattening the arrray
test_img = img_array.ravel()

In [39]:
# feed it to tensorflow(using session(sess))
prediction = sess.run(fetches=tf.argmax(output, axis=1),feed_dict={X:[test_img]})

In [40]:
print(f'Prediction for the test image is {prediction}')

Prediction for the test image is [2]


# Testing and Evaluation

In [41]:
# Accuracy over the test dataset
test_accuracy = sess.run(fetches=accuracy, feed_dict={X:x_test, Y:y_test})

print(f'Accuracy on test set is {test_accuracy:0.2%}')

Accuracy on test set is 98.01%


# Reset for the Next Run

In [42]:
train_writer.close()
validation_writer.close()
sess.close()
tf.reset_default_graph()

# This is how we define each individual layer 

In [43]:
# 1st hidden layer
    # initialize weights
  #  initial_w1 = tf.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1, seed=42)

    # creating weights
   # w1 = tf.Variable(initial_value=initial_w1, name='w1')

    # Initialize Biases
    #initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])

    # to hold all the biases in the 1st hiddel layer(Creating biases)
    #b1 = tf.Variable(initial_value=initial_b1, name='b1')

    # The inputs that come into the 1st hidden layer
    #layer1_in = tf.matmul(X, w1) + b1

    # the output from the 1st hidden layer
    #layer1_out = tf.nn.relu(layer1_in)

In [44]:
# 1st hidden layer
#with tf.name_scope('hidden_1'):
    
 
    #initial_w1 = tf.truncated_normal(shape=[TOTAL_INPUTS, n_hidden1], stddev=0.1, seed=42)
    #w1 = tf.Variable(initial_value=initial_w1, name='w1')
                                     
  #  initial_b1 = tf.constant(value=0.0, shape=[n_hidden1])
   # b1 = tf.Variable(initial_value=initial_b1, name='b1')
                                     
  #  layer1_in = tf.matmul(X, w1) + b1
   # layer1_out = tf.nn.relu(layer1_in)

**Challenge:** Set up the second hidden layer with 64 neurons. That works off the output of the 1st hidden layer. Then set up the output layer with ```softmax``` activation funtion.

In [45]:
# 2nd hidden layer
#with tf.name_scope('hidden_2'):
 #   initial_w2 = tf.truncated_normal(shape=[n_hidden1, n_hidden2], stddev=0.1, seed=42)
  #  w2 = tf.Variable(initial_value=initial_w2, name='w2')

   # initial_b2 = tf.constant(value=0.0, shape=[n_hidden2])
 #   b2 = tf.Variable(initial_value=initial_b2, name='b2')

  #  layer2_in = tf.matmul(layer1_in, w2) + b2
   # layer2_out = tf.nn.relu(layer2_in)

In [46]:
# The output layer
#with tf.name_scope('Output_Layer'):
 #   initial_w3 = tf.truncated_normal(shape=[n_hidden2, NR_CLASSES], stddev=0.1, seed=42)
 #   w3 = tf.Variable(initial_value=initial_w3, name='w3')

  #  initial_b3 = tf.constant(value=0.0, shape=[NR_CLASSES])
  #  b3 = tf.Variable(initial_value=initial_b3, name='b3')

#    layer3_in = tf.matmul(layer2_in, w3) + b3
 #   output = tf.nn.softmax(layer3_in)

In [47]:
# after the session, we have weights and biases
# we can see the initial weights(w1)
#w1.eval(sess)