In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [4]:
# Use the TensorFlow 1.x-style API through the compat.v1 module.
import tensorflow.compat.v1 as tf
# Switch off TF2 behaviour before any graph code runs: later cells build a graph
# from placeholders and execute it inside a tf.Session.
tf.disable_v2_behavior()

In [5]:
from tensorflow import keras

# Short alias for the MNIST dataset module; load_data() is called on it to fetch the arrays.
mnist = keras.datasets.mnist

In [6]:
# load_data() hands back a (train, test) pair, each of which is an (images, labels) pair.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [7]:
# Report the array shapes of the images and labels for the training and test splits.
print(f'Training Data: {train_images.shape}, {train_labels.shape}')
print(f'Test Data: {test_images.shape}, {test_labels.shape}')

In [8]:
# Distinct label values that occur in the training labels.
class_labels = np.unique(train_labels)
# Bare last expression so the notebook displays the array.
class_labels

In [9]:
# Preview three digits side by side: two training samples and one test sample.
# Every panel (including the test image) is titled with its ground-truth label
# so each picture can be checked against its label at a glance.
preview_samples = [
    (train_images[0], train_labels[0]),
    (train_images[2500], train_labels[2500]),
    (test_images[12], test_labels[12]),
]

fig, axes = plt.subplots(1, 3, figsize = (8, 5))
for ax, (digit_image, digit_label) in zip(axes, preview_samples):
    ax.imshow(digit_image)
    ax.set_title(str(digit_label))

In [10]:
# Scale the raw integer pixel intensities into [0, 1].
# Both names are overwritten in place, so the dtype guard keeps this cell
# idempotent: once an array has been converted to floats, re-running the cell
# leaves it alone instead of dividing by 255 a second time.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255.0

In [11]:
# The first 50,000 training samples are used for fitting; the rest are held out for validation.
split_at = 50000

x_train, x_val = train_images[:split_at], train_images[split_at:]
y_train, y_val = train_labels[:split_at], train_labels[split_at:]

In [12]:
# Image splits first (followed by a blank separator), then the matching label splits.
print(x_train.shape)
print(x_val.shape, '\n')
for label_split in (y_train, y_val):
    print(label_split.shape)

In [13]:
# Flatten every image into a single feature vector whose length is the
# product of the per-image dimensions.
new_dimension = np.prod(train_images.shape[1:])

# -1 lets NumPy work out the sample count, which stays unchanged.
flat_shape = (-1, new_dimension)
x_train = x_train.reshape(flat_shape)
x_val = x_val.reshape(flat_shape)
test_images = test_images.reshape(flat_shape)

In [14]:
# Confirm the flattened shape of each split, one per line.
for flattened in (x_train, x_val, test_images):
    print(flattened.shape)

In [15]:
from tensorflow.keras.utils import to_categorical

num_labels = 10

# One-hot encode the integer class labels.
# y_train and y_val are overwritten with their own encoding, so only encode
# while they are still 1-D label vectors; without the guard, re-running this
# cell would feed the already-encoded matrices back into to_categorical.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_labels)
if y_val.ndim == 1:
    y_val = to_categorical(y_val, num_labels)
# y_test is written to a new name from test_labels, so it needs no guard.
y_test = to_categorical(test_labels, num_labels)

In [16]:
# Graph inputs for the flattened images (X) and the one-hot labels (Y);
# the leading None leaves the batch dimension unspecified.
X = tf.placeholder(dtype = tf.float32, shape = [None, new_dimension])
Y = tf.placeholder(dtype = tf.float32, shape = [None, num_labels])

In [17]:
#Create model architecture
def multilayer_perceptron(x, num_classes, first_layer_neurons = 256,
                          second_layer_neurons = 128):
    """Build a perceptron with two ReLU hidden layers and return its logits.

    Args:
        x: Input tensor whose last dimension is the number of features per
            sample; that dimension must be statically known.
        num_classes: Number of output units (one logit per class).
        first_layer_neurons: Width of the first hidden layer.
        second_layer_neurons: Width of the second hidden layer.

    Returns:
        The output of the final linear layer (no softmax applied).

    Raises:
        ValueError: If the last dimension of ``x`` is not statically known.
    """
    # Take the feature width from the input itself rather than from a
    # notebook-level global. Depending on the TF behaviour mode the last
    # dimension is either a Dimension object (with .value) or a plain int/None.
    last_dim = x.shape[-1]
    input_dim = getattr(last_dim, 'value', last_dim)
    if input_dim is None:
        raise ValueError('The last dimension of x must be statically known.')

    def dense(inputs, fan_in, fan_out):
        #Linear layer: inputs @ weight + bias.
        # Weights are zero-mean and scaled by the fan-in. Drawing them from
        # uniform [0, 1) makes every weight positive, so activations grow with
        # layer width and the softmax saturates from the first step.
        weight = tf.Variable(
            tf.truncated_normal([fan_in, fan_out], stddev = (2.0 / fan_in) ** 0.5))
        bias = tf.Variable(tf.zeros([fan_out]))
        return tf.add(tf.matmul(inputs, weight), bias)

    #For the first layer
    first_layer_output = tf.nn.relu(dense(x, input_dim, first_layer_neurons))

    #For the second layer
    second_layer_output = tf.nn.relu(
        dense(first_layer_output, first_layer_neurons, second_layer_neurons))

    #For the output layer
    logits = dense(second_layer_output, second_layer_neurons, num_classes)

    return logits

In [18]:
# Wire the input placeholder through the network to obtain one logit per class.
logits = multilayer_perceptron(x = X, num_classes = num_labels)

In [19]:
learning_rate = 0.01

#Loss: softmax cross-entropy between the logits and the one-hot labels, reduced to its mean
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = logits)
loss_op = tf.reduce_mean(per_example_loss)

#Optimizer: Adam minimising the mean loss
optimiser = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimiser.minimize(loss_op)

In [20]:
#Training schedule
epochs, batch_size = 20, 1000
#Number of whole batches that fit in the training set (floor division)
iteration = x_train.shape[0] // batch_size

#Op that initialises the graph's global variables
init = tf.global_variables_initializer()

In [28]:
#Train the model
with tf.Session() as session:
    session.run(init)

    for epoch in range(epochs):
        average_cost = 0

        #Walk over the training set in consecutive, non-overlapping mini-batches
        for start in range(0, iteration * batch_size, batch_size):
            end = start + batch_size
            batch_x, batch_y = x_train[start : end], y_train[start : end]
            _, loss = session.run([train_op, loss_op], feed_dict = {X: batch_x, Y: batch_y})

            #Accumulate the mean of this epoch's batch losses
            average_cost += loss / iteration

        #Report the epoch's average loss so training progress is visible
        print(f'Epoch ===== {epoch} | average loss: {average_cost:.4f}')

    #Evaluate model: a sample counts as correct when the most probable class
    #matches the position of the 1 in its one-hot label
    prediction = tf.nn.softmax(logits)
    ground_truth = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))

    accuracy = tf.reduce_mean(tf.cast(ground_truth, 'float'))

    #eval() must run inside the with-block: it uses the session opened above
    print(f'\nAccuracy: {accuracy.eval({X: test_images, Y: y_test})}')