In [32]:
import pickle
import gzip
import pandas as pd
import numpy as np
import csv
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import math
from sklearn.cluster import KMeans
import matplotlib.pyplot
from matplotlib import pyplot as plt
import tensorflow as tf
from tqdm import tqdm_notebook

# Load the three pickled MNIST partitions from the gzip-compressed archive.
filename = 'mnist.pkl.gz'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a trusted copy of the archive.
# The `with` block closes the handle even if unpickling raises.
with gzip.open(filename, 'rb') as f:
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')



In [21]:
## Load USPS on Python 3.x

from PIL import Image
import os
import numpy as np

# Read every PNG under USPSdata/Numerals/<digit>/ into a flat pixel vector
# (USPSMat) and record the digit taken from the folder name (USPSTar).
USPSMat  = []
USPSTar  = []
curPath  = 'USPSdata/Numerals'
savedImg = []   # ends up holding the last resized image that was loaded

for j in range(0, 10):
    curFolderPath = curPath + '/' + str(j)
    # os.listdir returns entries in arbitrary order; sort so the sample order
    # is the same on every run and every machine.
    imgs = sorted(os.listdir(curFolderPath))
    for imgName in imgs:
        # Match on the real extension, not just the last three characters.
        if not imgName.endswith('.png'):
            continue
        curImg = curFolderPath + '/' + imgName
        # The context manager releases the file handle; resize() returns a
        # new in-memory image, so `img` stays usable after the file is closed.
        with Image.open(curImg, 'r') as rawImg:
            img = rawImg.resize((28, 28))
        savedImg = img
        # Invert and divide by 255.
        # NOTE(review): presumably the PNGs are single-channel 8-bit, giving
        # 784 values in [0, 1] per image - confirm against the data set.
        imgdata = (255 - np.array(img.getdata())) / 255
        USPSMat.append(imgdata)
        USPSTar.append(j)

In [33]:
# Split each MNIST partition (train / validation / test) into its
# (inputs, labels) pair.
(X_train, y_train), (X_cv, y_cv), (X_test, y_test) = (
    training_data,
    validation_data,
    test_data,
)


In [34]:
def getOneHot(y, num_classes=10):
    """Return the one-hot encoding of integer class labels.

    Parameters
    ----------
    y : array-like of int
        Class labels; the first axis indexes the samples.
    num_classes : int, optional
        Number of columns in the result (defaults to 10, the digit classes).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(y), num_classes)`` holding 1 at
        ``[i, y[i]]`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If a label does not address a valid column.
    """
    labels = np.asarray(y)
    num_samples = labels.shape[0]
    y_OH = np.zeros((num_samples, num_classes))
    # Skip the fill for empty input: an empty float array is not a valid index.
    if num_samples:
        # Give the row index trailing singleton axes so it broadcasts against
        # `labels`; column-vector labels then behave as with per-row filling.
        rows = np.arange(num_samples).reshape((num_samples,) + (1,) * (labels.ndim - 1))
        y_OH[rows, labels] = 1
    return y_OH

In [35]:
def one_hot_sanity_check(y,y_OH):
    """Check that every row of ``y_OH`` decodes back to its label in ``y``.

    Row ``i`` passes when ``np.argmax(y_OH[i]) == y[i]``. The index of the
    first failing row is printed and ``False`` is returned; ``True`` is
    returned when all ``y.shape[0]`` rows pass.
    """
    first_mismatch = next(
        (row for row in range(y.shape[0]) if np.argmax(y_OH[row]) != y[row]),
        None,
    )
    if first_mismatch is None:
        return True
    print('sanity check failed at index: '+str(first_mismatch))
    return False

In [36]:
def add_bias_term(X):
    """Return ``X`` with a leading column of ones prepended.

    ``X`` is a 2-D array of shape ``(n, d)``; the result has shape
    ``(n, d + 1)`` and its first column is all ones.
    """
    bias_column = np.ones((X.shape[0], 1))
    return np.hstack((bias_column, X))
    

In [37]:
# One-hot encode the label vectors of all three MNIST partitions.
y_train_OH, y_cv_OH, y_test_OH = (
    getOneHot(labels) for labels in (y_train, y_cv, y_test)
)

In [38]:
# Confirm the training one-hot matrix decodes back to the original labels.
one_hot_sanity_check(y=y_train, y_OH=y_train_OH)

True

In [39]:
# Confirm the validation one-hot matrix decodes back to the original labels.
one_hot_sanity_check(y=y_cv, y_OH=y_cv_OH)

True

In [40]:
# Confirm the test one-hot matrix decodes back to the original labels.
one_hot_sanity_check(y=y_test, y_OH=y_test_OH)

True

In [55]:
# Turn the collected USPS pixel vectors and folder-derived digit labels into
# arrays, then one-hot encode the labels.
X_USPS, y_USPS = np.array(USPSMat), np.array(USPSTar)
y_USPS_OH = getOneHot(y_USPS)

In [66]:
# Build graph: a fully connected network with two ReLU hidden layers and a
# linear output layer whose logits feed a softmax cross-entropy loss.
NUM_INPUT_FEATURES = 28 * 28   # length of one flattened 28x28 input vector
NUM_CLASSES = 10
NUM_HIDDEN_NEURONS_LAYER_1 = 800
NUM_HIDDEN_NEURONS_LAYER_2 = 800
LEARNING_RATE = 0.5
WEIGHT_INIT_STDDEV = 0.01
GRAPH_SEED = 42

graph = tf.Graph()    # Instantiate a graph instance.
with graph.as_default():  # Makes graph the default graph.
    # Graph-level seed so the random weight initialization below is
    # repeatable from one run of the notebook to the next.
    tf.set_random_seed(GRAPH_SEED)

    # Input and output tensors are placeholders: they only fix dtype and
    # shape, and the actual values are supplied later through feed_dict.
    # This lets the same graph be fed different data sets or mini-batches
    # for training and testing at run time.
    inputTensor  = tf.placeholder(tf.float32, [None, NUM_INPUT_FEATURES])
    outputTensor = tf.placeholder(tf.float32, [None, NUM_CLASSES])

    # Weights and biases are tf.Variable tensors because the optimizer must
    # be able to update their values during training.
    hidden_layer1_weights = tf.Variable(tf.random_normal([NUM_INPUT_FEATURES, NUM_HIDDEN_NEURONS_LAYER_1], stddev=WEIGHT_INIT_STDDEV))
    hidden_layer1_biases = tf.Variable(tf.zeros(NUM_HIDDEN_NEURONS_LAYER_1))

    hidden_layer2_weights = tf.Variable(tf.random_normal([NUM_HIDDEN_NEURONS_LAYER_1, NUM_HIDDEN_NEURONS_LAYER_2], stddev=WEIGHT_INIT_STDDEV))
    hidden_layer2_biases = tf.Variable(tf.zeros(NUM_HIDDEN_NEURONS_LAYER_2))

    output_layer_weights = tf.Variable(tf.random_normal([NUM_HIDDEN_NEURONS_LAYER_2, NUM_CLASSES], stddev=WEIGHT_INIT_STDDEV))
    output_layer_biases = tf.Variable(tf.zeros(NUM_CLASSES))

    # ReLU is the activation function of both hidden layers: for an input x
    # it yields max(0, x). Each hidden layer applies ReLU to (previous layer
    # output x weights + biases), and its result is the next layer's input.
    hidden_layer1_values = tf.nn.relu(tf.matmul(inputTensor, hidden_layer1_weights) + hidden_layer1_biases)
    hidden_layer2_values = tf.nn.relu(tf.matmul(hidden_layer1_values, hidden_layer2_weights) + hidden_layer2_biases)

    # Logits are the raw values produced by the output nodes; applying
    # softmax to them gives the per-class probabilities.
    logits = tf.matmul(hidden_layer2_values, output_layer_weights) + output_layer_biases

    # Mean softmax cross-entropy between the logits and the one-hot targets.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=outputTensor))

    # One run of `training` performs a single gradient-descent update of all
    # weights and biases using LEARNING_RATE.
    training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    # Predicted class = index of the largest logit. Softmax is monotonic, so
    # this is the class with the highest softmax probability, obtained
    # without computing the softmax itself.
    prediction = tf.argmax(logits, axis=1)


In [67]:
# NOTE: despite its name, NUM_OF_EPOCHS counts mini-batch gradient steps;
# each loop iteration below trains on one batch of BATCH_SIZE samples.
NUM_OF_EPOCHS = 3000
BATCH_SIZE = 128

training_accuracy = []
loss_list = []
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())

    num_train_samples = y_train_OH.shape[0]
    for step in tqdm_notebook(range(NUM_OF_EPOCHS)):
        # Slide a BATCH_SIZE window over the training set, wrapping around
        # before the window would run past the end.
        offset = (step * BATCH_SIZE) % (num_train_samples - BATCH_SIZE)
        batch_end = offset + BATCH_SIZE
        batch_data = X_train[offset:batch_end, :]
        batch_labels = y_train_OH[offset:batch_end, :]
        sess.run(training, feed_dict={inputTensor: batch_data,
                                      outputTensor: batch_labels})

    # Accuracy and loss on the full training set, measured once after the
    # last training step.
    full_train_feed = {inputTensor: X_train, outputTensor: y_train_OH}
    true_train_classes = np.argmax(y_train_OH, axis=1)
    predicted_train = sess.run(prediction, feed_dict=full_train_feed)
    training_accuracy.append(np.mean(true_train_classes == predicted_train))
    loss_list.append(sess.run(loss, feed_dict=full_train_feed))

    # Predictions for the MNIST test partition and for the USPS images.
    predicted_mnist_test = sess.run(prediction, feed_dict={inputTensor: X_test})
    predicted_usps = sess.run(prediction, feed_dict={inputTensor: X_USPS})

HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

In [68]:
# Training-set accuracy, appended once after the training loop finished
# (hence a single-element list).
training_accuracy

[0.99384]

In [69]:
# MNIST test accuracy: fraction of test samples whose predicted class equals
# the true label.
(predicted_mnist_test == y_test).mean()

0.9798

In [70]:
# Cross-entropy loss on the full training set, appended once after the
# training loop finished (hence a single-element list).
loss_list

[0.018650109]

In [71]:
# USPS accuracy: fraction of USPS images classified correctly. The network
# was trained on MNIST batches only, so this measures how well it transfers
# to a different digit data set.
(predicted_usps == y_USPS).mean()

0.5077753887694385