In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

dropout = False
reg = 0

# Load the data
def loadData():
    with np.load("notMNIST.npz") as data:
        Data, Target = data["images"], data["labels"]
        np.random.seed(521)
        randIndx = np.arange(len(Data))
        np.random.shuffle(randIndx)
        Data = Data[randIndx] / 255.0
        Target = Target[randIndx]
        trainData, trainTarget = Data[:10000], Target[:10000]
        validData, validTarget = Data[10000:16000], Target[10000:16000]
        testData, testTarget = Data[16000:], Target[16000:]
    return trainData, validData, testData, trainTarget, validTarget, testTarget

# Implementation of a neural network using only Numpy - trained using gradient descent with momentum
def convertOneHot(trainTarget, validTarget, testTarget):
    newtrain = np.zeros((trainTarget.shape[0], 10))
    newvalid = np.zeros((validTarget.shape[0], 10))
    newtest = np.zeros((testTarget.shape[0], 10))
    for item in range(0, trainTarget.shape[0]):
        newtrain[item][trainTarget[item]] = 1
    for item in range(0, validTarget.shape[0]):
        newvalid[item][validTarget[item]] = 1
    for item in range(0, testTarget.shape[0]):
        newtest[item][testTarget[item]] = 1
    return newtrain, newvalid, newtest
#data like ==> newtrain[0-9999][0-9]

def shuffle(trainData, trainTarget):
    np.random.seed(421)
    randIndx = np.arange(len(trainData))
    target = trainTarget
    np.random.shuffle(randIndx)
    data, target = trainData[randIndx], target[randIndx]
    return data, target

In [2]:
def convolutional_layers(features, labels):
    # Input Layer
    input = tf.reshape(features, shape=[-1, 28, 28, 1])
    # 3x3 convolution, 1 input, 32 outputs
    W1 = tf.get_variable("W1", [3, 3, 1, 32], dtype='float32',initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable('b1', [32], dtype='float32', initializer=tf.contrib.layers.xavier_initializer())
    conv = tf.nn.conv2d(input, W1, strides=[1, 1, 1, 1], padding='SAME')

    conv1 = tf.nn.relu(conv + b1, name='conv1')

    # Batch Normalization layer
    mean, variance = tf.nn.moments(conv1, axes=[0, 1, 2])
    bn = tf.nn.batch_normalization(x=conv1, mean=mean, variance=variance, offset=None, scale=None, variance_epsilon=0.001)

    # 2×2 max pooling layer
    pool = tf.nn.max_pool(bn, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Flatten Layer
    pool = tf.reshape(pool, [-1, 6272])

    # Fully connected layer relu
    W2 = tf.get_variable('W2', [6272, 1024], dtype='float32', initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable('b2', [1024], dtype='float32', initializer=tf.contrib.layers.xavier_initializer())
    fc1 = tf.nn.relu(tf.matmul(pool, W2) + b2)

    # Fully connected layer with softmax
    W3 = tf.get_variable('W3', [1024, 10], dtype='float32', initializer=tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable('b3', [10], dtype='float32', initializer=tf.contrib.layers.xavier_initializer())
    #sm = tf.get_variable('sm', [labels[0].shape, labels[1].shape], dtype='float64', initializer=tf.contrib.layers.xavier_initializer())
    fc2 = tf.matmul(fc1, W3) + b3
    sm = tf.nn.softmax(fc2)
    acc, acc_op = tf.metrics.accuracy(labels=tf.argmax(sm, 1), predictions=tf.argmax(labels, 1))

    # Loss with cross entropy
    entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fc2)
    loss = tf.reduce_mean(entropy)

    return loss, W3, b3, acc_op

def Model_Training(features, labels):

    dim = 10
    N = features.shape[0]
    dim_x = features.shape[1]
    dim_y = features.shape[2]
    batch_size = 32
    epoch = 50
    runs = int(N / batch_size)

    # Define placeholders to feed mini_batches
    X = tf.placeholder(tf.float32, shape=(batch_size, dim_x * dim_y), name="X")
    Y = tf.placeholder(tf.float32, shape=(batch_size, None), name="Y")
    lam = tf.placeholder(tf.float32, shape=None, name="lam")

    loss, W, b, accer = convolutional_layers(X, Y)

    opt = tf.train.AdamOptimizer(0.0001).minimize(loss)

    return W, b, Y, X, loss, opt, accer, lam

In [3]:
def Batch(itr,x_i,y_i,batch_size,reg,lam,sess,op,loss,X,Y,acc_o,Type):
    d_s = (batch_size,x_i.shape[1]*x_i.shape[2])
    t_s = (batch_size,1)
    print("I am here")
    for j in range(itr):
        print("Reaching"+str(j))
        x_batch = x_i[j*batch_size:(j+1)*batch_size].reshape(d_s)
        y_batch = y_i[j*batch_size:(j+1)*batch_size]
        if Type == 'train':
            _, l, acc = sess.run([op,loss,acc_o],feed_dict={X:x_batch,Y:y_batch,lam:reg})
            print("End")
        else:
            l, acc = sess.run([loss,acc_o],feed_dict={X:x_batch,Y:y_batch,lam:reg})
        print("End here")
        print(l,acc)
    return l, acc
def SGD(batch_size,iterations):
    trainData, validData, testData, trainTarget, validTarget,testTarget = loadData()
    trainTarget, validTarget, testTarget = convertOneHot(trainTarget, validTarget, testTarget)
    W, b, Y, X, loss, op, acc_o, lam = Model_Training(trainData,trainTarget)
    l_tr = []
    l_v = []
    l_t = []
    a_tr = []
    a_v = []
    a_t = []
    tr_batch = int(trainData.shape[0]/batch_size)
    v_batch = int(validData.shape[0]/batch_size)
    t_batch = int(testData.shape[0]/batch_size)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for i in range(iterations):
            tr_r = np.arange(trainData.shape[0])
            np.random.shuffle(tr_r)
            trainData = trainData[tr_r]
            trainTarget = trainTarget[tr_r]
            v_r = np.arange(validData.shape[0])
            np.random.shuffle(v_r)
            validData = validData[v_r]
            validTarget = validTarget[v_r]
            t_r = np.arange(testData.shape[0])
            np.random.shuffle(t_r)
            testData = testData[t_r]
            testTarget = testTarget[t_r]
            #Batch(itr,x_i,y_i,batch_size,reg,lam,sess,op,loss,X,Y,acc,type)
            loss, acc = Batch(tr_batch,trainData,trainTarget,batch_size,reg,lam,sess,op,loss,X,Y,acc_o,'train')
            e_v, acc_v = Batch(v_batch,validData,validTarget,batch_size,reg,lam,sess,op,loss,X,Y,acc_o,'valid')
            e_t, acc_t = Batch(t_batch,testData,testTarget,batch_size,reg,lam,sess,op,loss,X,Y,acc_o,'test')
            l_tr.append(loss)
            l_v.append(e_v)
            l_t.append(e_t)
            a_tr.append(acc)
            a_v.append(acc_v)
            a_t.append(acc_t)
    return l_tr, l_v, l_t, a_tr, a_v, a_t
        

In [None]:
trainData, validData, testData, trainTarget, validTarget, testTarget = loadData()
trainTarget, validTarget, testTarget = convertOneHot(trainTarget, validTarget, testTarget)
l_train, l_valid, l_test, a_train, a_valid, a_test = SGD(32, 50)

I am here
Reaching0
End
End here
2.6278086 0.09375
Reaching1
End
End here
2.533319 0.15625
Reaching2
End
End here
1.5706352 0.28125
Reaching3
End
End here
1.4417468 0.3203125
Reaching4
End
End here
1.2288163 0.36875
Reaching5
