In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

##### extract the data ###########
# Path is relative, so the pickle is looked up in the current working directory.
pickle_file = 'notMNIST.pickle'

# NOTE(review): unpickling can execute code embedded in the file, so only
# load a pickle that comes from a trusted source.
with open(pickle_file, 'rb') as f:
    saved = pickle.load(f)

# Pull the three splits (images + labels) out of the loaded mapping.
train_dataset, train_labels = saved['train_dataset'], saved['train_labels']
test_dataset, test_labels = saved['test_dataset'], saved['test_labels']
valid_dataset, valid_labels = saved['valid_dataset'], saved['valid_labels']

# The arrays stay reachable through the names above; only the container
# name is removed.
del saved

# Report the shape of every split as loaded.
print("train_dataset  ", train_dataset.shape, train_labels.shape)
print("test_dataset ", test_dataset.shape, test_labels.shape)
print("valid_dataset ", valid_dataset.shape, valid_labels.shape)


########## Reshape it #########
image_size = 28   # images are image_size x image_size pixels
num_labels = 10   # number of distinct classes


def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    Args:
        dataset: array that can be reshaped to rows of
            ``image_size * image_size`` values (e.g. ``(N, 28, 28)``).
        labels: 1-D array of integer class ids in ``range(num_labels)``.

    Returns:
        A ``(flat_images, one_hot_labels)`` pair of float32 arrays with
        shapes ``(N, image_size * image_size)`` and ``(N, num_labels)``.
    """
    pixels_per_image = image_size * image_size
    flat_images = dataset.reshape((-1, pixels_per_image)).astype(np.float32)

    # Broadcasting a column of labels against the row 0..num_labels-1 yields
    # one True per row: label 0 -> [1.0, 0.0, ...], label 1 -> [0.0, 1.0, ...].
    class_ids = np.arange(num_labels)
    one_hot_labels = (labels[:, None] == class_ids).astype(np.float32)

    return flat_images, one_hot_labels
# NOTE: each name is rebound to its reformatted version, so run this once per
# load. Re-running it would feed the already one-hot labels back into
# reformat() and add another axis to them.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the shapes after flattening / one-hot encoding.
for split_name, split_images, split_labels in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(split_name, split_images.shape, split_labels.shape)

train_dataset   (200000, 28, 28) (200000,)
test_dataset  (10000, 28, 28) (10000,)
valid_dataset  (10000, 28, 28) (10000,)
Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [2]:
def accuracy(predictions, labels):
    """Return the percentage of rows classified correctly.

    A row counts as correct when the index of its largest value in
    ``predictions`` equals the index of the largest value in the matching
    row of ``labels`` (both are 2-D, one row per example).
    """
    predicted_class = np.argmax(predictions, 1)
    true_class = np.argmax(labels, 1)
    num_correct = np.sum(predicted_class == true_class)
    return 100.0 * num_correct / predictions.shape[0]


# Problem 1

In [3]:
######### fit the graph ###########
# One-hidden-layer classifier: ReLU hidden units, dropout on the training
# path only, softmax output, L2 penalty on the output weights, trained with
# gradient descent under an exponentially decayed learning rate.

graph = tf.Graph()
batch_size = 128   # rows per training mini-batch (used by the training loop)
num_labels = 10    # number of output classes
K = 1024           # width of the hidden layer

with graph.as_default():
    # Inputs. The batch dimension is None so any number of rows can be fed;
    # feeding exactly batch_size rows keeps working as before.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    beta = tf.placeholder(tf.float32)    # L2 penalty weight, fed at run time
    pkeep = tf.placeholder(tf.float32)   # dropout keep probability, fed at run time

    #parameters..............

    #### Layer 1
    w1 = tf.Variable(tf.truncated_normal([image_size * image_size, K]))
    b1 = tf.Variable(tf.zeros([K]))
    #### Final Layer
    w2 = tf.Variable(tf.truncated_normal([K, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))

    def _hidden(inputs):
        """Hidden-layer activations: relu(inputs . w1 + b1)."""
        return tf.nn.relu(tf.matmul(inputs, w1) + b1)

    def _logits(hidden):
        """Unnormalised class scores: hidden . w2 + b2."""
        return tf.matmul(hidden, w2) + b2

    #model computation ..................
    # Training path: dropout is applied between the hidden and output layers.
    y1 = _hidden(tf_train_dataset)
    yf = tf.nn.dropout(y1, pkeep)
    y2 = _logits(yf)

    #loss calculation with L2 regularization
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y2, labels=tf_train_labels))
    # Only the output weights w2 are penalised; w1 is left unregularised.
    regularize = tf.nn.l2_loss(w2)
    # Both terms are scalars when beta is fed a scalar (as the training loop
    # does), so no further reduction is needed.
    loss = cross_entropy + beta * regularize

    #decay learning rate
    # Staircase schedule: the rate is multiplied by 0.8 every 2000 steps.
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.05
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate, global_step, 2000, 0.8, staircase=True)

    #optimize............
    # minimize() also increments global_step, which drives the decay above.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    #prediction..........
    #train
    # Built from y2, so it reflects whatever pkeep is fed for the batch.
    train_prediction = tf.nn.softmax(y2)

    #validation
    #no dropout
    v1 = _hidden(tf_valid_dataset)
    v2 = _logits(v1)
    valid_prediction = tf.nn.softmax(v2)

    #test
    #no dropout
    t1 = _hidden(tf_test_dataset)
    t2 = _logits(t1)
    test_prediction = tf.nn.softmax(t2)

In [4]:
#train the model
# Runs num_steps gradient-descent updates on randomly positioned mini-batches,
# reporting loss and accuracies every report_every steps, then prints the
# final test accuracy.

num_steps = 5001
report_every = 500   # print progress every this many steps
l2_beta = 0.1        # value fed to the `beta` placeholder (L2 penalty weight)
keep_prob = 0.75     # value fed to the `pkeep` placeholder (dropout keep probability)

with tf.Session(graph=graph) as sess:
    tf.global_variables_initializer().run()
    print("Initialised")

    # Largest start index that still leaves a full batch. Derived from the
    # data and batch_size instead of the literals 200000 and 128, so the loop
    # stays correct if either changes.
    max_offset = train_dataset.shape[0] - batch_size

    for step in range(num_steps):
        #create a mini batch
        # randint's upper bound is exclusive, hence the +1 so the last full
        # window can also be drawn.
        offset = np.random.randint(0, max_offset + 1)
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels,
                     beta: l2_beta,
                     pkeep: keep_prob}
        #run the session
        # The optimizer op is run for its side effect only; its result is unused.
        _opt, batch_loss, pred_train = sess.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % report_every == 0:
            print("loss at %d:%f" % (step, batch_loss))
            print("Mini-Batch Accuracy : %.2f%%" % accuracy(pred_train, batch_labels))
            # valid_prediction depends only on constants and variables, so it
            # can be evaluated without a feed_dict.
            print("validation Accuracy : %.2f%%" % accuracy(valid_prediction.eval(), valid_labels))
    print("-" * 20)
    print("Test Accuracy: %.2f%%" % accuracy(test_prediction.eval(), test_labels))

Initialised
loss at 0:831.676270
Mini-Batch Accuracy : 16.41%
validation Accuracy : 22.16%
loss at 500:13.169919
Mini-Batch Accuracy : 76.56%
validation Accuracy : 79.19%
loss at 1000:7.222066
Mini-Batch Accuracy : 78.91%
validation Accuracy : 73.88%
loss at 1500:7.828028
Mini-Batch Accuracy : 73.44%
validation Accuracy : 72.84%
loss at 2000:6.698494
Mini-Batch Accuracy : 78.91%
validation Accuracy : 80.31%
loss at 2500:5.941772
Mini-Batch Accuracy : 73.44%
validation Accuracy : 71.18%
loss at 3000:4.833040
Mini-Batch Accuracy : 75.78%
validation Accuracy : 81.54%
loss at 3500:5.942774
Mini-Batch Accuracy : 71.09%
validation Accuracy : 80.77%
loss at 4000:9.641151
Mini-Batch Accuracy : 64.84%
validation Accuracy : 69.63%
loss at 4500:3.798401
Mini-Batch Accuracy : 70.31%
validation Accuracy : 73.66%
loss at 5000:5.551904
Mini-Batch Accuracy : 73.44%
validation Accuracy : 77.54%
--------------------
Test Accuracy: 84.42%
