In [5]:
import cPickle
import gzip
import numpy as np

# Load the MNIST and USPS digit datasets from gzip-compressed pickles.
#
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file.
# Only load archives that come from a trusted source.

# The `with` blocks guarantee the gzip handles are closed even if unpickling fails.
with gzip.open('mnist.pkl.gz', 'rb') as data_file:
    # NOTE(review): the standard deeplearning.net mnist.pkl.gz stores its splits
    # as (train, validation, test). The previous unpacking order
    # (train, test, validation) mislabeled the two held-out splits. Confirm the
    # order if this archive was produced by a custom script.
    train, validation, test = cPickle.load(data_file)

# Each split is indexed as (features, labels).
train_features, train_labels = train[0], train[1]
test_features, test_labels = test[0], test[1]
validation_features, validation_labels = validation[0], validation[1]

with gzip.open('usps_data.pkl.gz', 'rb') as usps_file:
    usps_features, usps_labels = cPickle.load(usps_file)

# Single-argument parenthesized form prints the same text on Python 2 and 3.
print("Data Loaded")

 Data Loaded


In [6]:
import tensorflow as tf

class ConvolutionalNN(object):
    """Two-layer convolutional classifier built with the TensorFlow graph API.

    Architecture (assembled in ``trainAndEvaluate``):
    conv 5x5 -> ReLU -> 2x2 max-pool -> conv 5x5 -> ReLU -> 2x2 max-pool ->
    dense (1024, ReLU) -> dropout -> softmax readout.

    Parameters
    ----------
    train_features : 2-D array, one flattened image per row. The row length
        must equal ``input_dims[0] * input_dims[1] * image_channels``.
    train_labels : 1-D integer array of class indices. The number of classes
        is taken as ``train_labels.max() + 1``.
    l1_size, l2_size : number of feature maps in the first / second
        convolutional layer.
    """

    def __init__(self, train_features, train_labels, l1_size=32, l2_size=64):
        # InteractiveSession installs itself as the default session, which is
        # what lets the bare `.eval()` / `.run()` calls below work.
        self.session = tf.InteractiveSession()
        self.input_dims = (28, 28)
        self.train_features = train_features
        self.train_labels = self.__oneHotEncode(train_labels)
        # Fixed once from the training labels so every later encoding has the
        # same width as the Y placeholder.
        self.num_classes = self.train_labels.shape[1]
        self.X = tf.placeholder(tf.float32, shape=[None, self.train_features.shape[1]])
        self.Y = tf.placeholder(tf.float32, shape=[None, self.num_classes])
        self.patch_size = (5, 5)
        self.image_channels = 1
        self.conv_layer_one_features = l1_size
        self.conv_layer_two_features = l2_size
        self.dense_layer_neurons = 1024
        self.bias_constant = 0.1

    def __oneHotEncode(self, vector, num_classes=None):
        """Return a (len(vector), num_classes) one-hot matrix for integer labels.

        When ``num_classes`` is None the width is inferred as
        ``vector.max() + 1``. Pass it explicitly for evaluation sets so the
        width always matches the training encoding, even if the set happens to
        be missing the highest class.

        Raises ValueError if a label is negative or >= ``num_classes``.
        """
        vector = np.asarray(vector)
        if num_classes is None:
            num_classes = vector.max() + 1
        if vector.size and (vector.min() < 0 or vector.max() >= num_classes):
            raise ValueError("labels must lie in [0, %d)" % num_classes)
        encoded_vector = np.zeros((vector.size, num_classes), dtype=vector.dtype)
        encoded_vector[np.arange(vector.size), vector] = 1
        return encoded_vector

    def __createWeight(self, shape):
        """Create a weight variable initialised from a truncated normal (stddev 0.1)."""
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def __createBias(self, shape):
        """Create a bias variable filled with ``self.bias_constant``."""
        return tf.Variable(tf.constant(self.bias_constant, shape=shape))

    def __convolve(self, data_in, weights):
        """Stride-1, SAME-padded 2-D convolution."""
        return tf.nn.conv2d(data_in, weights, strides=[1, 1, 1, 1], padding='SAME')

    def __reduce(self, data_in):
        """2x2 max-pool with stride 2 and SAME padding."""
        return tf.nn.max_pool(data_in, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    def __flatFeatureCount(self):
        """Number of values per example after the second pooling layer.

        The stride-1 SAME convolutions keep the spatial size, and each
        stride-2 SAME max-pool maps a side of length n to ceil(n / 2). The
        same value is used for both the dense weight shape and the flatten
        reshape so the two can never disagree.
        """
        height, width = self.input_dims
        for _ in range(2):  # two pooling layers
            height = (height + 1) // 2
            width = (width + 1) // 2
        return height * width * self.conv_layer_two_features

    def __crossEntropy(self):
        """Mean categorical cross-entropy between ``self.Y`` and ``self.Y_pred``."""
        # Clip before the log so a probability that underflows to 0 cannot
        # produce -inf / NaN in the loss.
        safe_pred = tf.clip_by_value(self.Y_pred, 1e-10, 1.0)
        return tf.reduce_mean(-tf.reduce_sum(self.Y * tf.log(safe_pred), reduction_indices=[1]))

    def __checkPrediction(self):
        """Boolean tensor: True where the arg-max prediction equals the arg-max label."""
        return tf.equal(tf.argmax(self.Y_pred, 1), tf.argmax(self.Y, 1))

    def __buildGraph(self):
        """Create all layer variables and wire the forward pass up to ``self.Y_pred``."""
        patch_h, patch_w = self.patch_size

        self.conv_layer_one_weights = self.__createWeight([patch_h, patch_w, self.image_channels, self.conv_layer_one_features])
        self.conv_layer_one_bias = self.__createBias([self.conv_layer_one_features])

        # Flat rows -> NHWC image tensor.
        self.X_tensor = tf.reshape(self.X, [-1, self.input_dims[0], self.input_dims[1], self.image_channels])

        self.conv_layer_one_activation = tf.nn.relu(self.__convolve(self.X_tensor, self.conv_layer_one_weights) + self.conv_layer_one_bias)
        self.conv_layer_one_activation_reduced = self.__reduce(self.conv_layer_one_activation)

        self.conv_layer_two_weights = self.__createWeight([patch_h, patch_w, self.conv_layer_one_features, self.conv_layer_two_features])
        self.conv_layer_two_bias = self.__createBias([self.conv_layer_two_features])

        self.conv_layer_two_activation = tf.nn.relu(self.__convolve(self.conv_layer_one_activation_reduced, self.conv_layer_two_weights) + self.conv_layer_two_bias)
        self.conv_layer_two_activation_reduced = self.__reduce(self.conv_layer_two_activation)

        flat_size = self.__flatFeatureCount()
        self.dense_layer_weights = self.__createWeight([flat_size, self.dense_layer_neurons])
        self.dense_layer_bias = self.__createBias([self.dense_layer_neurons])

        temp_flat = tf.reshape(self.conv_layer_two_activation_reduced, [-1, flat_size])
        self.dense_layer_activation = tf.nn.relu(tf.matmul(temp_flat, self.dense_layer_weights) + self.dense_layer_bias)

        # Despite its name, this placeholder is passed as the second argument
        # of tf.nn.dropout: it is fed 0.5 while training and 1.0 for evaluation.
        self.drop_threshold = tf.placeholder(tf.float32)
        self.dense_layer_activation_drop = tf.nn.dropout(self.dense_layer_activation, self.drop_threshold)

        self.readout_layer_weights = self.__createWeight([self.dense_layer_neurons, self.num_classes])
        self.readout_layer_bias = self.__createBias([self.num_classes])

        self.Y_pred = tf.nn.softmax(tf.matmul(self.dense_layer_activation_drop, self.readout_layer_weights) + self.readout_layer_bias)

    def __reportAccuracy(self, name, accuracy, features, labels):
        """Evaluate ``accuracy`` on one labelled set (dropout disabled) and print it."""
        value = accuracy.eval(feed_dict={
            self.X: features,
            self.Y: self.__oneHotEncode(labels, self.num_classes),
            self.drop_threshold: 1.0,
        })
        # Two spaces after the colon reproduce the original print-statement output.
        print("%s Accuracy:  %s" % (name, value))

    def trainAndEvaluate(self, test_features, test_labels, validation_features, validation_labels, usps_features, usps_labels, epochs=10, batch_size=50):
        """Build the network, train it with Adam, and print held-out accuracies.

        Training accuracy on the current mini-batch is printed every 200
        batches. After training, accuracy is printed for the test, validation
        and USPS sets, in that order.

        Parameters
        ----------
        test_*, validation_*, usps_* : features (2-D, same row length as the
            training features) and integer labels for each evaluation set.
        epochs : number of passes over the training data (default 10).
        batch_size : approximate mini-batch size (default 50). The data is
            split into ``len(train) // batch_size`` nearly equal batches.

        Raises ValueError if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive")

        self.__buildGraph()

        train_step = tf.train.AdamOptimizer(1e-4).minimize(self.__crossEntropy())
        accuracy = tf.reduce_mean(tf.cast(self.__checkPrediction(), tf.float32))
        # Must run after minimize() so Adam's slot variables are initialised too.
        self.session.run(tf.initialize_all_variables())

        # Floor division keeps this an int on Python 3. max(1, ...) avoids
        # asking array_split for zero sections on tiny training sets.
        num_batches = max(1, len(self.train_features) // batch_size)
        train_features_batches = np.array_split(self.train_features, num_batches)
        train_labels_batches = np.array_split(self.train_labels, num_batches)

        report_every = 200
        for _ in range(epochs):
            for b, (x_batch, y_batch) in enumerate(zip(train_features_batches, train_labels_batches)):
                if b % report_every == 0:
                    train_accuracy = accuracy.eval(feed_dict={self.X: x_batch, self.Y: y_batch, self.drop_threshold: 1.0})
                    print("Training Accuracy at b %s :  %s" % (b, train_accuracy))
                train_step.run(feed_dict={self.X: x_batch, self.Y: y_batch, self.drop_threshold: 0.5})

        self.__reportAccuracy("Test", accuracy, test_features, test_labels)
        self.__reportAccuracy("Validation", accuracy, validation_features, validation_labels)
        self.__reportAccuracy("USPS", accuracy, usps_features, usps_labels)

In [7]:
# Build the classifier from the training split, then train it and print the
# accuracy on each held-out set.
cnn = ConvolutionalNN(train_features=train_features, train_labels=train_labels)
cnn.trainAndEvaluate(
    test_features=test_features,
    test_labels=test_labels,
    validation_features=validation_features,
    validation_labels=validation_labels,
    usps_features=usps_features,
    usps_labels=usps_labels,
)

Training Accuracy at b 0 :  0.1
Training Accuracy at b 200 :  0.88
Training Accuracy at b 400 :  0.88
Training Accuracy at b 600 :  0.92
Training Accuracy at b 800 :  0.94
Training Accuracy at b 0 :  0.96
Training Accuracy at b 200 :  0.94
Training Accuracy at b 400 :  0.96
Training Accuracy at b 600 :  0.96
Training Accuracy at b 800 :  0.94
Training Accuracy at b 0 :  0.98
Training Accuracy at b 200 :  0.94
Training Accuracy at b 400 :  0.96
Training Accuracy at b 600 :  0.96
Training Accuracy at b 800 :  0.98
Training Accuracy at b 0 :  0.98
Training Accuracy at b 200 :  0.96
Training Accuracy at b 400 :  0.98
Training Accuracy at b 600 :  0.96
Training Accuracy at b 800 :  0.98
Training Accuracy at b 0 :  1.0
Training Accuracy at b 200 :  0.98
Training Accuracy at b 400 :  0.98
Training Accuracy at b 600 :  0.98
Training Accuracy at b 800 :  0.98
Training Accuracy at b 0 :  1.0
Training Accuracy at b 200 :  1.0
Training Accuracy at b 400 :  1.0
Training Accuracy at b 600 :  0.98
Tr