Problem 3

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


class SoftmaxLayer:
    """Output layer: one affine transform followed by ``tf.nn.softmax``.

    Args:
        _input: tensor multiplied by the weight matrix via ``tf.matmul``
            (assumed to be 2-D with ``_input_num`` columns -- confirm at call site).
        _input_num: number of rows of the weight matrix.
        _classes_num: number of columns of the weight matrix / length of the bias.

    Attributes:
        W: weight variable, created from zeros of shape [_input_num, _classes_num].
        b: bias variable, created from zeros of shape [_classes_num].
        output: ``softmax(_input @ W + b)``.
        y_pred: ``argmax`` of ``output`` along axis 1.
    """

    def __init__(self, _input, _input_num, _classes_num):
        # parameters: both start out as all-zero float32 tensors
        weight_init = tf.zeros([_input_num, _classes_num])
        self.W = tf.Variable(weight_init, dtype=tf.float32)
        bias_init = tf.zeros([_classes_num,])
        self.b = tf.Variable(bias_init, dtype=tf.float32)

        # affine scores, then normalised by softmax
        scores = tf.matmul(_input, self.W) + self.b
        self.output = tf.nn.softmax(scores)

        # index of the largest softmax entry in every row
        self.y_pred = tf.argmax(self.output, axis=1)

    
class HiddenLayer:
    """Fully connected layer with an optional activation function.

    Args:
        _input: tensor multiplied by the weight matrix via ``tf.matmul``.
        _input_num: number of rows of the weight matrix.
        _output_num: number of columns of the weight matrix / length of the bias.
        activ_func: callable applied to ``_input @ W + b``; pass ``None`` to
            keep the layer purely affine. Defaults to ``tf.nn.sigmoid``.

    Attributes:
        W: weight variable named "W", drawn uniformly from
            [-bound_val, bound_val] with
            ``bound_val = 4 * sqrt(6 / (_input_num + _output_num))``.
        b: bias variable named "b", created from zeros.
        output: the (optionally activated) layer output.
    """

    def __init__(self, _input, _input_num, _output_num, activ_func = tf.nn.sigmoid):
        # symmetric uniform range for the initial weights
        # NOTE(review): looks like the Glorot-style bound with the x4 factor
        # commonly used for sigmoid units -- confirm if other activations are used.
        fan_total = _input_num + _output_num
        bound_val = 4.0*np.sqrt(6.0/fan_total)
        weight_init = tf.random_uniform([_input_num, _output_num], minval=-bound_val, maxval=bound_val)
        self.W = tf.Variable(weight_init, dtype=tf.float32, name="W")

        # bias starts at zero
        bias_init = tf.zeros([_output_num,])
        self.b = tf.Variable(bias_init, dtype=tf.float32, name="b")

        # affine part is shared by both branches; the activation is optional
        pre_activation = tf.matmul(_input, self.W) + self.b
        self.output = pre_activation if activ_func is None else activ_func(pre_activation)


class MLP:
    """Multi-layer perceptron: a stack of ``HiddenLayer``s feeding a ``SoftmaxLayer``.

    Args:
        _input: input tensor handed to the first layer.
        _input_num: feature count of ``_input``.
        _hidden_num: iterable with the unit count of each hidden layer, in
            order. An empty iterable yields a model with only the softmax layer.
        _output_num: number of classes of the softmax layer.

    Attributes:
        hiddenlayer: list of the created ``HiddenLayer`` objects, in order.
        outputlayer: the final ``SoftmaxLayer``.
        y_pred: the output layer's ``y_pred`` tensor.
    """

    # Lower bound applied to probabilities before taking the logarithm.
    _LOG_EPS = 1e-10

    def __init__(self, _input, _input_num, _hidden_num, _output_num):
        # Set up the model: each layer consumes the previous layer's output
        last_output = _input
        last_output_num = _input_num

        self.hiddenlayer = []
        for hidden_units in _hidden_num:
            layer = HiddenLayer(last_output, _input_num=last_output_num, _output_num=hidden_units)
            self.hiddenlayer.append(layer)
            last_output = layer.output
            last_output_num = hidden_units

        self.outputlayer = SoftmaxLayer(last_output, _input_num=last_output_num, _classes_num=_output_num)

        # prediction
        self.y_pred = self.outputlayer.y_pred

    def cross_entropy_loss(self, y):
        """Return the cross-entropy between ``y`` and the softmax output.

        The per-row sum of ``y * log(p)`` is averaged over all rows and negated.

        Args:
            y: target tensor with the same shape as the softmax output
               (presumably one-hot labels -- confirm against the caller).

        Returns:
            Scalar loss tensor.
        """
        # Softmax can underflow to exactly 0, and log(0) = -inf turns the
        # product 0 * -inf into NaN. Clipping keeps the logarithm finite while
        # leaving every probability >= _LOG_EPS untouched.
        safe_probs = tf.clip_by_value(self.outputlayer.output, self._LOG_EPS, 1.0)
        return -tf.reduce_mean(tf.reduce_sum(y * tf.log(safe_probs), axis=1))

    def accuracy(self, y):
        """Return the fraction of rows whose predicted class equals ``argmax(y, axis=1)``.

        Args:
            y: target tensor; its argmax along axis 1 is taken as the true class.

        Returns:
            Scalar float32 tensor.
        """
        correct_pred = tf.equal(self.outputlayer.y_pred, tf.argmax(y, axis=1))
        return tf.reduce_mean(tf.cast(correct_pred, tf.float32))

(a) Multiclass logistic regression (MLP with no hidden layer)

In [6]:
if __name__ == "__main__":
    # load mnist dataset (one_hot=True is passed so labels come back one-hot encoded)
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # define training param
    epochs_num = 200
    batch_size = 100
    display_step = 10
    # number of full mini-batches per epoch (integer division, no float round trip)
    batch_num = mnist.train.num_examples // batch_size


    # define input and output placeholders
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    # create mlp model: an empty hidden-layer list leaves only the softmax layer
    multiclass_logistic_regressor = MLP(_input=x, _input_num=784, _hidden_num=[], _output_num=10)
    # get loss
    loss = multiclass_logistic_regressor.cross_entropy_loss(y_)
    # accuracy
    accuracy = multiclass_logistic_regressor.accuracy(y_)
    predictor = multiclass_logistic_regressor.y_pred

    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.02).minimize(loss)
    init = tf.global_variables_initializer()

    print("Training...")
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(epochs_num):
            avg_loss = 0.0

            for i in range(batch_num):
                x_batch, y_batch = mnist.train.next_batch(batch_size)
                batch_feed = {x: x_batch, y_: y_batch}
                # Fetch the update op and the loss in ONE run call so the
                # forward pass is not repeated. The reported loss is the one
                # the update step was computed from.
                batch_loss = sess.run([train_op, loss], feed_dict=batch_feed)[1]
                avg_loss += batch_loss / batch_num

            # periodic validation report
            if epoch % display_step == 0:
                val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images,
                                                       y_: mnist.validation.labels})
                print("Epoch {0}: loss: {1}, validation accuacy: {2}".format(epoch,
                                                                            avg_loss, val_acc))

        # final evaluation on the held-out test split, inside the same session
        print("Testing...")
        test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images,y_: mnist.test.labels})
        print("Test: accuacy: {0}".format(test_acc))


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Training...
Epoch 0: loss: 0.9155231214653358, validation accuacy: 0.8687999844551086
Epoch 10: loss: 0.33936947516419635, validation accuacy: 0.9121999740600586
Epoch 20: loss: 0.30947883848439556, validation accuacy: 0.9214000105857849
Epoch 30: loss: 0.29555775191296224, validation accuacy: 0.9236000180244446
Epoch 40: loss: 0.2870824030854482, validation accuacy: 0.9243999719619751
Epoch 50: loss: 0.2810914056951349, validation accuacy: 0.925000011920929
Epoch 60: loss: 0.2765442376651547, validation accuacy: 0.9265999794006348
Epoch 70: loss: 0.2729456642270089, validation accuacy: 0.926800012588501
Epoch 80: loss: 0.2700091463869269, validation accuacy: 0.9273999929428101
Epoch 90: loss: 0.26747991110790864, validation accuacy: 0.9269999861717224
Epoch 100: loss: 0.2654429981518875, valida

(b) MLP with one hidden layer

In [3]:

# NOTE: this cell uses names it does not define itself (x, y_, mnist,
# epochs_num, batch_num, batch_size, display_step); they must already exist
# in the kernel before it is run.

# create mlp model: one hidden layer with 500 units
mlp_classifier1 = MLP(_input=x, _input_num=784, _hidden_num=[500], _output_num=10)
# get loss
loss = mlp_classifier1.cross_entropy_loss(y_)
# accuracy
accuracy = mlp_classifier1.accuracy(y_)
predictor = mlp_classifier1.y_pred

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.02).minimize(loss)
init = tf.global_variables_initializer()


print("Training...")
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs_num):
        avg_loss = 0.0

        for i in range(batch_num):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {x: x_batch, y_: y_batch}
            # Fetch the update op and the loss in ONE run call so the forward
            # pass is not repeated. The reported loss is the one the update
            # step was computed from.
            batch_loss = sess.run([train_op, loss], feed_dict=batch_feed)[1]
            avg_loss += batch_loss / batch_num

        # periodic validation report
        if epoch % display_step == 0:
            val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images, y_: mnist.validation.labels})
            print("Epoch {0}: loss: {1}, validation accuacy: {2}".format(epoch, avg_loss, val_acc))

    # final evaluation on the test split, inside the same session
    print("Testing...")
    test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images,y_: mnist.test.labels})
    print("Test: accuacy: {0}".format(test_acc))


Training...
Epoch 0: loss: 1.22101874990897, validation accuacy: 0.8416000008583069
Epoch 10: loss: 0.33905528894879605, validation accuacy: 0.9124000072479248
Epoch 20: loss: 0.2913135877116159, validation accuacy: 0.9246000051498413
Epoch 30: loss: 0.26488797327334224, validation accuacy: 0.9318000078201294
Epoch 40: loss: 0.24438670064915322, validation accuacy: 0.9354000091552734
Epoch 50: loss: 0.22694157447327262, validation accuacy: 0.9387999773025513
Epoch 60: loss: 0.21118372019041678, validation accuacy: 0.9431999921798706
Epoch 70: loss: 0.19704378839243544, validation accuacy: 0.9473999738693237
Epoch 80: loss: 0.1840471007133073, validation accuacy: 0.949400007724762
Epoch 90: loss: 0.17254632462154784, validation accuacy: 0.9527999758720398
Epoch 100: loss: 0.1618204302611676, validation accuacy: 0.9544000029563904
Epoch 110: loss: 0.15227055092426875, validation accuacy: 0.9567999839782715
Epoch 120: loss: 0.14353096114640884, validation accuacy: 0.9584000110626221
Epoch

(c) MLP with two hidden layers

In [4]:
# NOTE: this cell uses names it does not define itself (x, y_, mnist,
# epochs_num, batch_num, batch_size, display_step); they must already exist
# in the kernel before it is run.

# create mlp model: two hidden layers with 500 and 250 units
mlp_classifier1 = MLP(_input=x, _input_num=784, _hidden_num=[500, 250], _output_num=10)
# get loss
loss = mlp_classifier1.cross_entropy_loss(y_)
# accuracy
accuracy = mlp_classifier1.accuracy(y_)
predictor = mlp_classifier1.y_pred

train_op = tf.train.GradientDescentOptimizer(learning_rate=0.02).minimize(loss)
init = tf.global_variables_initializer()


print("Training...")
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs_num):
        avg_loss = 0.0

        for i in range(batch_num):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            batch_feed = {x: x_batch, y_: y_batch}
            # Fetch the update op and the loss in ONE run call so the forward
            # pass is not repeated. The reported loss is the one the update
            # step was computed from.
            batch_loss = sess.run([train_op, loss], feed_dict=batch_feed)[1]
            avg_loss += batch_loss / batch_num

        # periodic validation report
        if epoch % display_step == 0:
            val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images, y_: mnist.validation.labels})
            print("Epoch {0}: loss: {1}, validation accuacy: {2}".format(epoch, avg_loss, val_acc))

    # final evaluation on the test split, inside the same session
    print("Testing...")
    test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images,y_: mnist.test.labels})
    print("Test: accuacy: {0}".format(test_acc))

Training...
Epoch 0: loss: 1.7305880247462884, validation accuacy: 0.7505999803543091
Epoch 10: loss: 0.3323439566113735, validation accuacy: 0.9120000004768372
Epoch 20: loss: 0.2670271993496201, validation accuacy: 0.9265999794006348
Epoch 30: loss: 0.23142414401878017, validation accuacy: 0.9368000030517578
Epoch 40: loss: 0.2049292043393308, validation accuacy: 0.9426000118255615
Epoch 50: loss: 0.18306523636660793, validation accuacy: 0.9462000131607056
Epoch 60: loss: 0.16505721724168823, validation accuacy: 0.9513999819755554
Epoch 70: loss: 0.1499371575767343, validation accuacy: 0.9567999839782715
Epoch 80: loss: 0.1372461358728732, validation accuacy: 0.9593999981880188
Epoch 90: loss: 0.12612536076794978, validation accuacy: 0.9617999792098999
Epoch 100: loss: 0.11640308991413233, validation accuacy: 0.9631999731063843
Epoch 110: loss: 0.1080524612218141, validation accuacy: 0.9649999737739563
Epoch 120: loss: 0.10062915821644391, validation accuacy: 0.9664000272750854
Epoch