In [1]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import datetime
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from /tmp/data/ with the labels one-hot encoded.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Print the split sizes: number of training examples first, then test examples.
for split in (mnist.train, mnist.test):
    print(split.num_examples)

  from ._conv import register_converters as _register_converters


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
55000
10000


In [2]:
# Hyper-parameters
learningrate = 0.02  # the author also noted 0.005 as an alternative value
num_of_epochs = 50
batchsize = 128
# Total number of mini-batch updates for `num_of_epochs` passes over the
# training split. The size is read from the dataset instead of repeating the
# literal 55000, and integer arithmetic keeps the count an int on every
# Python version (true division made it a float on Python 3 and a different,
# truncated value on Python 2).
num_of_iters = (mnist.train.num_examples * num_of_epochs) // batchsize

# Width of the network output / of each one-hot label vector.
noutput = 10

# Inputs are fed as flat 784-value rows (reshaped to 28x28x1 inside conv_net);
# labels are one-hot rows of width `noutput`. The batch dimension is left open.
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, noutput])

In [3]:
def conv2d(x, W, b, strides=1):
    """Convolve ``x`` with ``W``, add the bias ``b`` and apply ReLU.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: filter tensor passed to ``tf.nn.conv2d``.
        b: bias added with ``tf.nn.bias_add``.
        strides: stride used for the two middle dimensions; the first and
            last stride entries are fixed at 1.

    Returns:
        The ReLU-activated result of the biased, 'SAME'-padded convolution.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window moved in steps of k.

    Args:
        x: input tensor passed to ``tf.nn.max_pool``.
        k: used both as the window size and as the stride for the two middle
            dimensions; the first and last entries are fixed at 1.

    Returns:
        The 'SAME'-padded max-pooled tensor.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')


def conv_net(x, weights, biases, dropout):
    """Build the logits of a two-conv-layer, one-hidden-layer network.

    Pipeline: reshape -> (conv + ReLU -> 2x2 max-pool) twice -> flatten ->
    dense + ReLU -> dropout -> dense (no activation).

    Args:
        x: batch of flat inputs; reshaped here to [-1, 28, 28, 1].
        weights: dict with keys 'wc1', 'wc2', 'wd1' and 'out'.
        biases: dict with keys 'bc1', 'bc2', 'bd1' and 'out'.
        dropout: forwarded as the second positional argument of
            ``tf.nn.dropout``. In the TF 1.x API used by this notebook that
            argument is the *keep* probability, so 1.0 disables dropout.

    Returns:
        The un-normalised output of the last dense layer (logits).
    """
    # reshape input to 28x28 size with a single channel
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Convolution layer 1. conv2d() already ends with a ReLU, so no further
    # activation is applied here (a second ReLU would be a no-op graph node).
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max pooling
    conv1 = maxpool2d(conv1, k=2)

    # Convolution layer 2 (ReLU likewise applied inside conv2d)
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max pooling
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer: flatten to the input width expected by 'wd1'
    flat_width = weights['wd1'].get_shape().as_list()[0]
    fc1 = tf.reshape(conv2, [-1, flat_width])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output layer: raw class scores; softmax is left to the loss function
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

# Initializers. Only `init_var` is used by the variables created below;
# `init_xavier` is created but not referenced in the visible cells.
init_var = tf.contrib.layers.variance_scaling_initializer()
init_xavier = tf.contrib.layers.xavier_initializer()

# Trainable weights, created in the order wc1, wc2, wd1, out.
# The conv kernels are 5x5 (1 -> 32 and 32 -> 64 channels). 'wd1' takes
# 7*7*64 inputs: the 28x28 image halved by each of the two k=2 poolings,
# times the 64 channels of the second conv layer.
weights = {
    name: tf.get_variable(name, shape=shape, initializer=init_var)
    for name, shape in (
        ("wc1", [5, 5, 1, 32]),
        ("wc2", [5, 5, 32, 64]),
        ("wd1", [7*7*64, 1024]),
        ("out", [1024, noutput]),
    )
}

# Zero-initialised biases, one per layer, created in the order bc1, bc2, bd1, out.
biases = {
    name: tf.Variable(tf.zeros([width]))
    for name, width in (
        ("bc1", 32),
        ("bc2", 64),
        ("bd1", 1024),
        ("out", noutput),
    )
}

In [4]:
# Logits of the network on placeholder X. The value 0.5 is passed to
# tf.nn.dropout inside conv_net as a Python constant, so every evaluation of
# `model` (and of anything derived from it) applies dropout.
model = conv_net(x=X, weights=weights, biases=biases, dropout=0.5)


In [5]:
# Objective: softmax cross-entropy between the logits and the one-hot labels,
# averaged over the batch.
xent_per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=model)
loss = tf.reduce_mean(xent_per_example)

# Momentum optimizer (an AdamOptimizer variant was left disabled by the author).
# NOTE(review): the recorded training log shows the loss drifting upward late
# in the run; momentum=0.99 with this learning rate may be too aggressive -
# confirm before reusing these settings.
optimizer = tf.train.MomentumOptimizer(learning_rate=learningrate, momentum=0.99)
# Op that performs one parameter update when run.
train_min = optimizer.minimize(loss)

# Fraction of rows whose highest-scoring class matches the label's class.
predicted_class = tf.argmax(model, 1)
labelled_class = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Summary logging: record the loss as a scalar and write events (plus the
# current default graph) under ./log/.
tf.summary.scalar("trainingLoss", loss)
merged_summary_op = tf.summary.merge_all()
filename = "./log/mnist_project_batchsize128"  # the author noted an alternative suffix: _lr0p005
writer = tf.summary.FileWriter(filename, tf.get_default_graph())

# Created last so that it also covers variables added by optimizer.minimize().
init = tf.global_variables_initializer()

In [6]:
# Evaluation graph. `model` has dropout baked in as a constant, so measuring
# test metrics through it would randomly drop half of the hidden activations
# and report noisy, pessimistic numbers. Build a second forward pass over the
# SAME weight/bias variables with keep probability 1.0 (dropout disabled).
# No new variables are created here, so the existing `init` op still covers
# everything.
eval_logits = conv_net(X, weights, biases, dropout=1.0)
eval_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=eval_logits, labels=Y))
eval_accuracy = tf.reduce_mean(tf.cast(
    tf.equal(tf.argmax(eval_logits, 1), tf.argmax(Y, 1)), tf.float32))

# num_of_iters may be a float (true division); the original `while` loop ran
# floor(num_of_iters) updates, which int() reproduces.
total_steps = int(num_of_iters)

try:
    with tf.Session() as sess:
        sess.run(init)
        for step in range(1, total_steps + 1):
            batch_x, batch_y = mnist.train.next_batch(batchsize)
            # One run per step: the update and the logged metrics come from the
            # same forward pass (previously the batch was pushed through the
            # network a second time just to compute the metrics). The logged
            # training metrics are still measured with dropout active.
            _, losscalc, accuracycalc, merged_summary = sess.run(
                [train_min, loss, accuracy, merged_summary_op],
                feed_dict={X: batch_x, Y: batch_y})
            if step % 10 == 1:
                print("Iteration: %d, Training Loss: %0.4f, Training Accuracy: %0.4f"%(step, losscalc, accuracycalc))
            writer.add_summary(merged_summary, step)

        # Evaluate on the complete test split (no hard-coded size, no
        # dependence on the batch iterator) using the dropout-free graph.
        losscalc, accuracycalc = sess.run(
            [eval_loss, eval_accuracy],
            feed_dict={X: mnist.test.images, Y: mnist.test.labels})
        # Report the number of updates actually performed (the old counter was
        # already incremented past the last step when it was printed).
        print("Iterations: %d, Test Loss: %0.4f, Test Accuracy: %0.4f"%(total_steps, losscalc, accuracycalc))
finally:
    # Always flush/close the event file, including when training is
    # interrupted from the keyboard.
    writer.close()

Iteration: 1, Training Loss: 2.2580, Training Accuracy: 0.2109
Iteration: 11, Training Loss: 0.7471, Training Accuracy: 0.7891
Iteration: 21, Training Loss: 0.6078, Training Accuracy: 0.7891
Iteration: 31, Training Loss: 0.5925, Training Accuracy: 0.7812
Iteration: 41, Training Loss: 0.4588, Training Accuracy: 0.8828
Iteration: 51, Training Loss: 0.3728, Training Accuracy: 0.9062
Iteration: 61, Training Loss: 0.2104, Training Accuracy: 0.9375
Iteration: 71, Training Loss: 0.3469, Training Accuracy: 0.8984
Iteration: 81, Training Loss: 0.2322, Training Accuracy: 0.9453
Iteration: 91, Training Loss: 0.0722, Training Accuracy: 0.9766
Iteration: 101, Training Loss: 0.2482, Training Accuracy: 0.9453
Iteration: 111, Training Loss: 0.1617, Training Accuracy: 0.9766
Iteration: 121, Training Loss: 0.2618, Training Accuracy: 0.9219
Iteration: 131, Training Loss: 0.3769, Training Accuracy: 0.8750
Iteration: 141, Training Loss: 0.1596, Training Accuracy: 0.9453
Iteration: 151, Training Loss: 0.154

Iteration: 1261, Training Loss: 0.0750, Training Accuracy: 0.9844
Iteration: 1271, Training Loss: 0.0911, Training Accuracy: 0.9766
Iteration: 1281, Training Loss: 0.0768, Training Accuracy: 0.9688
Iteration: 1291, Training Loss: 0.1750, Training Accuracy: 0.9609
Iteration: 1301, Training Loss: 0.0373, Training Accuracy: 0.9922
Iteration: 1311, Training Loss: 0.0388, Training Accuracy: 0.9766
Iteration: 1321, Training Loss: 0.1040, Training Accuracy: 0.9688
Iteration: 1331, Training Loss: 0.0776, Training Accuracy: 0.9844
Iteration: 1341, Training Loss: 0.1334, Training Accuracy: 0.9688
Iteration: 1351, Training Loss: 0.1321, Training Accuracy: 0.9609
Iteration: 1361, Training Loss: 0.1730, Training Accuracy: 0.9688
Iteration: 1371, Training Loss: 0.0474, Training Accuracy: 0.9766
Iteration: 1381, Training Loss: 0.0427, Training Accuracy: 0.9766
Iteration: 1391, Training Loss: 0.2613, Training Accuracy: 0.9375
Iteration: 1401, Training Loss: 0.1412, Training Accuracy: 0.9531
Iteration:

Iteration: 2511, Training Loss: 0.0842, Training Accuracy: 0.9688
Iteration: 2521, Training Loss: 0.0101, Training Accuracy: 1.0000
Iteration: 2531, Training Loss: 0.0406, Training Accuracy: 0.9922
Iteration: 2541, Training Loss: 0.1168, Training Accuracy: 0.9688
Iteration: 2551, Training Loss: 0.0086, Training Accuracy: 0.9922
Iteration: 2561, Training Loss: 0.0669, Training Accuracy: 0.9766
Iteration: 2571, Training Loss: 0.0416, Training Accuracy: 0.9844
Iteration: 2581, Training Loss: 0.0278, Training Accuracy: 0.9922
Iteration: 2591, Training Loss: 0.0590, Training Accuracy: 0.9766
Iteration: 2601, Training Loss: 0.1191, Training Accuracy: 0.9453
Iteration: 2611, Training Loss: 0.0123, Training Accuracy: 1.0000
Iteration: 2621, Training Loss: 0.0471, Training Accuracy: 0.9766
Iteration: 2631, Training Loss: 0.0191, Training Accuracy: 0.9922
Iteration: 2641, Training Loss: 0.0183, Training Accuracy: 0.9922
Iteration: 2651, Training Loss: 0.0849, Training Accuracy: 0.9688
Iteration:

Iteration: 3761, Training Loss: 0.0935, Training Accuracy: 0.9766
Iteration: 3771, Training Loss: 0.0544, Training Accuracy: 0.9844
Iteration: 3781, Training Loss: 0.1145, Training Accuracy: 0.9531
Iteration: 3791, Training Loss: 0.1479, Training Accuracy: 0.9531
Iteration: 3801, Training Loss: 0.2045, Training Accuracy: 0.9453
Iteration: 3811, Training Loss: 0.1556, Training Accuracy: 0.9531
Iteration: 3821, Training Loss: 0.0332, Training Accuracy: 0.9922
Iteration: 3831, Training Loss: 0.1672, Training Accuracy: 0.9844
Iteration: 3841, Training Loss: 0.0503, Training Accuracy: 0.9766
Iteration: 3851, Training Loss: 0.1071, Training Accuracy: 0.9844
Iteration: 3861, Training Loss: 0.0706, Training Accuracy: 0.9688
Iteration: 3871, Training Loss: 0.2384, Training Accuracy: 0.9375
Iteration: 3881, Training Loss: 0.2501, Training Accuracy: 0.9609
Iteration: 3891, Training Loss: 0.0666, Training Accuracy: 0.9766
Iteration: 3901, Training Loss: 0.0722, Training Accuracy: 0.9766
Iteration:

Iteration: 5011, Training Loss: 0.0653, Training Accuracy: 0.9766
Iteration: 5021, Training Loss: 0.0021, Training Accuracy: 1.0000
Iteration: 5031, Training Loss: 0.1274, Training Accuracy: 0.9766
Iteration: 5041, Training Loss: 0.0227, Training Accuracy: 0.9922
Iteration: 5051, Training Loss: 0.2061, Training Accuracy: 0.9531
Iteration: 5061, Training Loss: 0.1204, Training Accuracy: 0.9766
Iteration: 5071, Training Loss: 0.0555, Training Accuracy: 0.9922
Iteration: 5081, Training Loss: 0.0984, Training Accuracy: 0.9766
Iteration: 5091, Training Loss: 0.0934, Training Accuracy: 0.9766
Iteration: 5101, Training Loss: 0.2916, Training Accuracy: 0.9609
Iteration: 5111, Training Loss: 0.1496, Training Accuracy: 0.9453
Iteration: 5121, Training Loss: 0.0516, Training Accuracy: 0.9844
Iteration: 5131, Training Loss: 0.2164, Training Accuracy: 0.9688
Iteration: 5141, Training Loss: 0.0434, Training Accuracy: 0.9922
Iteration: 5151, Training Loss: 0.0654, Training Accuracy: 0.9844
Iteration:

Iteration: 6261, Training Loss: 0.1743, Training Accuracy: 0.9141
Iteration: 6271, Training Loss: 0.3209, Training Accuracy: 0.9297
Iteration: 6281, Training Loss: 0.0748, Training Accuracy: 0.9609
Iteration: 6291, Training Loss: 0.1201, Training Accuracy: 0.9766
Iteration: 6301, Training Loss: 0.1418, Training Accuracy: 0.9609
Iteration: 6311, Training Loss: 0.0392, Training Accuracy: 0.9766
Iteration: 6321, Training Loss: 0.1369, Training Accuracy: 0.9688
Iteration: 6331, Training Loss: 0.1074, Training Accuracy: 0.9609
Iteration: 6341, Training Loss: 0.0981, Training Accuracy: 0.9531
Iteration: 6351, Training Loss: 0.1385, Training Accuracy: 0.9688
Iteration: 6361, Training Loss: 0.1413, Training Accuracy: 0.9766
Iteration: 6371, Training Loss: 0.0868, Training Accuracy: 0.9609
Iteration: 6381, Training Loss: 0.0716, Training Accuracy: 0.9844
Iteration: 6391, Training Loss: 0.0603, Training Accuracy: 0.9766
Iteration: 6401, Training Loss: 0.0842, Training Accuracy: 0.9609
Iteration:

Iteration: 7511, Training Loss: 0.0830, Training Accuracy: 0.9766
Iteration: 7521, Training Loss: 0.4614, Training Accuracy: 0.9453
Iteration: 7531, Training Loss: 0.1923, Training Accuracy: 0.9531
Iteration: 7541, Training Loss: 0.1949, Training Accuracy: 0.9531
Iteration: 7551, Training Loss: 0.1157, Training Accuracy: 0.9453
Iteration: 7561, Training Loss: 0.2863, Training Accuracy: 0.9531
Iteration: 7571, Training Loss: 0.2663, Training Accuracy: 0.9531
Iteration: 7581, Training Loss: 0.2806, Training Accuracy: 0.9688
Iteration: 7591, Training Loss: 0.0619, Training Accuracy: 0.9922
Iteration: 7601, Training Loss: 0.1738, Training Accuracy: 0.9609
Iteration: 7611, Training Loss: 0.1469, Training Accuracy: 0.9609
Iteration: 7621, Training Loss: 0.1468, Training Accuracy: 0.9688
Iteration: 7631, Training Loss: 0.1874, Training Accuracy: 0.9531
Iteration: 7641, Training Loss: 0.0846, Training Accuracy: 0.9766
Iteration: 7651, Training Loss: 0.4776, Training Accuracy: 0.9531
Iteration:

Iteration: 8761, Training Loss: 0.5426, Training Accuracy: 0.9297
Iteration: 8771, Training Loss: 0.3668, Training Accuracy: 0.9453
Iteration: 8781, Training Loss: 0.3064, Training Accuracy: 0.9453
Iteration: 8791, Training Loss: 0.3874, Training Accuracy: 0.9375
Iteration: 8801, Training Loss: 0.1824, Training Accuracy: 0.9609
Iteration: 8811, Training Loss: 0.2293, Training Accuracy: 0.9531
Iteration: 8821, Training Loss: 0.1231, Training Accuracy: 0.9688
Iteration: 8831, Training Loss: 0.1902, Training Accuracy: 0.9609
Iteration: 8841, Training Loss: 0.1688, Training Accuracy: 0.9688
Iteration: 8851, Training Loss: 0.5014, Training Accuracy: 0.9453
Iteration: 8861, Training Loss: 0.3398, Training Accuracy: 0.9688
Iteration: 8871, Training Loss: 0.3790, Training Accuracy: 0.9688
Iteration: 8881, Training Loss: 0.6026, Training Accuracy: 0.9688
Iteration: 8891, Training Loss: 0.3322, Training Accuracy: 0.9531
Iteration: 8901, Training Loss: 0.2488, Training Accuracy: 0.9297
Iteration:

KeyboardInterrupt: 