# MNIST with convolutional networks

## Reading the data

In [8]:
import numpy as np,tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Figure size and legend font size constants (not referenced by the cells
# visible in this part of the notebook).
figsize = (10, 10)
legend_fontsize = 16

# Select the seaborn grid style and colour cycle, then capture the palette
# that is active after that selection.
sns.set_style("whitegrid")
sns.set_palette("colorblind")
palette = sns.color_palette()

In [4]:
# Load MNIST through the TensorFlow tutorial helper. The recorded output of
# this cell shows the four archives being downloaded and extracted under
# MNIST_data/. Labels are requested one-hot encoded.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets(
    train_dir="MNIST_data/",
    one_hot=True,
)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## Convolutions in TensorFlow

In [5]:
# ---- Inputs ---------------------------------------------------------------
# Flattened 784-value images and 10-wide label rows; batch size is left open.
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])
# NOTE(review): the training loop feeds this placeholder, but no op built in
# this cell consumes it, so no dropout is applied -- confirm that is intended.
keep_probability = tf.placeholder(tf.float32)

# Unflatten every row into a 28x28 single-channel image for the convolution.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

# ---- Convolution: 5x5 kernels, 1 input channel -> 32 feature maps ---------
W_conv_0 = tf.Variable(
    tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1)
)
b_conv_0 = tf.Variable(
    tf.constant(0.1, shape=[32])
)

# Unit strides with SAME padding, followed by a per-channel bias.
conv_0 = (
    tf.nn.conv2d(x_image, W_conv_0, strides=[1, 1, 1, 1], padding="SAME")
    + b_conv_0
)

# ReLU, then 2x2 max-pooling with stride 2; the flatten below relies on the
# pooled maps being 14x14 with 32 channels.
h_conv_0 = tf.nn.relu(conv_0)
h_pool_0 = tf.nn.max_pool(
    h_conv_0,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding="SAME",
)
h_pool_1_flat = tf.reshape(h_pool_0, shape=[-1, 14 * 14 * 32])

# ---- Linear read-out to the 10 classes ------------------------------------
W_fc_1 = tf.Variable(
    tf.truncated_normal(shape=[14 * 14 * 32, 10], stddev=0.1)
)
b_fc_1 = tf.Variable(
    tf.constant(0.1, shape=[10])
)

# Keep the raw logits for the loss; the softmax output is used for prediction.
logit_conv = tf.matmul(h_pool_1_flat, W_fc_1) + b_fc_1
y_conv = tf.nn.softmax(logit_conv)

In [6]:
# Accuracy: fraction of rows where the arg-max of the prediction equals the
# arg-max of the one-hot label.
correct_prediction = tf.equal(
    tf.argmax(y_conv, 1),
    tf.argmax(y, 1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Objective: batch-mean softmax cross-entropy computed from the raw logits,
# plus an L2 penalty (scale 0.1) applied to the convolution kernel only.
# Despite its name, `cross_entropy` therefore includes the penalty term.
l2 = tf.contrib.layers.l2_regularizer(0.1)
cross_entropy = (
    tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logit_conv, labels=y)
    )
    + l2(W_conv_0)
)

# Adam with an explicit learning rate of 1e-4.
train_step = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cross_entropy)

In [7]:
# Open a session and run the initializer op covering every variable that has
# been created so far.
sess = tf.Session()
sess.run(fetches=tf.global_variables_initializer())

In [9]:
# Training loop: 2001 optimizer steps on mini-batches of 64 training examples.
for i in range(2001):
    batch_xs, batch_ys = mnist.train.next_batch(64)
    sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_probability: 0.5})

    # Every 50 steps: accuracy on a single 128-example test mini-batch. This is
    # a quick progress signal; it is measured on a different batch each time.
    if i % 50 == 0:
        test_xs, test_ys = mnist.test.next_batch(128)
        acc = sess.run(accuracy, feed_dict={x: test_xs, y: test_ys, keep_probability: 1.})
        print("Accuracy %s: %s" % (i, acc))

    # Every 1000 steps (step 0 excluded): accuracy on the full test set.
    # The message previously carried a bare "train accuracy" label with no
    # value behind it; only the test accuracy is computed, so only that is
    # reported (same format as the two-layer section's loop).
    if i > 0 and i % 1000 == 0:
        print("[%d]\ttest accuracy=%.6f" % (
            i,
            sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}),
        ))

Accuracy 0: 0.109375
Accuracy 50: 0.28125
Accuracy 100: 0.578125
Accuracy 150: 0.609375
Accuracy 200: 0.75
Accuracy 250: 0.765625
Accuracy 300: 0.789063
Accuracy 350: 0.835938
Accuracy 400: 0.84375
Accuracy 450: 0.820313
Accuracy 500: 0.84375
Accuracy 550: 0.789063
Accuracy 600: 0.875
Accuracy 650: 0.867188
Accuracy 700: 0.84375
Accuracy 750: 0.898438
Accuracy 800: 0.875
Accuracy 850: 0.914063
Accuracy 900: 0.867188
Accuracy 950: 0.890625
Accuracy 1000: 0.875
[1000]	train accuracy	test accuracy=0.894100
Accuracy 1050: 0.875
Accuracy 1100: 0.945313
Accuracy 1150: 0.90625
Accuracy 1200: 0.882813
Accuracy 1250: 0.9375
Accuracy 1300: 0.914063
Accuracy 1350: 0.914063
Accuracy 1400: 0.929688
Accuracy 1450: 0.898438
Accuracy 1500: 0.921875
Accuracy 1550: 0.945313
Accuracy 1600: 0.890625
Accuracy 1650: 0.890625
Accuracy 1700: 0.914063
Accuracy 1750: 0.9375
Accuracy 1800: 0.914063
Accuracy 1850: 0.898438
Accuracy 1900: 0.921875
Accuracy 1950: 0.890625
Accuracy 2000: 0.914063
[2000]	train accura

## Two-layer convolutional network

In [10]:
# ---- Inputs ---------------------------------------------------------------
# Fresh placeholders for this model: flattened 784-value images and 10-wide
# label rows, batch size left open.
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])
# NOTE(review): fed by the training loop, but nothing built in this cell
# consumes it, so no dropout is applied -- confirm that is intended.
keep_probability = tf.placeholder(tf.float32)

# Unflatten every row into a 28x28 single-channel image.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

# ---- Conv block 0: 5x5 kernels, 1 -> 32 channels --------------------------
W_conv_0 = tf.Variable(
    tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1)
)
b_conv_0 = tf.Variable(
    tf.constant(0.1, shape=[32])
)

conv_0 = (
    tf.nn.conv2d(x_image, W_conv_0, strides=[1, 1, 1, 1], padding="SAME")
    + b_conv_0
)

# ReLU followed by 2x2 max-pooling with stride 2.
h_conv_0 = tf.nn.relu(conv_0)
h_pool_0 = tf.nn.max_pool(
    h_conv_0,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding="SAME",
)

# ---- Conv block 1: 5x5 kernels, 32 -> 64 channels -------------------------
W_conv_1 = tf.Variable(
    tf.truncated_normal(shape=[5, 5, 32, 64], stddev=0.1)
)
b_conv_1 = tf.Variable(
    tf.constant(0.1, shape=[64])
)

conv_1 = (
    tf.nn.conv2d(h_pool_0, W_conv_1, strides=[1, 1, 1, 1], padding="SAME")
    + b_conv_1
)

# Second ReLU + pooling stage; the flatten below relies on the pooled maps
# being 7x7 with 64 channels.
h_conv_1 = tf.nn.relu(conv_1)
h_pool_1 = tf.nn.max_pool(
    h_conv_1,
    ksize=[1, 2, 2, 1],
    strides=[1, 2, 2, 1],
    padding="SAME",
)
h_pool_1_flat = tf.reshape(h_pool_1, shape=[-1, 7 * 7 * 64])

# ---- Linear read-out to the 10 classes ------------------------------------
W_fc_2 = tf.Variable(
    tf.truncated_normal(shape=[7 * 7 * 64, 10], stddev=0.1)
)
b_fc_2 = tf.Variable(
    tf.constant(0.1, shape=[10])
)

# Keep the raw logits for the loss; the softmax output is used for prediction.
logit_conv = tf.matmul(h_pool_1_flat, W_fc_2) + b_fc_2
y_conv = tf.nn.softmax(logit_conv)

In [11]:
# Accuracy: fraction of rows where the arg-max of the prediction equals the
# arg-max of the one-hot label.
correct_prediction = tf.equal(
    tf.argmax(y_conv, 1),
    tf.argmax(y, 1),
)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# The L2 regularizer is still constructed here, but it is not added to the
# loss below: this model trains on the plain cross-entropy only.
l2 = tf.contrib.layers.l2_regularizer(0.1)
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logit_conv, labels=y)
)

# Adam with its default settings (no explicit learning rate is passed).
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

In [13]:
# Rebinding `sess` below would drop the only reference to the session opened
# for the previous model (or by an earlier run of this cell) without closing
# it. Close it first so sessions do not pile up in the kernel.
try:
    sess.close()
except NameError:
    pass  # fresh kernel: no earlier session exists yet

# Open a new session and run the initializer op covering every variable that
# has been created so far.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [14]:
# Training loop: 2001 optimizer steps on mini-batches of 64 training examples.
for i in range(2001):
    batch_xs, batch_ys = mnist.train.next_batch(64)
    sess.run(
        train_step,
        feed_dict={x: batch_xs, y: batch_ys, keep_probability: 0.5},
    )

    # Every 50th step: accuracy on one 128-example test mini-batch.
    if not i % 50:
        test_xs, test_ys = mnist.test.next_batch(128)
        acc = sess.run(
            accuracy,
            feed_dict={x: test_xs, y: test_ys, keep_probability: 1.0},
        )
        print("Accuracy %s: %s" % (i, acc))

    # Every 1000th step, step 0 excluded: accuracy on the full test set.
    if i and not i % 1000:
        print("[%d]\ttest accuracy=%.6f" % (
            i,
            sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}),
        ))

Accuracy 0: 0.15625
Accuracy 50: 0.890625
Accuracy 100: 0.914063
Accuracy 150: 0.9375
Accuracy 200: 0.9375
Accuracy 250: 0.945313
Accuracy 300: 0.96875
Accuracy 350: 0.953125
Accuracy 400: 0.976563
Accuracy 450: 0.953125
Accuracy 500: 0.96875
Accuracy 550: 0.921875
Accuracy 600: 0.992188
Accuracy 650: 0.976563
Accuracy 700: 0.96875
Accuracy 750: 0.96875
Accuracy 800: 0.96875
Accuracy 850: 0.976563
Accuracy 900: 0.96875
Accuracy 950: 0.984375
Accuracy 1000: 0.976563
[1000]	test accuracy=0.978200
Accuracy 1050: 0.976563
Accuracy 1100: 0.984375
Accuracy 1150: 0.984375
Accuracy 1200: 0.96875
Accuracy 1250: 0.96875
Accuracy 1300: 0.992188
Accuracy 1350: 0.992188
Accuracy 1400: 0.984375
Accuracy 1450: 0.992188
Accuracy 1500: 0.992188
Accuracy 1550: 0.953125
Accuracy 1600: 1.0
Accuracy 1650: 0.992188
Accuracy 1700: 0.984375
Accuracy 1750: 0.992188
Accuracy 1800: 0.992188
Accuracy 1850: 1.0
Accuracy 1900: 0.992188
Accuracy 1950: 0.984375
Accuracy 2000: 0.984375
[2000]	test accuracy=0.987500
