In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="3"
import tensorflow as tf

In [2]:
# Load MNIST with one-hot labels while temporarily silencing TensorFlow's
# logging. The previous verbosity is restored in a `finally` block so a failed
# import or download/extraction does not leave the kernel stuck at ERROR level.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
try:
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
finally:
    tf.logging.set_verbosity(old_v)

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz


In [3]:
# Randomly generates weights in accordance with a truncated normal distribution:
def init_weights(shape):
    """Return a `tf.Variable` of the given shape with random initial values.

    The initial values are drawn with `tf.truncated_normal` using a standard
    deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [4]:
def init_bias(shape):
    """Return a `tf.Variable` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [5]:
def conv2d(x, W):
    """Convolve `x` with the filter bank `W` using stride 1 and 'SAME' padding.

    x: input laid out as [batch, height, width, channels].
    W: filters laid out as [filter height, filter width, channels in, channels out].
    'SAME' padding pads the edges with zeros.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [6]:
def max_pool_2by2(x):
    """Max-pool `x` (laid out as [batch, h, w, c]) with 'SAME' padding.

    The window and the stride are both [1, 2, 2, 1]: the maximum is taken over
    each 2x2 group of pixels within a single batch entry and a single channel.
    """
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [7]:
# CONVOLUTIONAL LAYER:
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU stage on top of `input_x`.

    shape: filter shape passed to `init_weights`; its fourth entry
           (`shape[3]`) gives the length of the bias vector.
    Returns the ReLU of the biased convolution output.
    """
    kernel = init_weights(shape)
    out_channels = shape[3]
    bias = init_bias([out_channels])
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

In [8]:
# FULLY CONNECTED/DENSE:
def normal_full_layer(input_layer, size):
    """Build a dense layer computing `input_layer @ W + b` (no activation).

    input_layer: tensor whose second dimension (index 1 of `get_shape()`) is
                 the number of input features.
    size: number of output units.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    return tf.matmul(input_layer, weights) + biases

In [9]:
# INPUT LAYER: each row is one image flattened to an array of 784 pixels.
# The leading None leaves the number of rows (the batch size) unspecified.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])

In [10]:
# The correct labels are fed into y_true, one row of 10 values per example
# (the dataset is loaded with one_hot=True):
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [11]:
# LAYERS:
# Reshape the 1D input back to the original 28x28 images with 1 channel.
# The -1 tells tf.reshape to infer that dimension (here the batch size) from
# the total number of elements, so any batch size can be fed in.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])


In [12]:
# First convolutional layer of the graph:
# 5x5 filters over the 1 input channel/colour, generating 32 'features'.
convo_1 = convolutional_layer(x_image, shape=[5, 5, 1, 32])
# Down-sample with the 2x2 max pooling helper defined above.
convo_1_pooling = max_pool_2by2(convo_1)

In [13]:
# (Disabled) Second convolutional layer: 5x5 filters mapping the 32 features from the first layer to 64 new 'features':
#convo_2 = convolutional_layer(convo_1_pooling,shape=[5,5,32,64])
#convo_2_pooling = max_pool_2by2(convo_2)

In [14]:
# Flatten the pooled feature maps so they can feed a dense layer.
# The per-example size is derived from the tensor's static shape rather than
# hard-coded (it is 14 * 14 * 32 = 6272 for a 28x28 input after one 'SAME'
# convolution with 32 filters and one 2x2 pooling), so it stays correct if the
# layers above are changed.
# NOTE: the name `convo_2_flat` is historical; with the second convolutional
# layer commented out, this flattens the output of the FIRST conv/pool stage.
_, pooled_h, pooled_w, pooled_c = convo_1_pooling.get_shape().as_list()
convo_2_flat = tf.reshape(convo_1_pooling, [-1, pooled_h * pooled_w * pooled_c])
# Fully connected to a flat 512 layer:
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 512))

In [15]:
# DROPOUT
# Probability that a connection is kept; fed at run time so that training and
# evaluation can use different values.
hold_prob = tf.placeholder(dtype=tf.float32)
# Dropout applied to the 512 outputs of the dense layer:
full_one_dropout = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)

In [16]:
# Output: the 512 dropout-regularised values fully connected to the 10
# possible outputs. No activation is applied here; the values are used as
# logits by the loss.
y_pred = normal_full_layer(full_one_dropout, size=10)

In [17]:
# Standard loss function for probability distributions: softmax cross-entropy
# between the true labels and the predicted logits, averaged over the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=y_true,
        logits=y_pred,
    )
)

In [18]:
# Optimizer: Adam adapts its step sizes during training; 0.001 is the base
# learning rate.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
# Op that performs one optimisation step on the cross-entropy loss.
train = optimizer.minimize(loss=cross_entropy)

In [19]:
# Op that initializes the graph's global variables when run in a session.
# NOTE(review): it is created after `optimizer.minimize`, presumably so that
# any variables the optimizer adds are covered too -- keep this ordering.
init = tf.global_variables_initializer()

In [21]:
# Create Session:
steps = 3000

# ACCURACY OPS:
# Built once, before the loop. Creating them inside the loop added a fresh set
# of argmax/equal/cast/reduce_mean nodes to the graph at every evaluation.
# An example counts as a match when the index of the largest predicted value
# equals the index of the largest (one-hot) label value.
matches = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_true,1))
acc = tf.reduce_mean(tf.cast(matches,tf.float32))

with tf.Session() as sess:
    sess.run(init)

    for step in range(1, steps+1):
        # TRAINING:
        # Train with 50 images at a time; next_batch returns a tuple of
        # 50 rows of pixels (x) and their correct labels.
        batch_x, correct_labels = mnist.train.next_batch(50)
        # Keep probability of 0.5 for dropout, to help prevent overfitting:
        sess.run(train,feed_dict={x: batch_x,y_true: correct_labels, hold_prob:0.5})

        # Tests the current accuracy of the model every 100 training steps:
        if step % 100 == 0:
            print("TEST ACCURACY AFTER STEP {}: ".format(step))

            # TESTING ACCURACY:
            # Evaluated on the whole test set with dropout disabled
            # (keep probability 1.0).
            print(sess.run(acc,feed_dict={x:mnist.test.images,y_true:mnist.test.labels,hold_prob:1.0}))
            print('\n')
    

TEST ACCURACY AFTER STEP 100: 
0.8961


TEST ACCURACY AFTER STEP 200: 
0.9268


TEST ACCURACY AFTER STEP 300: 
0.9439


TEST ACCURACY AFTER STEP 400: 
0.952


TEST ACCURACY AFTER STEP 500: 
0.9621


TEST ACCURACY AFTER STEP 600: 
0.9647


TEST ACCURACY AFTER STEP 700: 
0.9668


TEST ACCURACY AFTER STEP 800: 
0.9705


TEST ACCURACY AFTER STEP 900: 
0.9727


TEST ACCURACY AFTER STEP 1000: 
0.9752


TEST ACCURACY AFTER STEP 1100: 
0.9761


TEST ACCURACY AFTER STEP 1200: 
0.9761


TEST ACCURACY AFTER STEP 1300: 
0.9799


TEST ACCURACY AFTER STEP 1400: 
0.9801


TEST ACCURACY AFTER STEP 1500: 
0.9819


TEST ACCURACY AFTER STEP 1600: 
0.9816


TEST ACCURACY AFTER STEP 1700: 
0.9823


TEST ACCURACY AFTER STEP 1800: 
0.9819


TEST ACCURACY AFTER STEP 1900: 
0.9838


TEST ACCURACY AFTER STEP 2000: 
0.9818


TEST ACCURACY AFTER STEP 2100: 
0.9849


TEST ACCURACY AFTER STEP 2200: 
0.9816


TEST ACCURACY AFTER STEP 2300: 
0.9843


TEST ACCURACY AFTER STEP 2400: 
0.9816


TEST ACCURACY AFTER STEP 2

# > 98.5 % accuracy on the full test set after 3000 training steps