In [77]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot = True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [78]:
mnist

Datasets(train=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x11b0507b8>, validation=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x11c09a278>, test=<tensorflow.contrib.learn.python.learn.datasets.mnist.DataSet object at 0x11c09a208>)

In [79]:
import tensorflow as tf
session = tf.InteractiveSession()

In [80]:
# shape = [a, b] means that we have a data points and each is b-dimensional
# we write None here to allow for an arbitrary number of data points
x = tf.placeholder(tf.float32, shape=[None, 784])

# this is 10 dimensional because the ith dimension is the probability of the ith digit
# since y_ is the actual value, it'll look something like [0, ... 0, 1, 0, ... 0 ]
# AKA one shot vectors representing the class of the data point
y_ = tf.placeholder(tf.float32, shape=[None, 10])


In [81]:
# A matrix (linear transformation) that takes points from 784 dim land to 10 dim land
W = tf.Variable(tf.zeros([784, 10]))
# biases because our data isn't centered
b = tf.Variable(tf.zeros([10]))

# I don't understand why the order of the multiple is this instead of matmul(W, x)
y = tf.matmul(x, W) + b

session.run(tf.global_variables_initializer())

In [82]:
# Our error function
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y))

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)


In [83]:
mnist.train.next_batch(1)
# data looks like [all_the_data, all_the_labels]

(array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.

In [84]:
for _ in  range(1000):
    batch = mnist.train.next_batch(100)
    train_step.run(feed_dict = {x: batch[0], y_: batch[1]} )

In [85]:
# Creates a list of booleans
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print(accuracy.eval(feed_dict = {x: mnist.test.images, y_: mnist.test.labels}))

0.9183


In [86]:

# Convenience functions for initializing weights to non-zeros 

def weight_variable(shape):
    # truncated normal is just the normal distribution with the ends bounded (relative to stddev hopefully)
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)

# Positive bias because we'll be using ReLu's

def bias_variable(shape):
    initial = tf.constant(.1, shape = shape)
    return tf.Variable(initial)


In [87]:

def conv2d(x, W):
    # with stride length 1 and zero padding, the output of this is the same shape as x
    return tf.nn.conv2d(x, W, strides = [1, 1, 1, 1], padding = "SAME")

def max_pool_2x2(x):
    # ksize corresponds to the size of the window and strides is how we move that window
    # ksize is [1, 2, 2, 1] because we work on one image at a time, look at pixels 2x2, and there is only 1 channel
    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")



In [88]:
# 1st convolution layer
W_conv1 = weight_variable([5, 5, 1, 32]) # 5x5 window, 1 input channel, 32 windows in total
b_conv1 = bias_variable([32])

x_image = tf.reshape(x, [-1, 28, 28, 1]) # Unknown number of images, 28x28 images, 1 channel (greyscale)

# What we're doing here is
# image => convolve it => add biases => relu it => max_pool it
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)


In [89]:
# 2nd convolution layer
W_conv2 = weight_variable([5, 5, 32, 64]) # 5x5x32 windows, 64 windows
b_conv2 = bias_variable([64])

# 14x14x32 space => 7x7x64 space

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

In [90]:

# 7x7x64 space => 1024 space
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) # flatten the n x n from the pool to a n^2 x 1 array
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [91]:

# Add dropout to prevent overfitting
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)


In [92]:

# Finally map to 10 dimensional space
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2


In [93]:
# Error term

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y_conv))
# Some better backprop optimizer..
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)


In [94]:
# Calculating accuracy

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))






In [95]:
# Train the thing!

session.run(tf.global_variables_initializer())

for i in range(250):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict = {x: batch[0], y_: batch[1], keep_prob: 0.5})
    if i % 10 == 0:
        # don't drop out when evaluating the model
        train_accuracy = accuracy.eval({x: batch[0], y_: batch[1], keep_prob: 1.0})
        print ("step: %d, accuracy on training data: %g" % (i, train_accuracy))

step: 0, accuracy on training data: 0.06
step: 10, accuracy on training data: 0.2
step: 20, accuracy on training data: 0.44
step: 30, accuracy on training data: 0.64
step: 40, accuracy on training data: 0.68
step: 50, accuracy on training data: 0.76
step: 60, accuracy on training data: 0.82
step: 70, accuracy on training data: 0.76
step: 80, accuracy on training data: 0.78
step: 90, accuracy on training data: 0.86
step: 100, accuracy on training data: 0.84
step: 110, accuracy on training data: 0.9
step: 120, accuracy on training data: 0.96
step: 130, accuracy on training data: 0.86
step: 140, accuracy on training data: 0.96
step: 150, accuracy on training data: 0.94
step: 160, accuracy on training data: 0.88
step: 170, accuracy on training data: 0.94
step: 180, accuracy on training data: 0.86
step: 190, accuracy on training data: 0.9
step: 200, accuracy on training data: 0.9
step: 210, accuracy on training data: 0.92
step: 220, accuracy on training data: 0.94
step: 230, accuracy on tra

In [96]:
accuracy.eval(feed_dict = {x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})

0.92430001

In [106]:
# What if we try generating images by training on the input itself!
random_image = tf.Variable(tf.truncated_normal([1, 28, 28, 1], stddev = 0.1), name = "random_image_var")

h_conv1 = tf.nn.relu(conv2d(random_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) # flatten the n x n from the pool to a n^2 x 1 array
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y_conv))
train_step2 = tf.train.AdamOptimizer(1e4).minimize(cross_entropy, var_list = [random_image])

# uninitialized_vars = []
# for var in tf.all_variables():
#     if (tf.is_variable_initialized(var) == False):
#         uninitialized_vars.append(var)

session.run(tf.initialize_variables([random_image]))
        
    
import numpy as np

one_shot = np.zeros([10])
one_shot[0] = 1
session.run(tf.global_variables_initializer())

for i in range(10):
    train_step2.run(feed_dict = {x: [np.zeros(28 * 28)], y_: [one_shot], keep_prob: 1.0})


Instructions for updating:
Use `tf.variables_initializer` instead.
