## Setup

First we import the necessary modules and data.

In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

In [2]:
from mnist_helper import read
# Materialise both splits as lists so they can be sliced and indexed later.
# NOTE(review): `read` presumably yields (label, image) pairs — confirm in mnist_helper.
train_data = list(read(path="images/", dataset="training"))
test_data = list(read(path="images/", dataset="testing"))

In [3]:
def flatten_image(img):
    """Return every pixel of a 2-D image as one flat list, row after row."""
    pixels = []
    for row in img:
        pixels.extend(row)
    return pixels

In [4]:
def flatten_X(X):
    """Stack a sequence of 2-D images into a (n_pixels, n_images) matrix.

    Each image is flattened in row-major order and becomes one *column* of
    the result, so column ``i`` holds exactly the pixels of ``X[i]``.

    Args:
        X: Sequence of equally sized 2-D images (nested sequences or arrays).

    Returns:
        A NumPy array of shape ``(n_pixels, len(X))``. For an empty ``X`` the
        result has shape ``(784, 0)``.
    """
    if len(X) == 0:
        # No image to infer the pixel count from; keep the 28x28 default.
        return np.zeros((784, 0))
    per_image_rows = np.array([np.ravel(img) for img in X])
    # Transpose rather than reshape: reshaping an (m, n_pixels) array to
    # (n_pixels, m) reinterprets the buffer and mixes pixels of different
    # images into the same column.
    return per_image_rows.T

In [5]:
def one_hot(Y):
    """Encode integer class labels as a (10, len(Y)) one-hot matrix.

    Column ``i`` is all zeros except for a 1 in row ``Y[i]``.
    """
    encoded = np.zeros((10, len(Y)))
    for column, label in enumerate(Y):
        encoded[label][column] = 1
    return encoded

In [6]:
# The label is element 0 of each sample. The first 50,000 training samples
# are used for fitting and samples 50,000-59,999 are held out as a dev set.
Y_train = one_hot([sample[0] for sample in train_data[:50000]])
Y_dev = one_hot([sample[0] for sample in train_data[50000:60000]])
Y_test = one_hot([sample[0] for sample in test_data])

In [7]:
# The image is element 1 of each sample. The slices are the same ones used
# for the labels, so features and labels stay aligned.
X_train = flatten_X([sample[1] for sample in train_data[:50000]])
X_dev = flatten_X([sample[1] for sample in train_data[50000:60000]])
X_test = flatten_X([sample[1] for sample in test_data])

## Building the Network

Next we construct the network architecture (zero hidden layers) and define a few helpful functions.

In [8]:
def get_parameters():
    """Create the graph inputs and trainable parameters of the linear model.

    Returns:
        A tuple ``(X, Y, W, b)``:
          * ``X`` - float32 placeholder of shape ``[784, None]``.
          * ``Y`` - float32 placeholder of shape ``[10, None]``.
          * ``W`` - float32 variable of shape ``(10, 784)``, initialised from
            a standard normal distribution.
          * ``b`` - float32 variable of shape ``[10, 1]``, initialised to zero.
    """
    inputs = tf.placeholder(name="X", shape=[784, None], dtype=tf.float32)
    labels = tf.placeholder(name="Y", shape=[10, None], dtype=tf.float32)

    initial_weights = np.random.randn(10, 784)
    weights = tf.Variable(initial_value=initial_weights, dtype=tf.float32, name="W")
    bias = tf.Variable(initial_value=tf.zeros([10, 1]), dtype=tf.float32, name="b")

    return (inputs, labels, weights, bias)

In [9]:
def get_cost(Z, Y):
    """Return the mean element-wise sigmoid cross-entropy of logits Z against labels Y."""
    per_element_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=Z)
    return tf.reduce_mean(per_element_loss)

In [19]:
def forward_prop(X, W, b):
    """Compute the affine output ``W @ X + b`` of the single-layer model."""
    weighted_inputs = tf.matmul(W, X)
    return tf.add(weighted_inputs, b)

## Model

Finally we construct the gradient descent optimizer, run the training loop, and report the model's accuracy.

In [28]:
def fit_model(X_train, Y_train, num_iterations=100, learning_rate=0.001):
    """Train the single-layer model with full-batch gradient descent.

    The default graph is reset, the model is rebuilt, and every iteration
    feeds the complete ``X_train`` / ``Y_train`` to one optimizer step. The
    cost is printed every 10 iterations, and the accuracy on the supplied
    training data is printed once training finishes.

    Args:
        X_train: Feature matrix fed to the ``X`` placeholder (``[784, m]``).
        Y_train: One-hot label matrix fed to the ``Y`` placeholder (``[10, m]``).
        num_iterations: Number of gradient descent steps to run.
        learning_rate: Step size passed to the optimizer.

    Returns:
        A list with the cost recorded at each iteration.
    """
    tf.reset_default_graph()
    X, Y, W, b = get_parameters()
    Z = forward_prop(X, W, b)
    cost = get_cost(Z, Y)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

    # Build the accuracy op as part of the graph so it can be evaluated with
    # the same feed as the cost. Classes lie along axis 0, examples along axis 1.
    correct_prediction = tf.equal(tf.argmax(Z, axis=0), tf.argmax(Y, axis=0))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    init = tf.global_variables_initializer()
    feed = {X: X_train, Y: Y_train}
    costs = []
    with tf.Session() as sess:
        sess.run(init)

        for iteration in range(num_iterations):
            _, iter_cost = sess.run([optimizer, cost], feed_dict=feed)
            costs.append(iter_cost)
            if iteration % 10 == 0:
                print("Iteration %s: cost = %s" % (iteration, iter_cost))

        # Evaluate inside the session: the placeholders must be fed and the
        # trained variables only exist while the session is open.
        train_accuracy = sess.run(accuracy, feed_dict=feed)
        print("accuracy: %s" % train_accuracy)

    return costs

In [None]:
# Train on the 50,000-example training split with the default hyperparameters.
fit_model(X_train=X_train, Y_train=Y_train)