# MNIST Classification with Tensorflow

## Import modules, package, libraries

In [1]:
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

## Read in data and split into train and test sets

In [2]:
# Read in data from CSV files
train = pd.read_csv('ex4_train.csv', index_col=0)
test = pd.read_csv('ex4_test.csv', index_col=0)

# Split data into train and test sets
x_train, y_train = train.iloc[:, :-1].as_matrix(), train.iloc[:, -1].as_matrix()
x_test, y_test = test.iloc[:, :-1].as_matrix(), test.iloc[:, -1].as_matrix()

## Define functions for setting up the computation graph & the training process

In [3]:
def one_hot(y_train, y_test):
    """
        Create one hot encoding of class labels for both train and test data.
    """
    
    train_labels = tf.one_hot(indices=y_train, depth=NUM_CLASSES)
    test_labels = tf.one_hot(indices=y_test, depth=NUM_CLASSES)

    return train_labels, test_labels
    
def init_placeholders(x_train):
    """
        Create placeholders for the input data and the labels.
    """
    
    x_tensor = tf.placeholder(shape=(None, x_train.shape[1]), dtype=tf.float32, name='Input') 
    y_tensor = tf.placeholder(shape=(None, NUM_CLASSES), dtype=tf.int32, name='Labels')
    keep_prob = tf.placeholder(tf.float32)
    
    return x_tensor, y_tensor, keep_prob

def init_params(x_train):
    """
        Initialize the weights using the Xavier method and initialize biases to zero.
    """
    
    weight_shapes = [(x_train.shape[1], HIDDEN_NEURONS_1),
                 (HIDDEN_NEURONS_1, HIDDEN_NEURONS_2),
                 (HIDDEN_NEURONS_2, NUM_CLASSES)]
    
    bias_shapes = [(1, HIDDEN_NEURONS_1), (1, HIDDEN_NEURONS_2), (1, NUM_CLASSES)]
    
    W1, W2, W3 = [tf.get_variable(name='W{}'.format(i+1), shape=w, dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer(seed=1)) 
                  for i, w in enumerate(weight_shapes)]
    
    b1, b2, b3 = [tf.get_variable(name='b{}'.format(i+1), shape=b, dtype=tf.float32, 
                                  initializer=tf.zeros_initializer())
                  for i, b in enumerate(bias_shapes)]
    
    return (W1, W2, W3), (b1, b2, b3)

def forward(weights, biases, keep_prob):
    """
        Defines the forward pass computations.
    """
    
    W1, W2, W3 = weights
    b1, b2, b3 = biases
    
    a1 = tf.nn.relu(tf.add(tf.matmul(x_tensor, W1), b1), name='Hidden_1')
    a1 = tf.nn.dropout(a1, keep_prob)
    a2 = tf.nn.relu(tf.add(tf.matmul(a1, W2), b2), name='Hidden_2')
    a1 = tf.nn.dropout(a2, keep_prob)

    return tf.nn.sigmoid(tf.add(tf.matmul(a2, W3), b3), name='Output')

## Setup computation graph

In [4]:
NUM_ITERS=500
NUM_CLASSES = 10
HIDDEN_NEURONS_1 = 25
HIDDEN_NEURONS_2 = 25
KEEP_PROB = 0.5
LEARNING_RATE = 4e-5

# One-hot encode labels
one_hot_train, one_hot_test = one_hot(y_train, y_test)

# Initialize placeholders for input data and labels
x_tensor, y_tensor, keep_prob = init_placeholders(x_train)

# Initialize weights and biases
weights, biases = init_params(x_train)

# Define loss function
loss = tf.losses.softmax_cross_entropy(y_tensor, forward(weights, biases, keep_prob))

# Define optimizer
update = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

# Define prediction
predict = tf.argmax(forward(weights, biases, keep_prob=1.0), axis=1)

# Create initializer for graph variables
init = tf.global_variables_initializer()

## Train model

In [5]:
start = time.time()

with tf.Session(config=tf.ConfigProto(device_count={'CPU': 0, 'GPU': 1}, log_device_placement=True)) as sess: 
    sess.run(init)
       
    train_labels = sess.run(one_hot_train)
    test_labels = sess.run(one_hot_test)
    
    for i in range(NUM_ITERS):
        for x, y in zip(x_train, train_labels):
            sess.run(update, feed_dict={x_tensor: x.reshape(1, -1), 
                                        y_tensor: y.reshape(1, -1),
                                        keep_prob: KEEP_PROB})
        
        train_accuracy = np.mean(np.argmax(train_labels, axis=1) == 
                                 sess.run(predict, feed_dict={x_tensor: x_train.reshape(x_train.shape[0], -1), 
                                                              y_tensor: train_labels}))
        test_accuracy = np.mean(np.argmax(test_labels, axis=1) == 
                                sess.run(predict, feed_dict={x_tensor: x_test.reshape(x_test.shape[0], -1),
                                                             y_tensor: test_labels}))
        if (i+1) % 10 == 0:
            print 'Train accuracy: {:.2f}\nTest accuracy: {:.2f}\n'.format(100 * train_accuracy, 100 * test_accuracy)

       
print 'Training took {} seconds.'.format(time.time() - start)

InternalError: Failed to create session.