In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import tensorflow as tf

# Import and inspect data

## Import

In [6]:
def import_data():
    """Import training and testing data, as data and labels"""
    
    # Import in pandas (np.genfromtxt is too slow)
    tr_pd = pd.read_csv('data_train.csv', header=None)
    tst_pd = pd.read_csv('data_test.csv', header=None)
    
    # Split data and labels
    tr_data = tr_pd.loc[:, 1:].values
    tr_labels = tr_pd.loc[:, 0].values
    tst_data = tst_pd.loc[:, 1:].values
    tst_labels = tst_pd.loc[:, 0].values
    
    return tr_data, tst_data, tr_labels, tst_labels

In [7]:
def make_onehot(labels):
    """Change numbers 0-9 to unit vectors along the first 10 axes."""
    
    labels_onehot = np.zeros(shape=(labels.shape[0], 10))
    for num in range(labels.shape[0]):
        labels_onehot[num, labels[num]] = 1.0
    
    return labels_onehot

In [8]:
g_tr_data, g_tst_data, g_tr_labels, g_tst_labels = import_data()
g_tr_data = g_tr_data/255
g_tst_data = g_tst_data/255
g_tr_labels_oh = make_onehot(g_tr_labels)
g_tst_labels_on = make_onehot(g_tst_labels)

## Inspect

In [9]:
def inspect_number(data, labels, imagenum, probs=None):
    """Make a function that displays the image. If a vector of onehot vectors is 
    also inputted, then a bar plot of these probabilities is shown."""
    
    # Reshape the datasets
    plot_data = np.reshape(data, newshape=(data.shape[0], 28, 28))
    
    # If there are no probabilites entered, just plot the picture
    if probs == None:
        print(labels[imagenum])
        fig, ax = plt.subplots()
        ax.imshow(plot_data[imagenum], cmap='Greys')
        plt.show()
    
    # When probabilities are entered, plot the picture and the 
    # probability that it is each number
    else:
        print(labels[imagenum])
        fig, (ax1, ax2) = plt.subplots(1, 2)
        ax1.imshow(plot_data[imagenum], cmap='Greys')
        ax2.bar(np.arange(10), probs)
        asp = np.diff(ax2.get_xlim())[0] / np.diff(ax2.get_ylim())[0]
        ax2.set_aspect(asp)
        plt.show()

# Build MLP in Tensorflow

In [10]:
tf.reset_default_graph()    # Sometimes solves issues 

In [19]:
class MLP():
    def __init__(self, learn_param):
        self.learn_param = learn_param
        
        self.init = tf.global_variables_initializer()
        self.inp = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='inputs')
        self.tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='targets')
        
        self.sess = tf.Session()
        self._build_graph()
        
    
    def _build_graph(self):
        initializer = tf.glorot_uniform_initializer
        
        # The neural network
        h = tf.layers.dense(self.inp, 128, kernel_initializer=initializer, \
                activation=tf.tanh, name='1')
        h = tf.layers.dense(h, 64, kernel_initializer=initializer, \
                activation=tf.tanh, name='2')
        logits = tf.layers.dense(h, 10, kernel_initializer=initializer, \
                activation=tf.identity, name='3')
        softmax = tf.nn.softmax(logits)
        
        CE = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.tgt, logits=logits)
        cost = tf.reduce_mean(CE)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learn_param).minimize(cost)
        
        

In [20]:
model = MLP(learn_param=0.1)

AttributeError: module 'tensorflow' has no attribute 'glorot_uniform_initializer'

In [None]:
tr_in = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='inputs')
tr_tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='targets')
tst_in = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='inputs')
tst_tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='targets')

epochs = 5
learnrate = 0.01

In [18]:
init = tf.global_variables_initializer()

In [None]:
"""Train the neural network."""

with tf.Session() as sess:
    
    num_tr = g_tr_data.shape[0]
    num_tst = g_tst_data.shape[0]
    
    batch_size = 100   # Must divide num_tr
    num_batches = int(num_tr / batch_size + 0.00001)
    # Error metrics are recorded every batch_recordspace batches
    batch_recordspace = 50
    
    # Create a pandas DataFrame where the training and testing error is put
    columns = ['epoch', 'batch', 'training', 'testing']
    num_rows = int(num_batches * epochs / batch_recordspace + 0.00001)
    index = np.arange(num_rows)
    Prop_Correct = pd.DataFrame(np.zeros((num_rows,4)), index=index, columns=columns)
    
    # Initialise all variables
    sess.run(init)
    
    start_time = time.time()
    
    # Do the training, epoch is one run through full dataset
    for epoch in range(epochs):
        # One training iteration per batch
        for batch in range(num_batches):
            
            # Input the batch of training and testing data
            range_left = batch * batch_size
            range_right = (batch + 1) * batch_size
            feed_dict = {
                    tr_in: g_tr_data[range_left:range_right], \
                    tr_tgt: g_tr_labels_vc[range_left:range_right], \
                    tst_in: g_tst_data, \
                    tst_tgt: g_tst_labels_vc \
                    }
        
            # Run one iteration of neural network trainer
            # Inputs
            iter_inputs = [
                    optimizer, \
                    tr_cost, tr_sm, tr_tgt, \
                    tst_cost, tst_sm, tst_tgt \
                    ]
            # Run it
            iter_outputs = sess.run(iter_inputs, feed_dict=feed_dict)
            # Outputs
            _ = iter_outputs[0]
            out_tr_cost, out_tr_sm, out_tr_tgt = iter_outputs[1:4]
            out_tst_cost, out_tst_sm, out_tst_tgt = iter_outputs[4:7]
            
        
            # Find the proportion of correctly identified numbers in training dataset
            tr_truth = np.argmax(out_tr_tgt, axis=1)
            tr_nn_guess = np.argmax(out_tr_sm, axis=1)
            tr_num_correct = batch_size - len(np.nonzero(tr_nn_guess - tr_truth)[0])
            tr_fraction_correct = tr_num_correct / batch_size
        
            # Find the proportion of correctly identified numbers in testing dataset
            tst_truth = np.argmax(out_tst_tgt, axis=1)
            tst_nn_guess = np.argmax(out_tst_sm, axis=1)
            tst_num_correct = num_tst - len(np.nonzero(tst_nn_guess - tst_truth)[0])
            tst_fraction_correct = tst_num_correct / num_tst
            
            # Print and input some outputs (training and testing errors)
            if batch % batch_recordspace == 0:
#                print(epoch, batch, tr_fraction_correct, tst_fraction_correct)
                row_num = int((epoch * num_batches + batch) / batch_recordspace + 0.00001)
                Prop_Correct.loc[row_num, :] = [ \
                        epoch, batch, tr_fraction_correct, tst_fraction_correct]
    
    # Output a pandas DataFrame with the training and testing errors
#    Prop_Correct.to_csv('2HL.csv')
    
    end_time = time.time()
    print('Time taken: {}s'.format(end_time - start_time))
    
    
    # Calculate the number correct across the whole dataset
            
    # Input the batch of training and testing data
    feed_dict = {
            tr_in: g_tr_data, \
            tr_tgt: g_tr_labels_vc, \
            tst_in: g_tst_data, \
            tst_tgt: g_tst_labels_vc \
            }
        
    # Run neural network again on whole training dataset
    run_inputs = [tr_sm, tr_tgt, tst_sm, tst_tgt]
    run_outputs = sess.run(run_inputs, feed_dict=feed_dict)
    out_tr_sm, out_tr_tgt, out_tst_sm, out_tst_tgt = run_outputs
            
    # Find the proportion of correctly identified numbers in training dataset
    # (The testing dataset has already been done)
    tr_truth = np.argmax(out_tr_tgt, axis=1)
    tr_nn_guess = np.argmax(out_tr_sm, axis=1)
    tr_num_correct = num_tr - len(np.nonzero(tr_nn_guess - tr_truth)[0])
    tr_fraction_correct = tr_num_correct / num_tr
            
    print('Correct, training dataset: {}'.format(tr_fraction_correct))
    print('Correct, testing dataset: {}'.format(tst_fraction_correct)) 
    
    

In [None]:
"""Create the neural network"""

# Layers of the neural network, training
tr_h = tf.layers.dense(tr_in, 128, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.tanh, name='1')
tr_h = tf.layers.dense(tr_h, 64, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.tanh, name='2')
tr_logits = tf.layers.dense(tr_h, 10, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.identity, name='3')

# Layers of the neural network, testing
tst_h = tf.layers.dense(tst_in, 128, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.tanh, name='1', reuse=True)
tst_h = tf.layers.dense(tst_h, 64, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.tanh, name='2', reuse=True)
tst_logits = tf.layers.dense(tst_h, 10, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.identity, name='3', reuse=True)

# Turn output layer into softmax job
tr_sm = tf.nn.softmax(tr_logits)
tst_sm = tf.nn.softmax(tst_logits)

# Calculate the cost and run the optimiser
tr_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tr_tgt, logits=tr_logits))
tst_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tst_tgt, logits=tst_logits))
optimizer = tf.train.AdamOptimizer(learning_rate=learnrate).minimize(tr_cost)

In [None]:
"""Calculate number of variables"""

len(tf.global_variables())

In [None]:
"""Important results:

                        1 hidden   2 hidden
                        layer      layers
                        
Number of parameters:   14         20
Time taken (5 epochs):  172s       223s
Final training error:   0.971      0.956
Final testing error:    0.962      0.948

"""

In [None]:
"""Plot training and testing errors"""

fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
errors_1HL = pd.read_csv('1HL.csv', index_col=[0])
errors_2HL = pd.read_csv('2HL.csv', index_col=[0])

X = (np.arange(60) + 1) / 12
tr_1HL = errors_1HL.loc[:, 'training']
tst_1HL = errors_1HL.loc[:, 'testing']
tr_2HL = errors_2HL.loc[:, 'training']
tst_2HL = errors_2HL.loc[:, 'testing']
ax1.plot(X, tr_1HL)
ax1.plot(X, tst_1HL)
ax2.plot(X, tr_2HL)
ax2.plot(X, tst_2HL)
ax1.set_xlabel('Runs through dataset')
ax2.set_xlabel('Runs through dataset')
ax1.set_ylabel('Proportion correct')
ax1.set_title('1 Hidden layer')
ax2.set_title('2 Hidden layers')
ax1.legend()
ax2.legend()
plt.savefig('Errors')

In [21]:
? tf