In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import tensorflow as tf

# Import and inspect data

## Import

In [6]:
def import_data(train_path='../week_2/data_train.csv',
                test_path='../week_2/data_test.csv'):
    """Import training and testing data, as data and labels.

    Both files are read as header-less CSVs. In every row the first column is
    taken as the label and all remaining columns as the data for that row.

    Parameters
    ----------
    train_path : str
        Location of the training CSV (defaults to the original notebook path).
    test_path : str
        Location of the testing CSV (defaults to the original notebook path).

    Returns
    -------
    tuple of numpy.ndarray
        ``(tr_data, tst_data, tr_labels, tst_labels)`` - note the order: both
        data arrays come first, then both label arrays.
    """

    def _read_and_split(path):
        """Read one CSV and split it into (data, labels)."""
        # Import in pandas (np.genfromtxt is too slow)
        frame = pd.read_csv(path, header=None)
        return frame.iloc[:, 1:].values, frame.iloc[:, 0].values

    tr_data, tr_labels = _read_and_split(train_path)
    tst_data, tst_labels = _read_and_split(test_path)

    return tr_data, tst_data, tr_labels, tst_labels

In [7]:
def make_onehot(labels, num_classes=10):
    """Change numbers 0..num_classes-1 to unit vectors along the class axes.

    Parameters
    ----------
    labels : array-like
        One class index per sample. Values are cast to integers, so labels
        stored as floats (e.g. ``3.0``) are accepted. A column vector of
        shape ``(n, 1)`` is flattened to ``(n,)``.
    num_classes : int, optional
        Width of the one-hot encoding. Defaults to 10, the original behaviour.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, num_classes)`` with a single 1.0 per row.

    Raises
    ------
    IndexError
        If a label is ``>= num_classes`` (raised by NumPy's indexing).
    """

    labels = np.asarray(labels).astype(np.intp).ravel()
    num_samples = labels.shape[0]

    labels_onehot = np.zeros(shape=(num_samples, num_classes))
    # Pair every row index with its label and set all entries in one call,
    # instead of looping over the rows in Python.
    labels_onehot[np.arange(num_samples), labels] = 1.0

    return labels_onehot

In [8]:
# Load both datasets, then divide the data arrays by 255
# (presumably mapping 8-bit pixel values into [0, 1] - confirm against the CSVs)
g_tr_data, g_tst_data, g_tr_labels, g_tst_labels = import_data()
g_tr_data, g_tst_data = g_tr_data/255, g_tst_data/255

# One-hot versions of the integer labels
g_tr_labels_oh, g_tst_labels_oh = make_onehot(g_tr_labels), make_onehot(g_tst_labels)

## Inspect

In [17]:
def inspect_number(data, labels, imagenum, probs=None):
    """Display one image and, optionally, a bar plot of class probabilities.

    Parameters
    ----------
    data : numpy.ndarray
        Dataset with one flattened 28x28 image (784 values) per row.
    labels : sequence
        Label of every row of ``data``; ``labels[imagenum]`` is printed.
    imagenum : int
        Row of ``data`` to display.
    probs : array-like, optional
        Either the probability vector for this image, or a 2-D array with one
        probability vector per row of ``data`` (row ``imagenum`` is then
        used). When omitted, only the image is shown.
    """

    # Only the requested row is reshaped; the rest of the dataset is not needed
    image = np.reshape(data[imagenum], (28, 28))

    print(labels[imagenum])

    # If there are no probabilities entered, just plot the picture.
    # `is None` is required here: `probs == None` on a NumPy array compares
    # elementwise and the `if` then raises "truth value ... is ambiguous".
    if probs is None:
        fig, ax = plt.subplots()
        ax.imshow(image, cmap='Greys')
        plt.show()
        return

    # When probabilities are entered, plot the picture and the
    # probability that it is each number
    probs = np.asarray(probs)
    if probs.ndim == 2:
        probs = probs[imagenum]

    fig, (ax1, ax2) = plt.subplots(1, 2)
    ax1.imshow(image, cmap='Greys')
    ax2.bar(np.arange(len(probs)), probs)
    # Make the bar plot square so that it matches the image next to it
    asp = np.diff(ax2.get_xlim())[0] / np.diff(ax2.get_ylim())[0]
    ax2.set_aspect(asp)
    plt.show()

# Build CNN in TensorFlow

In [13]:
class CNN():
    """Digit classifier written against the TensorFlow 1.x graph API.

    NOTE: the convolutional layers in `_build_graph` are commented out, so the
    network that is actually built is fully connected:
    784 inputs -> dense(1024, ReLU) -> dense(10 logits).

    Every instance owns a private `tf.Graph` and `tf.Session`. Layer names
    such as 'dense1' therefore never clash with other models, and creating a
    second instance does not require `tf.reset_default_graph()`. Call
    `close()` (or use the instance in a `with` statement) to release the
    session when finished.

    Parameters
    ----------
    learn_param : float
        Learning rate handed to the Adam optimiser.
    """

    def __init__(self, learn_param):
        self.learn_param = learn_param

        self.graph = tf.Graph()
        with self.graph.as_default():
            self.inp = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='inputs')
            self.tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='targets')
            # Only consumed by the (currently disabled) dropout layer
            self.is_training = tf.placeholder(tf.bool, name='is_training')
            self._build_graph()
            init_op = tf.initializers.global_variables()

        self.sess = tf.Session(graph=self.graph)
        self.sess.run(init_op)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Release the TensorFlow session held by this model."""
        self.sess.close()

    def _build_graph(self):
        """Build the neural network, its cost and its training op"""
        initializer = tf.glorot_uniform_initializer()

        # NOTE: the convolutional front end below is disabled; the dense layer
        # is fed directly with the flat 784-value input.

        # Reshape for convolutional layer
#        h = tf.reshape(self.inp, shape=[-1, 28, 28, 1])

        # First convolutional and pooling layer
#        h = tf.layers.conv2d(h, 32, kernel_size=[5, 5], padding='same',
#                             kernel_initializer=initializer, activation=tf.nn.relu,
#                             name='conv1')
#        h = tf.layers.max_pooling2d(h, pool_size=[2, 2], strides=2, name='pool1')

        # Second convolutional layer
#        h = tf.layers.conv2d(h, 64, kernel_size=[5, 5], padding='same',
#                             kernel_initializer=initializer, activation=tf.nn.relu,
#                             name='conv2')
#        h = tf.layers.max_pooling2d(h, pool_size=[2, 2], strides=2)

        # Dense layer
#        h = tf.reshape(h, [-1, 7*7*64])
        h = self.inp
        h = tf.layers.dense(h, 1024, kernel_initializer=initializer, activation=tf.nn.relu,
                            name='dense1')
#        h = tf.layers.dropout(h, rate=0.4, training=self.is_training, name='dropout1')
        self.logits = tf.layers.dense(h, 10, kernel_initializer=initializer, name='logits')
        self.softmax = tf.nn.softmax(self.logits)

        # Optimiser: softmax cross entropy
        CE = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.tgt)
        self.cost = tf.reduce_mean(CE)
        self.optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learn_param).minimize(self.cost)

    @staticmethod
    def _num_batches(num_pts, batch_size):
        """Number of minibatches needed to cover num_pts rows (last may be short)."""
        if batch_size <= 0:
            raise ValueError('batch_size must be positive, got {}'.format(batch_size))
        # Ceiling division. round() would silently drop a trailing partial
        # batch whenever the remainder is below half a batch.
        return -(-num_pts // batch_size)

    def make_minibatch(self, data, labels, batch_size, batch_num):
        """Form a minibatch from the data

        Returns rows [batch_num * batch_size, (batch_num + 1) * batch_size) of
        both `data` and `labels`; the slice is shorter at the end of the data.
        """
        llim = batch_num * batch_size
        rlim = (batch_num + 1) * batch_size
        return data[llim:rlim], labels[llim:rlim]

    def evaluate_MLP_outputs(self, data):
        """Run the feedforward NN on some data and return the softmax outputs"""
        feed_dict = {self.inp: data, self.is_training: False}
        outputs = self.sess.run(self.softmax, feed_dict=feed_dict)
        return outputs

    def calc_frac_correct(self, data, labels):
        """Determine the fraction of correctly identified pictures across a dataset

        `labels` holds one integer class per row of `data` (not one-hot).
        Raises ValueError for an empty dataset or mismatched lengths.
        """
        num_pts = data.shape[0]
        labels = np.asarray(labels).ravel()
        if num_pts == 0:
            raise ValueError('cannot compute an accuracy over an empty dataset')
        if labels.shape[0] != num_pts:
            raise ValueError('data has {} rows but {} labels were given'.format(
                    num_pts, labels.shape[0]))

        estimates_sm = self.evaluate_MLP_outputs(data)
        estimates = np.argmax(estimates_sm, axis=1)    # Argmax over softmax = estimates
        return float(np.mean(estimates == labels))

    def train_iteration(self, data, labels_oh):
        """Do one training iteration and return the cost fetched in that step"""
        feed_dict = {self.inp: data, self.tgt: labels_oh, self.is_training: True}
        _, cost = self.sess.run([self.optimizer, self.cost], feed_dict=feed_dict)
        return cost

    def train_full(self, tr_data, tr_labels, tst_data, tst_labels, batch_size, num_epochs,
                   eval_every=50):
        """Train neural network and keep track of progress by (cross) validating

        Parameters
        ----------
        tr_data, tr_labels : training inputs and their integer labels.
        tst_data, tst_labels : testing inputs and their integer labels.
        batch_size : rows per minibatch; need not divide the training set size.
        num_epochs : number of passes through the training data.
        eval_every : accuracies are measured before every `eval_every`-th
            batch of each epoch (default 50, the original interval).

        Returns
        -------
        numpy.ndarray of shape (num_records, 4) with the columns
        epoch, batch number, training accuracy, testing accuracy.
        """
        tr_labels_oh = make_onehot(tr_labels)
        num_batches = self._num_batches(tr_data.shape[0], batch_size)

        # Collect rows in a list; np.append would copy every array on each call
        records = []
        for epoch in range(num_epochs):
            for batch_num in range(num_batches):
                # Proportion correctly identified across training and testing dataset
                if batch_num % eval_every == 0:
                    tr_acc = self.calc_frac_correct(tr_data, tr_labels)
                    tst_acc = self.calc_frac_correct(tst_data, tst_labels)
                    print('Training accuracy after {} batches: {}'.format(batch_num, tr_acc))
                    records.append((epoch, batch_num, tr_acc, tst_acc))

                # Do the training
                mb_data, mb_labels_oh = self.make_minibatch(
                        tr_data, tr_labels_oh, batch_size=batch_size, batch_num=batch_num)
                self.train_iteration(mb_data, mb_labels_oh)

        # reshape keeps the (0, 4) shape when nothing was recorded
        return np.array(records, dtype=float).reshape(-1, 4)

# Train model and evaluate

In [None]:
# Start from an empty default graph so that re-running this cell does not
# clash with variables left over from an earlier run
tf.reset_default_graph()

# Create the model class
def train_CNN():
    """Build a CNN, train it on the global datasets and return its accuracy log.

    Trains for 1 epoch with minibatches of 100 rows and a learning rate of
    0.001.

    Returns
    -------
    pandas.DataFrame
        One row per accuracy measurement with the columns 'epoch', 'batch',
        'training' and 'testing' (the column names the plotting cell reads).
    """
    model = CNN(learn_param=0.001)
    try:
        accuracies = model.train_full(g_tr_data, g_tr_labels, g_tst_data, g_tst_labels, 100, 1)
    finally:
        # The model is local to this function, so free its session here
        model.sess.close()
    return pd.DataFrame(accuracies, columns=['epoch', 'batch', 'training', 'testing'])

# Train it
start_time = time.time()
accuracies = train_CNN()
end_time = time.time()
print("Training time: {}s".format(end_time - start_time))

Training accuracy after 0 batches: 0.10029999999999994
Training accuracy after 50 batches: 0.8937333333333334
Training accuracy after 100 batches: 0.9091166666666667
Training accuracy after 150 batches: 0.9246
Training accuracy after 200 batches: 0.9377833333333333
Training accuracy after 250 batches: 0.9482166666666667
Training accuracy after 300 batches: 0.9467166666666667
Training accuracy after 350 batches: 0.9573666666666667


In [None]:
"""Important results:

                        1 hidden   2 hidden
                        layer      layers
                        
Number of parameters:   14         20
Time taken (5 epochs):  172s       223s
Final training accuracy: 0.971      0.956
Final testing accuracy:  0.962      0.948

"""

In [None]:
"""Plot training and testing errors"""

# The original x axis was (1..60) / 12, i.e. 12 logged rows per run through
# the dataset - presumably 600 batches recorded every 50; confirm against
# the run that produced the CSV files.
RECORDS_PER_EPOCH = 12


def plot_accuracy_log(ax, log, title):
    """Draw the 'training' and 'testing' columns of one accuracy log on `ax`.

    The x axis is derived from the number of rows in `log`, so logs of any
    length can be plotted. Both curves are labelled so that the legend has
    entries to show.
    """
    runs = (np.arange(len(log)) + 1) / RECORDS_PER_EPOCH
    ax.plot(runs, log.loc[:, 'training'], label='Training')
    ax.plot(runs, log.loc[:, 'testing'], label='Testing')
    ax.set_xlabel('Runs through dataset')
    ax.set_title(title)
    ax.legend()


fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
errors_1HL = pd.read_csv('1HL.csv', index_col=[0])
errors_2HL = pd.read_csv('2HL.csv', index_col=[0])

plot_accuracy_log(ax1, errors_1HL, '1 Hidden layer')
plot_accuracy_log(ax2, errors_2HL, '2 Hidden layers')
ax1.set_ylabel('Proportion correct')    # y axis is shared, so one label is enough
plt.savefig('Errors')

In [10]:
# Input/target placeholders for a training stream and a testing stream.
# Each one gets its own graph name; re-using 'inputs'/'targets' for both
# pairs makes TensorFlow silently rename the second pair ('inputs_1', ...).
tr_in = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='train_inputs')
tr_tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='train_targets')
tst_in = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='test_inputs')
tst_tgt = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='test_targets')

# Hyper-parameters read by the graph-building and training cells
epochs = 5          # Passes through the training data
learnrate = 0.01    # Learning rate handed to the Adam optimiser

In [11]:
"""Create the neural network"""

# Layers of the neural network, training

# Reshape data into the shape of pictures for convolutional layers
tr_in_rs = tf.reshape(tr_in, shape=[-1, 28, 28, 1])

# First convolutional layer
tr_h = tf.layers.conv2d(tr_in_rs, filters=32, kernel_size=[5, 5], \
        kernel_initializer=tf.glorot_uniform_initializer(), \
        padding="same", activation=None, name='conv1')
tr_h = tf.layers.batch_normalization(tr_h, training=True, name='bn1')
tr_h = tf.nn.relu(tr_h)
tr_h = tf.layers.max_pooling2d(tr_h, pool_size=[2, 2], strides=2, \
        name='pool1')

# Second convolution layer
tr_h = tf.layers.conv2d(tr_h, filters=32, kernel_size=[5, 5], \
        kernel_initializer=tf.glorot_uniform_initializer(), \
        padding="same", activation=None, name='conv2')
tr_h = tf.layers.batch_normalization(tr_h, training=True, name='bn2')
tr_h = tf.nn.relu(tr_h)
tr_h = tf.layers.max_pooling2d(tr_h, pool_size=[2, 2], strides=2, \
        name='pool2')


# Do a two layer dense neural network on these outputs
test_out1 = tr_h
tr_h = tf.reshape(tr_h, [-1, 1568])
test_out2 = tr_h
tr_h = tf.layers.dense(tr_h, 32, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=None, name='dense1')
tr_h = tf.nn.relu(tr_h)
tr_h = tf.layers.dropout(tr_h, rate=0.25, training=True, name='dropout1')
tr_logits = tf.layers.dense(tr_h, 10, kernel_initializer=tf.glorot_uniform_initializer(), \
        activation=tf.identity, name='dense2')


# Layers of the neural network, testing

# Reshape data into the shape of pictures for convolutional layers
#tst_in_rs = tf.reshape(tst_in, shape=[-1, 28, 28, 1])

# First convolutional layer
#tst_h = tf.layers.conv2d(tst_in_rs, filters=32, kernel_size=[5, 5], \
#        kernel_initializer=tf.glorot_uniform_initializer(), \
#        padding="same", activation=None, name='conv1', reuse=True)
#tst_h = tf.layers.batch_normalization(tst_h, training=False, name='bn1', reuse=True)
#tst_h = tf.nn.relu(tst_h)
#tst_h = tf.layers.max_pooling2d(tst_h, pool_size=[2, 2], strides=2, \
#        name='pool1')

# Second convolution layer
#tst_h = tf.layers.conv2d(tst_h, filters=32, kernel_size=[5, 5], \
#        kernel_initializer=tf.glorot_uniform_initializer(), \
#        padding="same", activation=None, name='conv2', reuse=True)
#tst_h = tf.layers.batch_normalization(tst_h, training=False, name='bn2', reuse=True)
#tst_h = tf.nn.relu(tst_h)
#tst_h = tf.layers.max_pooling2d(tst_h, pool_size=[2, 2], strides=2, \
#        name='pool2')

# Do a two layer dense neural network on these outputs
#tst_h = tf.reshape(tst_h, [-1, 1568])
#tst_h = tf.layers.dense(tst_h, 32, kernel_initializer=tf.glorot_uniform_initializer(), \
#        activation=None, name='dense1', reuse=True)
#tst_h = tf.nn.relu(tst_h)
#tst_logits = tf.layers.dense(tst_h, 10, kernel_initializer=tf.glorot_uniform_initializer(), \
#        activation=tf.identity, name='dense2', reuse=True)

# Turn output layer into softmax job
#tr_sm = tf.nn.softmax(tr_logits)
#tst_sm = tf.nn.softmax(tst_logits)

# Calculate the cost and run the optimiser
tr_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tr_tgt, logits=tr_logits))
#tst_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=tst_tgt, logits=tst_logits))
optimizer = tf.train.AdamOptimizer(learning_rate=learnrate).minimize(tr_cost)

In [12]:
"""Train the neural network."""

with tf.Session() as sess:
    
    num_tr = g_tr_data.shape[0]
    num_tst = g_tst_data.shape[0]
    
    batch_size = 100   # Must divide num_tr
    num_batches = int(num_tr / batch_size + 0.00001)
    # Error metrics are recorded every batch_recordspace batches
    batch_recordspace = 50
    
    # Create a pandas DataFrame where the training and testing error is put
    columns = ['epoch', 'batch', 'training', 'testing']
    num_rows = int(num_batches * epochs / batch_recordspace + 0.00001)
    index = np.arange(num_rows)
    Prop_Correct = pd.DataFrame(np.zeros((num_rows,4)), index=index, columns=columns)
    
    # Initialise all variables
    sess.run(init)
    
    start_time = time.time()
    
    # Do the training, epoch is one run through full dataset
    for epoch in range(epochs):
        # One training iteration per batch
        for batch in range(num_batches):
            
            # Input the batch of training and testing data
            range_left = batch * batch_size
            range_right = (batch + 1) * batch_size
            feed_dict = {
                    tr_in: g_tr_data[range_left:range_right], \
                    tr_tgt: g_tr_labels_vc[range_left:range_right], \
                    tst_in: g_tst_data, \
                    tst_tgt: g_tst_labels_vc \
                    }
        
            # Run one iteration of neural network trainer
            iter_wanted = [
                    optimizer, \
                    tr_cost, tr_sm, tr_tgt, \
                    tst_cost, tst_sm, tst_tgt \
                    ]
            iter_outputs = sess.run(iter_wanted, feed_dict=feed_dict)
            _ = iter_outputs[0]
            out_tr_cost, out_tr_sm, out_tr_tgt = iter_outputs[1:4]
            out_tst_cost, out_tst_sm, out_tst_tgt = iter_outputs[4:7]
        
            # Find the proportion of correctly identified numbers in training dataset
            tr_truth = np.argmax(out_tr_tgt, axis=1)
            tr_nn_guess = np.argmax(out_tr_sm, axis=1)
            tr_num_correct = batch_size - len(np.nonzero(tr_nn_guess - tr_truth)[0])
            tr_fraction_correct = tr_num_correct / batch_size
        
            # Find the proportion of correctly identified numbers in testing dataset
            tst_truth = np.argmax(out_tst_tgt, axis=1)
            tst_nn_guess = np.argmax(out_tst_sm, axis=1)
            tst_num_correct = num_tst - len(np.nonzero(tst_nn_guess - tst_truth)[0])
            tst_fraction_correct = tst_num_correct / num_tst
            
            print(epoch, batch, tr_fraction_correct, tst_fraction_correct)
            
            # Print and input some outputs (training and testing errors)
#            if batch % batch_recordspace == 0:
#                print(epoch, batch, tr_fraction_correct, tst_fraction_correct)
#                row_num = int((epoch * num_batches + batch) / batch_recordspace + 0.00001)
#                Prop_Correct.loc[row_num, :] = [ \
#                        epoch, batch, tr_fraction_correct, tst_fraction_correct]
    
    # Output a pandas DataFrame with the training and testing errors
#    Prop_Correct.to_csv('2HL.csv')
    
    end_time = time.time()
    print('Time taken: {}s'.format(end_time - start_time))
    
    
    # Calculate the number correct across the whole dataset
            
    # Input the batch of training and testing data
    feed_dict = {
            tr_in: g_tr_data, \
            tr_tgt: g_tr_labels_vc, \
            tst_in: g_tst_data, \
            tst_tgt: g_tst_labels_vc \
            }
        
    # Run neural network again on whole training dataset
    run_wanted = [tr_sm, tr_tgt, tst_sm, tst_tgt]
    run_outputs = sess.run(run_wanted, feed_dict=feed_dict)
    out_tr_sm, out_tr_tgt, out_tst_sm, out_tst_tgt = run_outputs
            
    # Find the proportion of correctly identified numbers in training dataset
    # (The testing dataset has already been done)
    tr_truth = np.argmax(out_tr_tgt, axis=1)
    tr_nn_guess = np.argmax(out_tr_sm, axis=1)
    tr_num_correct = num_tr - len(np.nonzero(tr_nn_guess - tr_truth)[0])
    tr_fraction_correct = tr_num_correct / num_tr
            
    print('Correct, training dataset: {}'.format(tr_fraction_correct))
    print('Correct, testing dataset: {}'.format(tst_fraction_correct)) 
    
    

NameError: name 'init' is not defined