In [1]:
%autosave 60

import tensorflow as tf
import numpy as np
from tensorflow.contrib.layers import batch_norm, dropout
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

Autosaving every 60 seconds
Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
def he_normal_initialisation(n_inputs, n_outputs):
    stddev = np.power(2 / (n_inputs + n_outputs), 1 / np.sqrt(2))
    # truncated normal distributions limit the size of the weights, speeding up the training time.
    return tf.truncated_normal((n_inputs, n_outputs), stddev=stddev)

def he_uniform_initialisation(n_inputs, n_outputs):
    r = np.power(6 / (n_inputs + n_outputs), 1 / np.sqrt(2))
    # truncated normal distributions limit the size of the weights, speeding up the training time.
    return tf.random_uniform((n_inputs, n_outputs), -r, r)

def neuron_layer(X, n_neurons, name):
    with tf.name_scope(name):
        #print(X.get_shape()[1])
        n_inputs = int(X.get_shape()[1])
        W = tf.Variable(he_normal_initialisation(n_inputs, n_neurons), name="weights")
        b = tf.Variable(tf.zeros([n_neurons]), name="biases")
        z = tf.matmul(X, W) + b
        return tf.nn.elu(z)

def cnn_layer(X, patch_size, n_input_filters, n_filters, name, initialised_weights_stddev = 0.05):
    with tf.name_scope(name):
        w = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, n_input_filters, n_filters], stddev=initialised_weights_stddev))
        b = tf.Variable(tf.zeros([n_filters]))
        m = tf.nn.elu(tf.nn.conv2d(X, w, strides=[1, 2, 2, 1], padding="SAME") + b)
        return tf.nn.local_response_normalization(m, depth_radius=7, alpha=1.2)

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin

class CnnClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_cnn_layers=3, first_cnn_n_neurons=200, ccn_layer_ratio=0.5, fully_connected_neurons = 1200,
                learning_rate=0.01):
        self.n_cnn_layers = n_cnn_layers
        self.first_cnn_n_neurons = first_cnn_n_neurons
        self.ccn_layer_ratio = ccn_layer_ratio
        self.fully_connected_neurons = fully_connected_neurons
        self.learning_rate = learning_rate
        self._build_graph()

    def _build_graph(self):
        input_spatial_size = 28
        input_channels = 1
        n_filters_per_layer = [int(self.first_cnn_n_neurons * (self.ccn_layer_ratio ** i)) for i in range(self.n_cnn_layers)]
        print("n_filters_per_layer:", n_filters_per_layer)
        patch_size = 3
        self.n_output = 10
        self.batch_size = 200
        
        with tf.device("/gpu:0"):
            self.x = tf.placeholder(tf.float32, shape=(self.batch_size, input_spatial_size ** 2), name="input")
            reshaped_x = tf.reshape(self.x, (tf.shape(self.x)[0], input_spatial_size, input_spatial_size, 1))
            self.y = tf.placeholder(tf.int64, shape=(self.batch_size), name="y")

            with tf.name_scope("dnn"):
                input_tensor = reshaped_x
                n_input_filters = input_channels
                for i in range(len(n_filters_per_layer)):
                    input_tensor = cnn_layer(input_tensor, patch_size, n_input_filters, n_filters_per_layer[i], "hidden" + str(i + 1))
                    n_input_filters = n_filters_per_layer[i]
                #avg_pool_output = tf.nn.avg_pool(input_tensor, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
                shape = input_tensor.get_shape().as_list()
                #print(shape)
                reshape = tf.reshape(input_tensor, [shape[0], shape[1] * shape[2] * shape[3]])
                fc = tf.nn.dropout(neuron_layer(reshape, self.fully_connected_neurons, "fully_connected_one"), keep_prob=0.4)
                #print(reshape.get_shape())
                logits = neuron_layer(fc, self.n_output, "output")
                self.evaluation = tf.nn.softmax(logits)

            with tf.name_scope("loss"):
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=logits)
                self.loss = tf.reduce_mean(cross_entropy, name="loss")

            with tf.name_scope("training"):
                optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
                self.training_op = optimizer.minimize(self.loss)

        with tf.name_scope("eval"):
            k = 1
            correctness = tf.nn.in_top_k(logits, self.y, k)
            self.accuracy = tf.reduce_mean(tf.cast(correctness, tf.float32)) * 100
            
        self.init = tf.global_variables_initializer()

    def fit(self, X, y, epochs = 20):
        saver = tf.train.Saver()

        interim_checkpoint_path = "./checkpoints/mnist_cnn_model.ckpt"
        early_stopping_checkpoint_path = "./checkpoints/mnist_cnn_model_early_stopping.ckpt"

        from datetime import datetime

        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        root_logdir = "tf_logs"
        log_dir = "{}/run-{}/".format(root_logdir, now)

        loss_summary = tf.summary.scalar('loss', self.loss)
        accuracy_summary = tf.summary.scalar("accuracy", self.accuracy)
        summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())
        
        n_batches = int(np.ceil(len(X) // self.batch_size))

        early_stopping_check_frequency = self.batch_size // 4
        early_stopping_check_limit = self.batch_size * 2

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        self.session = sess
        sess.run(self.init)
        #saver.restore(sess, interim_checkpoint_path)

        best_validation_acc = 0.0
        best_validation_step = 0
        for epoch in range(epochs):
            print("epoch", epoch)
            for batch_index in range(n_batches):
                step = epoch * n_batches + batch_index
                # TODO: replace this with code that gets a batch from X and y.
                X_batch, y_batch = mnist.train.next_batch(self.batch_size)
                if batch_index % 10 == 0:
                    summary_str = summary_op.eval(session=sess, feed_dict={self.x: X_batch, self.y: y_batch})
                    file_writer.add_summary(summary_str, step)
                t, l, a = sess.run([self.training_op, self.loss, self.accuracy], feed_dict={self.x: X_batch, self.y: y_batch})
                if batch_index % 10 == 0: print("loss:", l, "train accuracy:", a)
                # Early stopping check
                if batch_index % early_stopping_check_frequency == 0:
                    validation_acc = self.prediction_accuracy(mnist.validation.images, mnist.validation.labels)
                    print("validation accuracy", validation_acc)
                    if validation_acc > best_validation_acc:
                        saver.save(sess, early_stopping_checkpoint_path)
                        best_validation_acc = validation_acc
                        best_validation_step = step
                    elif step >= (best_validation_step + early_stopping_check_limit):
                        print("Stopping early during epoch", epoch)
                        break
            else:
                continue
            break
            save_path = saver.save(sess, interim_checkpoint_path)
        saver.restore(sess, early_stopping_checkpoint_path)
        save_path = saver.save(sess, "./checkpoints/mnist_cnn_model_final.ckpt")
            
    def predict_proba(self, X):
        raise Error("Not Implemented")

    def predict(self, X):
        dataset_size = X.shape[0]
        #print "dataset_size: ", dataset_size, " batch_size: ", batch_size
        if dataset_size % self.batch_size != 0:
            raise "batch_size must be a multiple of dataset_size."
        predictions = np.ndarray(shape=(dataset_size, self.n_output), dtype=np.float32)
        steps = dataset_size // self.batch_size
        #print "steps: ", steps
        for step in range(steps):
            offset = (step * self.batch_size)
            #print "offset ", offset
            batch_data = X[offset:(offset + self.batch_size), :]
            feed_dict = {
                self.x: batch_data
            }
            predictions[offset:offset+self.batch_size, :] = self.evaluation.eval(session=self.session, feed_dict=feed_dict)
        return predictions
    
    def _prediction_accuracy(self, predictions, labels):
        return (100.0 * np.sum(np.argmax(predictions, 1) == labels)
              / predictions.shape[0])
    
    def prediction_accuracy(self, X, y):
        predictions = self.predict(X)
        return self._prediction_accuracy(predictions, y)

In [4]:
cnn_clf = CnnClassifier()
cnn_clf.fit(mnist.train.images, mnist.train.labels)

test_acc = cnn_clf.prediction_accuracy(mnist.test.images, mnist.test.labels)
print(">>>>>>>>>> test dataset accuracy:", test_acc)

n_filters_per_layer: [200, 100, 50]
epoch 0
loss: 2.30233 train accuracy: 10.0
validation accuracy 36.2
loss: 0.988802 train accuracy: 75.5
loss: 0.795613 train accuracy: 78.5
loss: 0.511704 train accuracy: 86.0
loss: 0.453489 train accuracy: 87.0
loss: 0.594584 train accuracy: 86.0
validation accuracy 89.9
loss: 0.654343 train accuracy: 87.0
loss: 0.180736 train accuracy: 94.5
loss: 0.395703 train accuracy: 92.0
loss: 0.273213 train accuracy: 93.0
loss: 0.297605 train accuracy: 94.5
validation accuracy 92.24
loss: 0.430427 train accuracy: 88.5
loss: 0.473437 train accuracy: 90.0
loss: 0.406114 train accuracy: 91.5
loss: 0.313312 train accuracy: 94.0
loss: 0.155711 train accuracy: 97.0
validation accuracy 92.42
loss: 0.339202 train accuracy: 93.5
loss: 0.475945 train accuracy: 91.0
loss: 0.242563 train accuracy: 94.0
loss: 0.326255 train accuracy: 93.0
loss: 0.257056 train accuracy: 95.0
validation accuracy 94.08
loss: 0.52771 train accuracy: 91.5
loss: 0.281026 train accuracy: 93.0
lo

In [None]:
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {
    "n_cnn_layers": range(1, 15),
    "first_cnn_n_neurons": [10, 100, 200, 400, 800, 1000, 1200, 2000],
    "ccn_layer_ratio": [0.1, 0.3, 0.5, 0.7, 0.9],
    "fully_connected_neurons": [200, 500, 1000, 1200, 1500, 2000, 3000],
    "learning_rate": [0.001, 0.005, 0.01, 0.02, 0.05],
    #"batch_size": [10, 50, 100, 500],
    #"activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
}

rnd_search = RandomizedSearchCV(CnnClassifier(), param_distribs, n_iter=100,
                                #fit_params={"X_valid": X_valid1, "y_valid": y_valid1, "n_epochs": 1000},
                                random_state=42, verbose=2)
rnd_search.fit(mnist.train.images, mnist.train.labels)

n_filters_per_layer: [200, 100, 50]
Fitting 3 folds for each of 100 candidates, totalling 300 fits
n_filters_per_layer: [200, 100, 50]
n_filters_per_layer: [200, 100, 50]
[CV] ccn_layer_ratio=0.9, n_cnn_layers=4, learning_rate=0.02, first_cnn_n_neurons=10, fully_connected_neurons=500 
epoch 0
loss: 2.30279 train accuracy: 6.0
validation accuracy 42.12
loss: 2.66131 train accuracy: 50.5
loss: 1.09941 train accuracy: 70.5
loss: 0.818534 train accuracy: 77.5
loss: 0.787004 train accuracy: 82.5
loss: 0.662137 train accuracy: 83.5
validation accuracy 86.14
loss: 0.495906 train accuracy: 87.5
loss: 0.628606 train accuracy: 83.5
loss: 0.40967 train accuracy: 89.0
loss: 0.609841 train accuracy: 87.0
loss: 0.292426 train accuracy: 92.5
validation accuracy 92.12
loss: 0.314274 train accuracy: 89.5
loss: 0.485306 train accuracy: 87.5
loss: 0.21564 train accuracy: 94.5
loss: 0.456117 train accuracy: 91.5
loss: 0.368529 train accuracy: 92.0
validation accuracy 92.3
loss: 0.205393 train accuracy: 93

In [None]:
import os; os.system("beep -f 555 -l 460")
os.system("shutdown")

Adding a FC layer doesn't seem to be improving the model's performance.