In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
%matplotlib inline

# Read every array stored in the CIFAR-10 archive into an in-memory dict
with np.load(os.path.join('data','cifar10-20k.npz'), allow_pickle=False) as npz_file:
    data = {key: npz_file[key] for key in npz_file.files}

# Images are reshaped to (n_images, 32, 32, 3); labels and class names are used as stored
X_full = data['data'].reshape(-1, 32, 32, 3)
y_full = data['labels']
names = data['names']

# One output class per entry in the class-name array
num_classes = len(names)

# Split data
from sklearn.model_selection import train_test_split

# Hold out 10% of all samples as the test set
X_tr_full, X_te, y_tr_full, y_te = train_test_split(
    X_full, y_full,
    test_size=0.1,
    random_state=1,
)

# Hold out 8% of the remaining samples as the cross-validation set
X_tr, X_cv, y_tr, y_cv = train_test_split(
    X_tr_full, y_tr_full,
    test_size=0.08,
    random_state=1,
)

# Report the shape of each split, one per line
print("\n".join(
    "{} {}".format(label, split.shape)
    for label, split in (("X_tr", X_tr), ("X_cv", X_cv), ("X_te", X_te))
))

X_tr (16560, 32, 32, 3)
X_cv (1440, 32, 32, 3)
X_te (2000, 32, 32, 3)


In [2]:
# Batch generator
def get_batches(X, y, batch_size):
    """Yield (X_batch, y_batch) pairs covering the data once in random order.

    The sample order is drawn from NumPy's global random state, so seeding
    with ``np.random.seed`` makes the sequence of batches reproducible.

    Args:
        X: indexable array of samples; indexed with an integer index array.
        y: indexable array of labels; ``len(y)`` sets the number of samples.
        batch_size: number of samples per batch; the last batch may be smaller.

    Yields:
        Tuples ``(X[idx], y[idx])`` where ``idx`` is one slice of the
        shuffled sample indexes.
    """
    n_samples = len(y)

    # Random visiting order over all sample positions
    order = np.arange(n_samples)
    np.random.shuffle(order)

    # Walk through the shuffled order one batch-sized slice at a time
    for start in range(0, n_samples, batch_size):
        picked = order[start:start + batch_size]
        yield X[picked], y[picked]

## Create the Model

In [3]:
# Build the network in its own graph: two conv/pool/LRN stages, two dense layers, a logits layer
graph = tf.Graph()
# Read by the training cell: True -> initialize variables, False -> restore from the checkpoint
init = True

with graph.as_default():
    # Placeholders: image batch, integer class labels, and a flag that switches dropout on
    X = tf.placeholder(dtype=tf.float32, shape=[None, 32, 32, 3])
    y = tf.placeholder(dtype=tf.int32, shape=[None])
    training = tf.placeholder(dtype=tf.bool)
    
    # Step counter for the learning-rate schedule; incremented by train_op, not trained itself
    global_step = tf.Variable(0, trainable=False)
    
    # Staircase exponential decay: the rate is multiplied by 0.9 once every 2000 steps
    learning_rate = tf.train.exponential_decay(0.001,               # initial learning rate
                                               global_step, 
                                               2000,                # decay every 2000 steps
                                               0.9,                 # decay factor
                                               staircase=True)
    
    # Convolutional layer 1 (32x32x3 -> 32x32x64)
    conv1 = tf.layers.conv2d(
        X,                           # Input data
        filters=64,                  # 64 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0), # Small standard deviation
        name='conv1'                  # Named so the kernel can be fetched below
    )
    
    # Max pooling layer 1 (32x32x64 -> 16x16x64)
    pool1 = tf.layers.max_pooling2d(
        conv1,                       # Input
        pool_size=(3, 3),            # Pool size: 3x3
        strides=(2, 2),              # Stride: 2
        padding='SAME',              # "same" padding
        name='pool1'
    )
    
    # Local response normalization of the pooled maps (depth radius 4)
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
    
    # Convolutional layer 2 (16x16x64 -> 16x16x64)
    conv2 = tf.layers.conv2d(
        norm1,                       # Input
        filters=64,                  # 64 filters
        kernel_size=(5, 5),          # Kernel size: 5x5
        strides=(1, 1),              # Stride: 1
        padding='SAME',              # "same" padding
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.truncated_normal_initializer(stddev=5e-2, seed=0),    # Small standard deviation
        name='conv2'                 # Named so the kernel can be fetched below
    )
    
    # Local response normalization; in this stage it is applied before pooling
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
    
    # Max pooling layer 2 (3x3, stride: 2; 16x16x64 -> 8x8x64)
    pool2 = tf.layers.max_pooling2d(
        norm2,                       # input
        pool_size=(3, 3),            # pool size 3x3
        strides=(2, 2),              # stride 2
        padding='SAME'
    )
    
    # Flatten output to one vector per image
    flat_output = tf.contrib.layers.flatten(pool2)
    
    # Dropout at 50% on the flattened features; only active when `training` is fed True
    flat_output = tf.layers.dropout(flat_output, rate=0.5, seed=1, training=training)
    
    # Fully connected layer 1
    fc1 = tf.layers.dense(
        flat_output,                 # input
        384,                         # 384 hidden units
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer()
    )
    
    # Disabled experiment: dropout at 30% after fc1
    # fc1 = tf.layers.dropout(fc1, rate=0.30, seed=1, training=training)
    
    # Fully connected layer 2
    fc2 = tf.layers.dense(
        fc1,                 # input
        192,                         # 192 hidden units
        activation=tf.nn.relu,       # ReLU
        kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=0),
        bias_initializer=tf.zeros_initializer()
    )
    
    # Disabled experiment: dropout at 75% after fc2
    # fc2 = tf.layers.dropout(fc2, rate=0.75, seed=1, training=training)
    
    
    # Output layer
    logits = tf.layers.dense(
        fc2,                         # input
        num_classes,                           # One output unit per category
        activation=None,             # No activation function
        kernel_initializer=tf.variance_scaling_initializer(scale=1, seed=0),
        bias_initializer=tf.zeros_initializer()
    )
    
    # Kernel weights of the 1st conv. layer, looked up by the layer's scope name
    with tf.variable_scope('conv1', reuse=True):
        conv_kernels = tf.get_variable('kernel')
    
    # Kernel weights of the 2nd conv. layer
    with tf.variable_scope('conv2', reuse=True):
        conv_kernels2 = tf.get_variable('kernel')
    
    # Mean cross-entropy over the batch (labels are class indexes, not one-hot)
    mean_ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))

    # Adam optimizer driven by the decaying learning rate
    gd = tf.train.AdamOptimizer(learning_rate=learning_rate)
    
    # Minimize cross-entropy; passing global_step makes each update increment it
    train_op = gd.minimize(mean_ce, global_step=global_step)

    # Compute predictions and accuracy (fraction of samples whose argmax matches the label)
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    is_correct = tf.equal(y, predictions)
    accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [None]:
# Per-epoch metric histories, appended to by the training loop
train_acc_values, train_cost_values = [], []
valid_acc_values, valid_cost_values = [], []
dropout1_values = []

# Training schedule: total epochs, and how often (in epochs) to log and to checkpoint
epochs = 20
print_every = 3
checkpoint_every = 5

# Session configuration: either hide all GPUs, or use the GPU with the BFC
# allocator limited to 60% of the device memory
use_gpu = True
if not use_gpu:
    config = tf.ConfigProto(device_count = {'GPU': 0})
else:
    config = tf.ConfigProto()
    gpu_options = config.gpu_options
    gpu_options.allocator_type = 'BFC'
    gpu_options.per_process_gpu_memory_fraction = 0.6

# Single checkpoint location, used for restoring and for every save
checkpoint_path = './model/cifar_model.ckpt'
# Number of samples per training batch
batch_size = 128

# Saver.save() raises if the checkpoint's parent directory does not exist,
# so make sure it is there before training starts
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

with tf.Session(graph=graph, config=config) as sess:
    # Saver used both to restore a previous run and to write checkpoints
    saver = tf.train.Saver()

    # New model: initialize the variables; otherwise continue from the checkpoint
    if init:
        sess.run(tf.global_variables_initializer())
    else:
        saver.restore(sess, checkpoint_path)

    # Seed NumPy so get_batches shuffles the same way on every run
    np.random.seed(0)

    # Train several epochs
    for epoch in range(epochs):
        # Accuracy and cost of each training batch in this epoch
        batch_acc = []
        batch_cost = []

        for X_batch, y_batch in get_batches(X_tr, y_tr, batch_size):
            # One optimizer step (dropout on); metrics are measured on the same batch
            _, acc_value, cost_value = sess.run([train_op, accuracy, mean_ce], feed_dict={
                X: X_batch,
                y: y_batch,
                training: True
            })
            batch_acc.append(acc_value)
            batch_cost.append(cost_value)

        # Evaluate on the whole validation set (dropout off) and read the current learning rate
        valid_acc, valid_cost, lr = sess.run([accuracy, mean_ce, learning_rate], feed_dict={
            X: X_cv,
            y: y_cv,
            training: False
        })

        # Training metrics for the epoch are the unweighted mean over its batches
        train_acc = np.mean(batch_acc)
        valid_acc_values.append(valid_acc)
        valid_cost_values.append(valid_cost)
        train_acc_values.append(train_acc)
        train_cost_values.append(np.mean(batch_cost))

        # Print progress every `print_every` epochs to keep the output short
        if epoch % print_every == 0:
            print('Epoch {:02d} - cv acc: {:.3f} - train acc: {:.3f} (mean) - cv cost: {:.3f} - lr: {:.5f}'.format(
                epoch, valid_acc, train_acc, valid_cost, lr
            ))

        # Save a checkpoint every `checkpoint_every` epochs, except at epoch 0
        if epoch != 0 and epoch % checkpoint_every == 0:
            print("Saving checkpoint")
            save_path = saver.save(sess, checkpoint_path)

            # A checkpoint now exists, so a re-run of this cell restores instead of retraining
            init = False

    # Print results of the last epoch (nothing to report if no epoch ran)
    if epochs > 0:
        print('Epoch {} - cv acc: {:.3f} - train acc: {:.3f} (mean) - cv cost: {:.3f}'.format(
            epochs, valid_acc, train_acc, valid_cost
        ))

    # Check on the test data (dropout off)
    test_acc = sess.run(accuracy, feed_dict={
        X: X_te,
        y: y_te,
        training: False
    })

    # Get 1st conv. layer kernels as a NumPy array
    kernels = conv_kernels.eval()
    #kernels2 = conv_kernels2.eval()

    # Final save of the trained model; from here on a re-run can restore it
    save_path = saver.save(sess, checkpoint_path)
    init = False

print("Convolutional network accuracy (test set):",test_acc)

def _plot_history(axis, valid_values, train_values, title_template, y_label):
    """Draw the validation (red) and training (blue) curves of one metric on `axis`.

    The title is `title_template` formatted with the mean of the last three
    validation values; the x axis is the epoch index.
    """
    axis.plot(valid_values, color="red", label="Validation")
    axis.plot(train_values, color="blue", label="Training")
    axis.set_title(title_template.format(np.mean(valid_values[-3:])))
    axis.set_xlabel('epoch')
    axis.set_ylabel(y_label)
    axis.legend()


# Accuracy on the left panel, cost on the right, training vs. validation in each
f, ax = plt.subplots(1,2,figsize=(20,5))

_plot_history(ax[0], valid_acc_values, train_acc_values,
              'Validation accuracy: {:.3f} (mean last 3)', 'accuracy')
_plot_history(ax[1], valid_cost_values, train_cost_values,
              'Validation cost: {:.3f} (mean last 3)', 'cost')

plt.show()

Epoch 00 - cv acc: 0.106 - train acc: 0.105 (mean) - cv cost: 2.300 - lr: 0.00100
Epoch 03 - cv acc: 0.328 - train acc: 0.249 (mean) - cv cost: 1.799 - lr: 0.00100
