In [13]:
from keras.datasets import cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
# The labels arrive as 2-D column vectors (y_train.shape is (50000, 1)).
# Keras copes with that shape, but plain 1-D label vectors are easier to
# slice, shuffle and stratify on, so collapse both arrays here.
y_train, y_test = y_train.reshape(-1), y_test.reshape(-1)

Using TensorFlow backend.


In [14]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the training images as a validation set, stratified on the
# labels and seeded with a fixed random_state.
# NOTE: this overwrites X_train / y_train, so re-running the cell without
# reloading the data splits the already reduced training set again.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42, stratify=y_train
)

In [15]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm
import os
from skimage.transform import resize
from random import shuffle
import pandas as pd
import cv2
from collections import Counter
from skimage import exposure
from sklearn.utils import shuffle as shuffle_X_y

# Root directory for artifacts produced by this notebook; model checkpoints
# are written to and read from '<data_dir>/models/<model_name>'.
data_dir = './data'

In [16]:
import tensorflow as tf
from tensorflow.contrib.layers import flatten

def leaky_relu(x, alpha, name):
    """Leaky ReLU activation: element-wise max(alpha * x, x).

    Args:
        x: input tensor.
        alpha: multiplier applied to `x` to form the first operand of the
            maximum (acts as the negative-side slope when 0 <= alpha <= 1).
        name: name given to the resulting `tf.maximum` op.

    Returns:
        The activated tensor.
    """
    scaled = alpha * x
    return tf.maximum(scaled, x, name=name)

def conv_layer(x, num_input_channels, num_output_channels,
               mu, sigma, window=(5,5)):
    """Create a stride-1, SAME-padded 2-D convolution with a bias term.

    Args:
        x: input tensor fed to `tf.nn.conv2d`.
        num_input_channels: third dimension of the filter variable.
        num_output_channels: fourth dimension of the filter variable and
            length of the bias vector.
        mu: mean of the truncated normal used to initialise the filter.
        sigma: standard deviation of that truncated normal.
        window: pair giving the first two dimensions of the filter.

    Returns:
        Tuple `(output, filter_variable)` where `output` is the convolution
        result plus bias (no activation applied).
    """
    dim_a, dim_b = window
    filter_shape = (dim_a, dim_b, num_input_channels, num_output_channels)

    # Filter first, then bias: variables are unnamed, so their auto-generated
    # names follow creation order.
    initial_filter = tf.truncated_normal(shape=filter_shape, mean=mu, stddev=sigma)
    kernel = tf.Variable(initial_filter)
    bias = tf.Variable(tf.zeros(num_output_channels))

    convolved = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')
    return convolved + bias, kernel

def fully_connected(x, shape_in, shape_out, mu = 0, sigma = 0.1):
    """Create a dense layer computing `x @ W + b` (no activation applied).

    Args:
        x: input tensor passed to `tf.matmul` as the left operand.
        shape_in: number of rows of the weight matrix.
        shape_out: number of columns of the weight matrix and length of the
            bias vector.
        mu: mean of the truncated normal used to initialise the weights.
        sigma: standard deviation of that truncated normal.

    Returns:
        Tuple `(output, weight_variable)`.
    """
    initial_weights = tf.truncated_normal(shape=(shape_in, shape_out),
                                          mean=mu, stddev=sigma)
    weights = tf.Variable(initial_weights)
    bias = tf.Variable(tf.zeros(shape_out))

    projected = tf.matmul(x, weights)
    return projected + bias, weights

In [26]:
def model(x, channel_depth, mu = 0, sigma = 1e-2, alpha=0.05, num_output=10): 
    """Build the CNN graph: three conv/leaky-ReLU/max-pool stages followed by
    three fully connected layers, with dropout after the first two of them.

    The shape comments below (and the hard-coded flatten size of 1024) assume
    a 32x32 spatial input, which is what the shapes printed while building the
    graph show for the placeholder used in this notebook.

    Args:
        x: input image tensor with `channel_depth` channels.
        channel_depth: number of channels of `x` (input channels of layer 1).
        mu: mean used to initialise every weight variable.
        sigma: standard deviation used to initialise every weight variable.
        alpha: slope factor passed to every `leaky_relu` call.
        num_output: width of the final layer (number of logits).

    Returns:
        Tuple `(logits, keep_prob, weights, activations)`:
        - logits: output of the last fully connected layer (no softmax).
        - keep_prob: float32 placeholder created here and shared by both
          dropout layers; callers must feed it.
        - weights: list of the six weight variables (3 conv + 3 dense).
        - activations: list of the three conv-stage activations, taken after
          the leaky ReLU and before pooling.
    """
    ### Layer 1: Convolutional. Input = 32x32xchannel_depth. Output = 32x32x16.
    conv1, conv1_W = conv_layer(x, channel_depth, 16, mu, sigma)
    print(conv1.get_shape())

    # Leaky ReLU
    conv1_activaton = leaky_relu(conv1, alpha, 'conv1_activaton')    

    # Pooling. Input = 32x32x16. Output = 16x16x16.
    conv1 = tf.nn.max_pool(conv1_activaton, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    print(conv1.get_shape())

    ### Layer 2: Convolutional. Input = 16x16x16. Output = 16x16x32.
    conv2, conv2_W = conv_layer(conv1, 16, 32, mu, sigma)
    print(conv2.get_shape())

    # Leaky ReLU
    conv2_activaton = leaky_relu(conv2, alpha, 'conv2_activaton')  

    # Pooling. Input = 16x16x32. Output = 8x8x32.
    conv2 = tf.nn.max_pool(conv2_activaton, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    print(conv2.get_shape())

    ### Layer 3: Convolutional. Input = 8x8x32. Output = 8x8x64.
    conv3, conv3_W = conv_layer(conv2, 32, 64, mu, sigma)
    print(conv3.get_shape())

    # Leaky ReLU
    conv3_activaton = leaky_relu(conv3, alpha, 'conv3_activaton')  

    # Pooling. Input = 8x8x64. Output = 4x4x64.
    conv3 = tf.nn.max_pool(conv3_activaton, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 
    print(conv3.get_shape())

    # Flatten. Input = 4x4x64. Output = 1024 (= 4 * 4 * 64).
    fc0   = flatten(conv3)

    ### Layer 4: Fully Connected. Input = 1024. Output = 512.
    # NOTE: 1024 is hard-coded, so a different input resolution would need
    # this value changed to match the flattened size.
    fc1, fc1_W = fully_connected(fc0, 1024, 512, mu, sigma)

    # Leaky ReLU
    fc1 = leaky_relu(fc1, alpha, 'fc1_activation')  
    
    # Dropout. A single keep_prob placeholder drives both dropout layers.
    keep_prob = tf.placeholder(tf.float32)
    fc1 = tf.nn.dropout(fc1, keep_prob)

    ### Layer 5: Fully Connected. Input = 512. Output = 256.
    fc2, fc2_W = fully_connected(fc1, 512, 256, mu, sigma)

    # Leaky ReLU
    fc2 = leaky_relu(fc2, alpha, 'fc2_activation')   
    
    # Dropout (same keep_prob as above)
    fc2 = tf.nn.dropout(fc2, keep_prob)    

    ### Layer 6: Fully Connected. Input = 256. Output = num_output.
    logits, fc3_W = fully_connected(fc2, 256, num_output, mu, sigma)

    return (logits, keep_prob, [conv1_W, conv2_W, conv3_W, fc1_W, fc2_W, fc3_W],
            [conv1_activaton, conv2_activaton, conv3_activaton])

In [27]:
channel_depth = 3
num_outputs = 10

# Graph inputs: a batch of 32x32 images with `channel_depth` channels and the
# matching integer class labels. The batch dimension is left unspecified.
x = tf.placeholder(tf.float32, shape=(None, 32, 32, channel_depth))
y = tf.placeholder(tf.int32, shape=None)
# One-hot view of the labels, consumed by the cross-entropy loss.
one_hot_y = tf.one_hot(y, depth=num_outputs)

In [28]:
rate = 1e-3       # learning rate
alpha = 1e-05     # slope for negative input values for leaky ReLU's
sigma = 1e-2      # std for initializing random weights
beta = 1e-4       # multiplier for L2 regularization

# Build the network. num_output is passed explicitly so the width of the logits
# always matches the depth of one_hot_y (both derive from num_outputs) instead
# of relying on model()'s default happening to equal it.
logits, keep_prob, weights, activations = model(x, channel_depth, sigma=sigma, alpha=alpha,
                                                num_output=num_outputs)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)

# Data term: mean cross-entropy over the batch (a scalar).
mean_cross_entropy = tf.reduce_mean(cross_entropy)

# L2 penalty: sum of tf.nn.l2_loss over every weight tensor returned by
# model(); biases are not part of `weights` and so are not regularized.
regularizer = tf.add_n([tf.nn.l2_loss(weight) for weight in weights])

# Total loss. Both terms are already scalars, so no further reduction is needed.
loss_operation = mean_cross_entropy + beta * regularizer

optimizer = tf.train.AdamOptimizer(learning_rate = rate)
training_operation = optimizer.minimize(loss_operation)

(?, 32, 32, 16)
(?, 16, 16, 16)
(?, 16, 16, 32)
(?, 8, 8, 32)
(?, 8, 8, 64)
(?, 4, 4, 64)


In [39]:
# Number of passes over the training data; passed to train_model().
EPOCHS = 30
# Mini-batch size; passed to train_model() and read directly (as a global)
# by evaluate().
BATCH_SIZE = 128

In [40]:
# Predicted class = index of the largest logit; true class = index of the 1 in
# the one-hot label. Accuracy is the mean of their element-wise equality.
logits_argmax = tf.argmax(logits, axis=1, name='logits_argmax')
one_hot_y_argmax = tf.argmax(one_hot_y, axis=1, name='one_hot_y_argmax')
correct_prediction = tf.equal(
    logits_argmax, one_hot_y_argmax, name='correct_prediction'
)
accuracy_operation = tf.reduce_mean(
    tf.cast(correct_prediction, tf.float32)
)

def evaluate(X_data, y_data):
    """Return the accuracy of the current graph over a whole dataset.

    Runs `accuracy_operation` in the default TensorFlow session on slices of
    `BATCH_SIZE` samples with dropout disabled (keep_prob = 1.0), and averages
    the per-batch accuracies weighted by batch length.

    Args:
        X_data: inputs, sliceable along the first axis and supporting len().
        y_data: labels aligned with `X_data`.

    Returns:
        Overall accuracy as a float.
    """
    session = tf.get_default_session()
    sample_count = len(X_data)

    weighted_sum = 0
    for start in range(0, sample_count, BATCH_SIZE):
        stop = start + BATCH_SIZE
        features = X_data[start:stop]
        labels = y_data[start:stop]
        feed = {x: features, y: labels, keep_prob: 1.0}
        batch_accuracy = session.run(accuracy_operation, feed_dict=feed)
        # Weight by the real batch length so a short final batch counts less.
        weighted_sum += (batch_accuracy * len(features))

    return weighted_sum / sample_count

In [41]:
def train_model(epochs, batch_size, X_train_preprocessed, y_train, 
                X_valid_preprocessed, y_valid, model_name, keep_prob_val = 0.5,
                max_to_keep=0):
    """Train the graph and checkpoint it whenever validation accuracy improves.

    Args:
        epochs: number of passes over the training data.
        batch_size: number of samples per training step.
        X_train_preprocessed: training inputs (indexable along the first axis).
        y_train: training labels aligned with `X_train_preprocessed`.
        X_valid_preprocessed: validation inputs, scored after every epoch.
        y_valid: validation labels.
        model_name: checkpoint name; files go to
            '<data_dir>/models/<model_name>/<model_name>'.
        keep_prob_val: dropout keep probability fed during training.
        max_to_keep: forwarded to `tf.train.Saver`.

    Returns:
        List with the validation accuracy measured after each epoch.
    """
    saver = tf.train.Saver(max_to_keep=max_to_keep)
    model_dir = '%s/models/%s' % (data_dir, model_name)
    os.makedirs(model_dir, exist_ok=True)
    
    validation_accuracy_arr = []
    
    best_validation_accuracy_epoch = 0
    best_validation_accuracy = 0.0

    # Entering the session as a context manager makes it the default session,
    # which is what evaluate() picks up via tf.get_default_session().
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        num_examples = len(X_train_preprocessed)
    
        print("Training...")
        print()
        for i in range(epochs):
            # Reshuffle every epoch so batches differ between epochs.
            X_train_preprocessed, y_train = shuffle_X_y(X_train_preprocessed, y_train)
        
            for offset in range(0, num_examples, batch_size):
                # Use the batch_size argument (not the global BATCH_SIZE) so the
                # slice width matches the loop stride; otherwise samples would
                # be skipped or repeated whenever the two values differ.
                end = offset + batch_size
                batch_x, batch_y = (X_train_preprocessed[offset:end], 
                                    y_train[offset:end])
                sess.run(training_operation, feed_dict={x: batch_x, y: batch_y, 
                                                        keep_prob: keep_prob_val})
        
            validation_accuracy = evaluate(X_valid_preprocessed, y_valid)
            validation_accuracy_arr.append(validation_accuracy)
            print("EPOCH {} ...".format(i+1))
            print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        
            # Save only on strict improvement. The path is the same every time,
            # so each save replaces the previous best checkpoint.
            if best_validation_accuracy < validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_validation_accuracy_epoch = i+1
                saver.save(sess, '%s/%s' % (model_dir, model_name))
                print("Model saved")
            
            print()
            
    print("Best model - epoch: %d, best validation accuracy: %.3f" % 
          (best_validation_accuracy_epoch, best_validation_accuracy))
    
    return validation_accuracy_arr

In [42]:
def display_test_accuracy(X_test_preprocessed, y_test, model_name):
    """Restore the latest checkpoint of `model_name` and print test accuracy.

    Args:
        X_test_preprocessed: test inputs passed to evaluate().
        y_test: test labels passed to evaluate().
        model_name: name of the sub-directory of '<data_dir>/models' that
            holds the checkpoint.
    """
    checkpoint_dir = '%s/models/%s' % (data_dir, model_name)
    restorer = tf.train.Saver()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        latest = tf.train.latest_checkpoint(checkpoint_dir)
        restorer.restore(session, latest)

        # evaluate() uses the default session, i.e. the one opened above.
        accuracy = evaluate(X_test_preprocessed, y_test)
        print("Test Accuracy = {:.3f}".format(accuracy))

In [43]:
def predict_model(X, model_name, batch_size):
    """Predict class indices for `X` using the latest checkpoint of a model.

    Args:
        X: array of inputs; `X.shape[0]` is the number of samples.
        model_name: name of the sub-directory of '<data_dir>/models' that
            holds the checkpoint to restore.
        batch_size: maximum number of samples per forward pass; must be
            positive.

    Returns:
        1-D array with one predicted class index per row of `X`. An empty
        `X` yields an empty array without restoring the checkpoint.

    Raises:
        ValueError: if `batch_size` is not positive (the loop below could
            never advance).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))

    num_samples = X.shape[0]
    if num_samples == 0:
        # Nothing to predict; np.concatenate would fail on an empty list.
        # int64 is assumed to match the default output type of tf.argmax.
        return np.empty((0,), dtype=np.int64)

    batch_size = min(batch_size, num_samples)

    model_dir = '%s/models/%s' % (data_dir, model_name)
    saver = tf.train.Saver()

    logits_argmax_arr = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(model_dir))

        for start_idx in range(0, num_samples, batch_size):
            end_idx = min(start_idx + batch_size, num_samples)
            # Dropout is disabled at inference time (keep_prob = 1.0).
            logits_argmax_arr.append(sess.run(logits_argmax,
                                              feed_dict={x: X[start_idx:end_idx],
                                                         keep_prob: 1.0}))

    result = np.concatenate(logits_argmax_arr, axis=0)

    # Sanity check: exactly one prediction per input row.
    assert result.shape[0] == num_samples

    return result

In [46]:
# Keep only the first 10% of the training and validation splits.
train_cnt = int(0.1 * len(X_train))
valid_cnt = int(0.1 * len(X_valid))

_X_train = X_train[:train_cnt]
_y_train = y_train[:train_cnt]
_X_valid = X_valid[:valid_cnt]
_y_valid = y_valid[:valid_cnt]

print(train_cnt, valid_cnt)

3500 1500


In [47]:
model_name = 'single_model'
print("###%s" % model_name)

# Train on the reduced training/validation subsets, then score the restored
# checkpoint on the full test set.
validation_accuracy_arr = train_model(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    X_train_preprocessed=_X_train,
    y_train=_y_train,
    X_valid_preprocessed=_X_valid,
    y_valid=_y_valid,
    model_name=model_name,
    keep_prob_val=0.5,
    max_to_keep=0,
)

display_test_accuracy(X_test, y_test, model_name)

###single_model
Training...

EPOCH 1 ...
Validation Accuracy = 0.126
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Manually adding it.
Model saved

EPOCH 2 ...
Validation Accuracy = 0.171
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Manually adding it.
Model saved

EPOCH 3 ...
Validation Accuracy = 0.186
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Manually adding it.
Model saved

EPOCH 4 ...
Validation Accuracy = 0.203
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Manually adding it.
Model saved

EPOCH 5 ...
Validation Accuracy = 0.270
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Manually adding it.
Model saved

EPOCH 6 ...
Validation Accuracy = 0.309
INFO:tensorflow:./data/models/single_model/single_model is not in all_model_checkpoint_paths. Ma