<h5> Ekrem Çetinkaya S004228 </h5>
<hr />
<h1> <strong> Cifar-10 Trainer </strong> </h1>
<hr />
<h3> This notebook is used to initialize two networks and train them </h3>

<h4> Read data and do preprocessing </h4>
<h5> You only need to run this cell once </h5>

In [None]:
import read_data
import numpy as np
def normalize(images):
    """Min-max scale ``images`` into the range [0, 1].

    The minimum and maximum are taken over the whole array, not per image
    or per channel.

    Args:
        images: array-like of numeric values.

    Returns:
        A numpy array with the same shape as ``images``. When every value is
        identical (or the input is empty) an all-zero float array is returned
        instead of dividing by zero and producing NaNs.
    """
    images = np.asarray(images)
    if images.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale
        return images.astype(np.float64)

    minimum = np.min(images)
    value_range = np.max(images) - minimum
    if value_range == 0:
        # Constant input: the usual formula would evaluate 0 / 0
        return np.zeros(images.shape, dtype=np.float64)

    return (images - minimum) / value_range

def one_hot_encode(labels, num_classes=10):
    """One-hot encode integer class labels.

    The width of the encoding is fixed by ``num_classes`` rather than derived
    from the largest label present, so a batch that happens to lack the
    highest class still yields rows of the same width as every other batch.

    Args:
        labels: array-like of integer class ids in ``[0, num_classes)``.
        num_classes: number of columns in the encoding (10 for CIFAR-10).

    Returns:
        A float numpy array of shape ``labels.shape + (num_classes,)``.

    Raises:
        ValueError: if any label falls outside ``[0, num_classes)``.
    """
    labels = np.asarray(labels, dtype=np.int64)
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'labels must be in the range [0, {})'.format(num_classes))

    # Row i of the identity matrix is the one-hot vector for class i
    return np.eye(num_classes)[labels]
# Hand the two helpers defined above to read_data so it can preprocess the dataset.
# NOTE(review): presumably this also writes the preprocess_*.p files loaded later in
# the notebook — confirm in read_data.
read_data.preprocess_and_save_data(normalize, one_hot_encode)

<hr />
<h3> CHECKPOINT </h3>
<hr />
<br>Run from here if you have already executed the cell above
<h4> Read validation images and labels </h4>

In [1]:
import pickle
# Load the validation images and labels from the preprocessed pickle file.
# NOTE: pickle can execute arbitrary code while loading; only open files that
# were produced locally by the preprocessing step.
with open('preprocess_validation.p', mode='rb') as validation_file:
    # The context manager closes the file handle once loading is done
    valid_images, valid_labels = pickle.load(validation_file)

<h4> Import tensorflow </h4>

In [3]:
import tensorflow as tf

<h4> Input variables that will be used in network design </h4>

In [4]:
def image_input(image_shape):
    """Create the float32 placeholder that receives a batch of images.

    Args:
        image_shape: sequence ``(height, width)`` or ``(height, width,
            channels)``. When the channel count is omitted, 3 is used.

    Returns:
        A placeholder named 'image' with shape
        ``[None, height, width, channels]``; the leading dimension is left
        open for the batch size.
    """
    height, width = image_shape[0], image_shape[1]
    # Honour the channel count when the caller supplies one instead of always using 3
    channels = image_shape[2] if len(image_shape) > 2 else 3
    return tf.placeholder(tf.float32, [None, height, width, channels], name='image')

def label_input(class_count):
    """Create the float32 placeholder named 'label' for a batch of label rows.

    Each row has ``class_count`` entries; the batch dimension is left open.
    """
    label_shape = [None, class_count]
    return tf.placeholder(dtype=tf.float32, shape=label_shape, name='label')

def dropout_input():
    """Create a float32 placeholder named 'keep_prob' with no fixed shape."""
    keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    return keep_prob

# Clear the default graph so ops left over from an earlier run of the notebook are dropped
tf.reset_default_graph()

<h4> CNN variables </h4>

In [None]:
import numpy as np # In case the notebook is run after CHECKPOINT
def conv2d(x_tensor, conv_features, conv_filter, conv_strides):
    """Apply a 'SAME'-padded 2-D convolution, add a bias and pass through ReLU.

    Args:
        x_tensor: input tensor; its last dimension is used as the input depth.
        conv_features: number of output feature maps.
        conv_filter: (height, width) of the convolution kernel.
        conv_strides: (vertical, horizontal) stride.

    Returns:
        The activated convolution output tensor.
    """
    in_channels = x_tensor.get_shape().as_list()[-1]
    kernel_shape = [conv_filter[0], conv_filter[1], in_channels, conv_features]
    strides = [1, conv_strides[0], conv_strides[1], 1]

    # Kernel drawn from a normal distribution (stddev 0.1); bias starts at zero
    kernel = tf.Variable(tf.random_normal(shape=kernel_shape, stddev=0.1))
    offset = tf.Variable(tf.zeros(conv_features))

    pre_activation = tf.nn.conv2d(x_tensor, kernel, strides, 'SAME') + offset
    return tf.nn.relu(pre_activation)

def max_pool(x_tensor, pool_size, pool_stride):
    """Max-pool ``x_tensor`` with 'SAME' padding.

    Args:
        x_tensor: input tensor.
        pool_size: (height, width) of the pooling window.
        pool_stride: (vertical, horizontal) stride of the window.
    """
    window = [1, pool_size[0], pool_size[1], 1]
    step = [1, pool_stride[0], pool_stride[1], 1]
    return tf.nn.max_pool(x_tensor, window, step, padding='SAME')

def flatten(x_tensor):
    """Reshape ``x_tensor`` to 2-D, keeping the first dimension.

    All dimensions after the first are multiplied together to give the width
    of the flattened rows.
    """
    dims = x_tensor.get_shape().as_list()
    flat_size = np.prod(dims[1:])
    return tf.reshape(x_tensor, [-1, flat_size])

def fully_conn(x_tensor, num_outputs):
    """Fully connected layer with ReLU activation.

    Args:
        x_tensor: 2-D input; its last dimension is the number of inputs.
        num_outputs: number of units in the layer.

    Returns:
        ``relu(x_tensor @ W + b)`` with W drawn from a normal distribution
        (stddev 0.1) and b initialised to zero.
    """
    num_inputs = x_tensor.get_shape().as_list()[-1]
    weights = tf.Variable(tf.random_normal([num_inputs, num_outputs], stddev=0.1))
    offsets = tf.Variable(tf.zeros(num_outputs))

    linear = tf.add(tf.matmul(x_tensor, weights), offsets)
    return tf.nn.relu(linear)

def output(x_tensor, num_outputs):
    """Final linear layer: returns ``x_tensor @ W + b`` with no activation.

    Args:
        x_tensor: 2-D input; its last dimension is the number of inputs.
        num_outputs: number of output units.
    """
    num_inputs = x_tensor.get_shape().as_list()[-1]
    # NOTE(review): no stddev is passed here, unlike the 0.1 used by the other
    # layers in this notebook — confirm this is intentional.
    weights = tf.Variable(tf.random_normal([num_inputs, num_outputs]))
    offsets = tf.Variable(tf.zeros(num_outputs))

    projected = tf.matmul(x_tensor, weights)
    return tf.add(projected, offsets)

<h3> <strong> First CNN </strong> </h3>
<ul>
<li> <strong> Convolution </strong> 3 x 3 filter, 64 features </li>
<li> <strong> Max Pool </strong> 3 x 3 size, stride 2 </li>
<li> <strong> Dropout </strong> 50% probability</li>
<li> <strong> Convolution </strong> 3 x 3 filter, 64 features </li>
<li> <strong> Max Pool </strong> 3 x 3 size, stride 2 </li>
<li> <strong> Dropout </strong> 50% probability</li>
<li> <strong> Convolution </strong> 3 x 3 filter, 96 features </li>
<li> <strong> Max Pool </strong> 2 x 2 size, stride 2 </li>
<li> <strong> Dropout </strong> 50% probability</li>
<li> <strong> Convolution </strong> 3 x 3 filter, 128 features </li>
<li> <strong> Max Pool </strong> 2 x 2 size, stride 2 </li>
<li> <strong> Dropout </strong> 50% probability</li>
<li> <strong> Flatten layer </strong> </li>
<li> <strong> Fully connected layer </strong> 128 </li>
<li> <strong> Softmax layer </strong> </li>
</ul>

In [5]:
def cnn_model_one(input, keep_prob=None):
    """Build the first CNN: four conv / max-pool stages, one dense layer, logits.

    Two wiring defects of the earlier version are corrected here:
      * the result of every ``tf.nn.dropout`` call was thrown away, so dropout
        never affected the network;
      * the flatten layer read the third pooling stage, leaving the fourth
        conv / pool stage disconnected from the logits.

    Because the dense layer now receives the fourth pooling stage, its weight
    shape differs from the earlier wiring; checkpoints saved with that wiring
    will not restore into this graph.

    Args:
        input: image batch tensor.
        keep_prob: optional keep probability (float or scalar tensor, e.g. the
            placeholder from ``dropout_input()``) applied after each pooling
            stage. Use 0.5 while training and 1.0 while evaluating to get the
            documented 50% dropout. When None (default) no dropout ops are
            added.

    Returns:
        Logits tensor with 10 outputs per example.
    """
    def apply_dropout(layer):
        # Dropout is only wired in when the caller supplies a keep probability
        if keep_prob is None:
            return layer
        return tf.nn.dropout(layer, keep_prob)

    # Original author's figures, measured with the earlier wiring (flatten on the
    # third pooling stage, dropout without effect) - re-measure after this change:
    #402K parameters , 71,58% accuracy after 170 iteration
    conv1 = conv2d(input, conv_features=64, conv_filter=[3,3], conv_strides=[1,1])
    pool1 = apply_dropout(max_pool(conv1, pool_size=[3,3], pool_stride=[2,2]))

    conv2 = conv2d(pool1, 64, [3,3], [1,1])
    pool2 = apply_dropout(max_pool(conv2, [3,3], [2,2]))

    conv3 = conv2d(pool2, 96, [3,3], [1,1])
    pool3 = apply_dropout(max_pool(conv3, [2,2], [2,2]))

    conv4 = conv2d(pool3, 128, [3,3], [1,1])
    pool4 = apply_dropout(max_pool(conv4, [2,2], [2,2]))

    # Flatten the LAST pooling stage so the fourth conv block feeds the classifier
    flattened = flatten(pool4)
    fc1 = fully_conn(flattened, 128)

    return output(fc1, 10)

<h3> <strong> Second CNN </strong> </h3>
<ul>
<li> <strong> Convolution </strong> 3 x 3 filter, 32 features </li>
<li> <strong> Convolution </strong> 3 x 3 filter, 32 features </li>
<li> <strong> Max Pool </strong> 2 x 2 size, stride 2 </li>
<li> <strong> Dropout </strong> 25% probability</li>
<li> <strong> Convolution </strong> 3 x 3 filter, 48 features </li>
<li> <strong> Convolution </strong> 3 x 3 filter, 48 features </li>
<li> <strong> Max Pool </strong> 2 x 2 size, stride 2 </li>
<li> <strong> Dropout </strong> 25% probability</li>
<li> <strong> Flatten layer </strong> </li>
<li> <strong> Fully connected layer </strong> 128 </li>
<li> <strong> Dropout </strong> 50% probability</li>
<li> <strong> Softmax layer </strong> </li>
</ul>

In [6]:
def cnn_model_two(input, conv_keep_prob=None, fc_keep_prob=None):
    """Build the second CNN: two double-conv / max-pool stages, one dense layer, logits.

    The earlier version called ``tf.nn.dropout`` but discarded every result, so
    dropout never affected the network. Dropout is now applied to the tensors
    that flow onward, and only when a keep probability is supplied. With both
    arguments left at None the network computes exactly what it did before.

    Args:
        input: image batch tensor.
        conv_keep_prob: optional keep probability (float or scalar tensor)
            applied after each pooling stage. 0.75 gives the documented 25%
            dropout; feed 1.0 while evaluating.
        fc_keep_prob: optional keep probability applied after the dense layer.
            0.5 gives the documented 50% dropout; feed 1.0 while evaluating.

    Returns:
        Logits tensor with 10 outputs per example.
    """
    def apply_dropout(layer, keep):
        # Skip the op entirely when no keep probability was requested
        if keep is None:
            return layer
        return tf.nn.dropout(layer, keep)

    # Original author's figures (dropout had no effect when these were measured):
    #440K parameters, 66.09% accuracy after 88 iterations
    conv1 = conv2d(input, conv_features=32, conv_filter=[3,3], conv_strides=[1,1])
    conv2 = conv2d(conv1, 32, [3,3], [1,1])
    pool1 = apply_dropout(max_pool(conv2, pool_size=[2,2], pool_stride=[2,2]), conv_keep_prob)

    conv3 = conv2d(pool1, 48, [3,3], [1,1])
    conv4 = conv2d(conv3, 48, [3,3], [1,1])
    pool2 = apply_dropout(max_pool(conv4, [2,2], [2,2]), conv_keep_prob)

    flattened = flatten(pool2)
    fc1 = apply_dropout(fully_conn(flattened, 128), fc_keep_prob)

    return output(fc1, 10)

<h4> Define variables for training </h4>

In [7]:
# Start from an empty default graph, then create the two input placeholders
tf.reset_default_graph()
images = image_input(image_shape=(32, 32, 3))
labels = label_input(class_count=10)

<h4> Select which model to train </h4>
<hr />
<h4> save_model_path is used both when saving the model and when evaluating it </h4>

In [8]:
# Exactly one model / save-path pair should be active at a time. To train the first
# network instead, uncomment the two lines below and comment out the cnn_model_two pair.
#logits = cnn_model_one(images)
#save_model_path =".\\cifar_model_one"
logits = cnn_model_two(images)
# NOTE(review): the path uses a backslash separator — presumably written for Windows;
# confirm before running on another platform.
save_model_path = ".\\cifar_model_two"

# Re-export the logits under the fixed op name 'logits'
# (presumably so they can be fetched by name when the saved model is evaluated — confirm).
logits = tf.identity(logits, name='logits')

<h4> Define loss and optimizer </h4>

In [9]:
# Loss: softmax cross-entropy between logits and labels, averaged over the batch
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
cost = tf.reduce_mean(per_example_loss)
# Optimizer: Adam with its default settings, minimising the loss above
optimizer = tf.train.AdamOptimizer().minimize(cost)
# Accuracy: fraction of examples whose highest logit matches the labelled class
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(labels, 1)
correct_result = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_result, tf.float32), name='accuracy')

<h4> Specify iteration count and batch size </h4>

In [10]:
# Number of times the training functions repeat their outer loop
MAX_ITERATIONS = 200
# Batch size handed to read_data.load_preprocess_training_batch
BATCH_SIZE = 128

<h4> Functions to train networks </h4>

In [11]:
import read_data

def train_network(session, optimizer, image_batch, label_batch):
    """Run one optimisation step on a single mini-batch.

    The batch is fed through the module-level ``images`` and ``labels``
    placeholders.
    """
    feed = {images: image_batch, labels: label_batch}
    session.run(optimizer, feed_dict=feed)

def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """Print the cost on one batch and the accuracy on the validation set.

    Args:
        session: active TensorFlow session.
        feature_batch: images on which the cost is evaluated.
        label_batch: labels matching ``feature_batch``.
        cost: cost tensor to evaluate.
        accuracy: accuracy tensor to evaluate.

    The validation data is read from the module-level ``valid_images`` and
    ``valid_labels``. They are only read, never assigned, so no ``global``
    declaration is needed (the previous one named a non-existent
    ``valid_features``).
    """
    validation_accuracy = session.run(
        accuracy,
        feed_dict={
            images: valid_images,
            labels: valid_labels,
        }
    )
    # Keep the evaluated number in its own name instead of rebinding the tensor argument
    batch_cost = session.run(
        cost,
        feed_dict={
            images: feature_batch,
            labels: label_batch,
        }
    )
    print('Cost = {0} - Validation Accuracy = {1} '.format(batch_cost, validation_accuracy))

def train_with_all_batches():
    """Train on all five CIFAR-10 training batches for MAX_ITERATIONS passes.

    After each batch file the cost and validation accuracy are printed, and a
    checkpoint is written to ``save_model_path`` at the end of every iteration.

    A single ``tf.train.Saver`` is created up front and reused. Building a new
    Saver on every iteration adds another set of save/restore ops to the graph
    each time, so the graph keeps growing as training runs.
    """
    num_of_batches = 5

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # Delete the # below line to restore an existing model and train it
        #saver.restore(sess, save_model_path)
        for iteration in range(MAX_ITERATIONS):
            for batch_i in range(1, num_of_batches + 1):
                print('Iteration {:>2}, CIFAR-10 Batch {}:  '.format(iteration + 1, batch_i), end='')
                for batch_images, batch_labels in read_data.load_preprocess_training_batch(batch_i, BATCH_SIZE):
                    train_network(sess, optimizer, batch_images, batch_labels)
                # Stats use the last mini-batch yielded for this batch file
                print_stats(sess, batch_images, batch_labels, cost, accuracy)
            # Checkpoint after every iteration so an interrupted run keeps its progress
            saver.save(sess, save_model_path)

        # Save Model (also covers the case where the loop above never ran)
        saver.save(sess, save_model_path)

def train_with_one_batch():
    """Train on CIFAR-10 training batch 1 only, for MAX_ITERATIONS passes.

    Cost and validation accuracy are printed after every pass, and the model
    is saved to ``save_model_path`` once training has finished.
    """
    batch_i = 1

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for step in range(1, MAX_ITERATIONS + 1):
            print('Iteration {:>2}, CIFAR-10 Batch {}:  '.format(step, batch_i), end='')
            mini_batches = read_data.load_preprocess_training_batch(batch_i, BATCH_SIZE)
            for batch_images, batch_labels in mini_batches:
                train_network(sess, optimizer, batch_images, batch_labels)
            # Stats are reported on the last mini-batch of the pass
            print_stats(sess, batch_images, batch_labels, cost, accuracy)

        tf.train.Saver().save(sess, save_model_path)

<h3> Train Network </h3>

In [None]:
# Swap the comment marker between the two calls to train on the first batch only
#train_with_one_batch()
train_with_all_batches()