## Todo:
- Experiment with normalization while creating triplets
- Do you need dropout in the regressor network?
- Simultaneous feature learning?
- See when are results bad/equal/better? See which classes they correspond to.. 


In [1]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random

from utils import optimistic_restore, save
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))
import pickle_utils
import cifar_utils

import pdb

In [2]:
'''
HYPERPARAMS
'''
BATCH_SIZE = 10
DATA_PATH = '/media/red/capstone/data/cifar-100/cifar-custom'
LEARNING_RATE = 1e-4
BETA1 = 0.9
BETA2 = 0.99
NUM_CLASSES = 40
NUM_EPOCH = 100
RANDOM_SEED = 1234
SUMMARY_EVERY = 10
VALIDATION_PERCENTAGE = 0.05
SNAPSHOT_MAX = 10 # Keeps the last best 10 snapshots (best determined by validation accuracy)
SNAPSHOT_DIR = '/media/red/capstone/snapshots/feature_extractor_vgg16'
PRETRAINED_WEIGHT_FILE = '/media/red/capstone/pretrained_weights/vgg16_weights.npz'

np.random.seed(seed=RANDOM_SEED)

In [3]:
'''
Load custom CIFAR data. 
'''
# cifar_raw = pickle_utils.load(DATA_PATH)
custom_dataset = pickle_utils.load(DATA_PATH)

data_x, data_y = [], []
for label in custom_dataset['training'].keys():
    for item in custom_dataset['training'][label]:
        data_x.append(item) # 28 x 28 x 3
        data_y.append(label) # 0-39
data_x = np.stack(data_x).astype(np.float32)
data_x = np.flip(data_x, axis=-1) # BGR
data_y = np.stack(data_y).astype(np.int32)

# Normalize x
data_x = (data_x / 255.0) - 0.5

def round_to(n, precision):
    return int( n/precision+0.5 ) * precision

n_total_data = data_x.shape[0]
n_validation = round_to(VALIDATION_PERCENTAGE * n_total_data, BATCH_SIZE)
batches_per_epoch = np.round((n_total_data - n_validation) / BATCH_SIZE)
# Shuffle data
random_indices = np.random.permutation(n_total_data)
train_indices = cycle(random_indices[n_validation:])
validation_indices = random_indices[:n_validation]

In [None]:
'''
Declare model
'''
class vgg16:
    '''
    VGG16 Model with ImageNet pretrained weight loader method
    Weights can be downloaded from:
    https://www.cs.toronto.edu/~frossard/vgg16/vgg16_weights.npz
    '''

    def __init__(self, x, y, phase):
        '''
        Sets up network enough to do a forward pass.
        '''

        """ init the model with hyper-parameters etc """

        # List used for loading weights from vgg16.npz (if necessary)
        self.parameters = []
        self.CONV_ACTIVATION = 'relu'
        self.FC_ACTIVATION   = 'relu'

        ########
        # Misc #
        ########
        self.global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False,
                        initializer=0)
        self.learning_rate = LEARNING_RATE
        self.IM_SHAPE = [224, 224, 3]

        ####################
        # I/O placeholders #
        ####################
        self.x = x
        self.x.set_shape([None]+self.IM_SHAPE)
        self.y = tf.to_int32(y)

        ###############
        # Main Layers #
        ###############
        with tf.variable_scope('conv_layers'):
            self._convlayers()
        with tf.variable_scope('fc_layers'):
            self._fc_layers()

        ######################
        # Define Collections #
        ######################
        self.conv_trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                "conv_layers")
        self.fc_trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                "fc_layers")

    def evaluate(self):
        '''
        Returns the count of correct classifications (Tensor).
        '''
        # Bool Tensor where 1 is correct and 0 is incorrect
        correct = tf.nn.in_top_k(self.predictions, self.y, 1)
        # Average them to get accuracy.  Must cast to a float32
        self.accuracy = tf.reduce_mean(tf.to_float(correct))
        return self.accuracy

    #####################
    # Private Functions #
    #####################
    def _convlayers(self):
        '''
        All conv and pooling layers of VGG16
        '''
        # zero-mean input; resizing has to be done beforehand for uniform tensor shape
        with tf.variable_scope('preprocess'):
            mean = tf.constant([123.68, 116.779, 103.939],
                    dtype=tf.float32,
                    shape=[1, 1, 1, 3],
                    name='img_mean')
            self.images = self.x*255.0 - mean

        # conv1_1
        self.conv1_1, weights, biases = layers.conv2d(name='conv1_1',
                input=self.images,
                shape=(3,3,3,64),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv1_2
        self.conv1_2, weights, biases = layers.conv2d(name='conv1_2',
                input=self.conv1_1,
                shape=(3,3,64,64),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool1
        self.pool1 = tf.nn.max_pool(self.conv1_2,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool1')

        # conv2_1
        self.conv2_1, weights, biases = layers.conv2d(name='conv2_1',
                input=self.pool1,
                shape=(3,3,64,128),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv2_2
        self.conv2_2, weights, biases = layers.conv2d(name='conv2_2',
                input=self.conv2_1,
                shape=(3,3,128,128),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool2
        self.pool2 = tf.nn.max_pool(self.conv2_2,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool2')

        # conv3_1
        self.conv3_1, weights, biases = layers.conv2d(name='conv3_1',
                input=self.pool2,
                shape=(3,3,128,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv3_2
        self.conv3_2, weights, biases = layers.conv2d(name='conv3_2',
                input=self.conv3_1,
                shape=(3,3,256,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv3_3
        self.conv3_3, weights, biases = layers.conv2d(name='conv3_3',
                input=self.conv3_2,
                shape=(3,3,256,256),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool3
        self.pool3 = tf.nn.max_pool(self.conv3_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool3')

        # conv4_1
        self.conv4_1, weights, biases = layers.conv2d(name='conv4_1',
                input=self.pool3,
                shape=(3,3,256,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv4_2
        self.conv4_2, weights, biases = layers.conv2d(name='conv4_2',
                input=self.conv4_1,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv4_3
        self.conv4_3, weights, biases = layers.conv2d(name='conv4_3',
                input=self.conv4_2,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool4
        self.pool4 = tf.nn.max_pool(self.conv4_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool4')

        # conv5_1
        self.conv5_1, weights, biases = layers.conv2d(name='conv5_1',
                input=self.pool4,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv5_2
        self.conv5_2, weights, biases = layers.conv2d(name='conv5_2',
                input=self.conv5_1,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # conv5_3
        self.conv5_3, weights, biases = layers.conv2d(name='conv5_3',
                input=self.conv5_2,
                shape=(3,3,512,512),
                padding='SAME',
                strides = [1,1,1,1],
                activation=self.CONV_ACTIVATION)
        self.parameters += [weights, biases]

        # pool5
        self.pool5 = tf.nn.max_pool(self.conv5_3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='SAME',
                name='pool5')

    def _fc_layers(self):
        '''
        All FC layers of VGG16 (+custom layers)
        '''
        # fc1
        self.fc1, weights, biases = layers.fc(name='fc1',
                input=tf.contrib.layers.flatten(self.pool5),
                units=4096,
                activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc2
        self.fc2, weights, biases = layers.fc(name='fc2',
                input=self.fc1,
                units=4096,
                activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc3
        self.fc3, weights, biases = layers.fc(name='fc3',
                input=self.fc2,
                units=NUM_CLASSES,
                activation='linear')

    def load_pretrained_weights(self, sess):
        '''
        Load Pretrained VGG16 weights from .npz file
        (weights converted from Caffe)
        To only be used when no TensorFlow Snapshot is avaialable.
        Assumes layers are properly added to self.parameters.
        '''
        print("Loading Imagenet Weights.")

        weights = np.load(PRETRAINED_WEIGHT_FILE)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            print(i, k, np.shape(weights[k]))
            try:
                sess.run(self.parameters[i].assign(weights[k]))
            except:
                print("%s layer not found." % k)

In [None]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(BATCH_SIZE, 32, 32, 3))
x_resized = tf.image.resize_images(x, (224, 224))
y = tf.placeholder(dtype=tf.int32, shape=(BATCH_SIZE))
is_training = tf.placeholder(dtype=tf.bool)

net = vgg16(x_resized, y, is_training)

'''
Loss, Metrics, and Optimization Setup
'''
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, #GT probability distribution
        logits=net.fc3, # unscaled log prob
        name='sparse_softmax_cross_entropy')

reduced_loss = tf.reduce_mean(loss)
train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)

optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE,
        beta1=BETA1,
        beta2=BETA2,
        name='AdamOptimizer')
train_op = optimizer.minimize(reduced_loss)

pred = tf.nn.softmax(
        logits=net.fc3,
        name='softmax')
pred_class = tf.cast(tf.argmax(pred, axis=1), tf.int32)
acc = tf.reduce_mean(tf.cast(
        tf.equal(y, pred_class),
        tf.float32))

train_acc_summary = tf.summary.scalar('training_accuracy', acc)


'''
TensorBoard Setup
'''
all_train_summary = tf.summary.merge_all()

summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
        graph=tf.get_default_graph())

'''
Tensorflow Saver Setup
'''
saver = tf.train.Saver(var_list=tf.global_variables(),
                       max_to_keep=SNAPSHOT_MAX)

'''
Tensorflow Session Setup
'''
tf.set_random_seed(RANDOM_SEED)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Load Pretrained Weights (ImageNet)
'''
net.load_pretrained_weights(sess)

'''
Declare Validation Loop
'''
def run_validation():
    global best_acc
    
    start_t = time()
    overall_acc = 0
    overall_loss = 0
    for j in range(0, n_validation, BATCH_SIZE):
        # Assemble Batch
        idx = validation_indices[j:(j+BATCH_SIZE)]
        x_batch = data_x[idx,...]
        y_batch = data_y[idx,...]
        
        feed_dict = {x:x_batch,
                 y:y_batch,
                 is_training: False}
        loss_v, acc_v, pred_v = sess.run(
                [reduced_loss, acc, pred],
                feed_dict=feed_dict)
        overall_acc += acc_v
        overall_loss += loss_v
        
        
    duration = time() - start_t
    overall_acc /= (n_validation / BATCH_SIZE)
    overall_loss /= (n_validation / BATCH_SIZE)
    
    overall_acc_summary = tf.Summary()
    overall_acc_summary.value.add(tag='validation_accuracy', simple_value=overall_acc)
    overall_loss_summary = tf.Summary()
    overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)

    summary_writer.add_summary(overall_acc_summary, step)
    summary_writer.add_summary(overall_loss_summary, step)
    
    print('VALIDATION \t acc = {:.3f} ({:.3f} sec)'.format(
                overall_acc, duration))
    if overall_acc > best_acc:
        print('New Best Accuracy {:.3f} > Old Best {:.3f}.  Saving...'.format(
                overall_acc, best_acc))
        best_acc = overall_acc
        save(saver, sess, SNAPSHOT_DIR, step)
        
'''
Main Training Loop
'''
step = 0
epoch = 0
best_acc = 0
while epoch < NUM_EPOCH:
    step += 1
    # Allocate Space For Batch
    x_batch = np.zeros((BATCH_SIZE,) + data_x.shape[1:], dtype=np.float32)
    y_batch = np.zeros((BATCH_SIZE,) + data_y.shape[1:], dtype=np.int32)
    
    # Run Validation
    if step % batches_per_epoch == 0:
        epoch += 1
        run_validation()
        
    # Form Training Batch
    start_t = time()
    for i in range(BATCH_SIZE):
        idx = next(train_indices)
        x_batch[i,...] = data_x[idx, ...]
        y_batch[i,...] = data_y[idx, ...]
    
    # Data Augmentation
    if random.random() < 0.5:
        x_batch = np.fliplr(x_batch)
        
    # Prepare Feed Dictionary
    feed_dict = {x:x_batch,
                 y:y_batch,
                 is_training: True}
    # Run Training Summary
    if step % SUMMARY_EVERY == 0:
        loss_v, _, summary_v, acc_v, pred_v = sess.run(
                [reduced_loss, train_op, all_train_summary, acc, pred],
                feed_dict=feed_dict)
        summary_writer.add_summary(summary_v, step)
        duration = time() - start_t
        print('step {:d} \t loss = {:.3f}, train_acc = {:.3f} ({:.3f} sec/step)'.format(
                step, loss_v, acc_v, duration))
    else: # Run Simple Train
        loss_v, _ = sess.run([reduced_loss, train_op],
                feed_dict=feed_dict)


Loading Imagenet Weights.
0 conv1_1_W (3, 3, 3, 64)
1 conv1_1_b (64,)
2 conv1_2_W (3, 3, 64, 64)
3 conv1_2_b (64,)
4 conv2_1_W (3, 3, 64, 128)
5 conv2_1_b (128,)
6 conv2_2_W (3, 3, 128, 128)
7 conv2_2_b (128,)
8 conv3_1_W (3, 3, 128, 256)
9 conv3_1_b (256,)
10 conv3_2_W (3, 3, 256, 256)
11 conv3_2_b (256,)
12 conv3_3_W (3, 3, 256, 256)
13 conv3_3_b (256,)
14 conv4_1_W (3, 3, 256, 512)
15 conv4_1_b (512,)
16 conv4_2_W (3, 3, 512, 512)
17 conv4_2_b (512,)
18 conv4_3_W (3, 3, 512, 512)
19 conv4_3_b (512,)
20 conv5_1_W (3, 3, 512, 512)
21 conv5_1_b (512,)
22 conv5_2_W (3, 3, 512, 512)
23 conv5_2_b (512,)
24 conv5_3_W (3, 3, 512, 512)
25 conv5_3_b (512,)
26 fc6_W (25088, 4096)
27 fc6_b (4096,)
28 fc7_W (4096, 4096)
29 fc7_b (4096,)
30 fc8_W (4096, 1000)
fc8_W layer not found.
31 fc8_b (1000,)
fc8_b layer not found.
step 10 	 loss = 4.016, train_acc = 0.100 (3.284 sec/step)
step 20 	 loss = 3.802, train_acc = 0.000 (3.264 sec/step)
step 30 	 loss = 3.768, train_acc = 0.100 (3.209 sec/step)
s

step 1240 	 loss = 2.155, train_acc = 0.400 (3.167 sec/step)
step 1250 	 loss = 2.729, train_acc = 0.200 (3.202 sec/step)
step 1260 	 loss = 2.669, train_acc = 0.400 (3.197 sec/step)
step 1270 	 loss = 3.279, train_acc = 0.200 (3.172 sec/step)
step 1280 	 loss = 2.550, train_acc = 0.200 (3.194 sec/step)
step 1290 	 loss = 3.035, train_acc = 0.200 (3.181 sec/step)
step 1300 	 loss = 2.790, train_acc = 0.100 (3.197 sec/step)
step 1310 	 loss = 3.025, train_acc = 0.100 (3.171 sec/step)
step 1320 	 loss = 2.360, train_acc = 0.200 (3.176 sec/step)
step 1330 	 loss = 2.270, train_acc = 0.400 (3.234 sec/step)
step 1340 	 loss = 2.807, train_acc = 0.100 (3.195 sec/step)
step 1350 	 loss = 2.853, train_acc = 0.200 (3.172 sec/step)
step 1360 	 loss = 3.099, train_acc = 0.200 (3.178 sec/step)
step 1370 	 loss = 2.313, train_acc = 0.200 (3.228 sec/step)
step 1380 	 loss = 2.563, train_acc = 0.200 (3.187 sec/step)
step 1390 	 loss = 3.341, train_acc = 0.100 (3.209 sec/step)
step 1400 	 loss = 2.515

step 2570 	 loss = 3.213, train_acc = 0.000 (3.158 sec/step)
step 2580 	 loss = 1.551, train_acc = 0.400 (3.254 sec/step)
step 2590 	 loss = 2.392, train_acc = 0.200 (3.215 sec/step)
step 2600 	 loss = 3.312, train_acc = 0.100 (3.195 sec/step)
step 2610 	 loss = 1.465, train_acc = 0.600 (3.203 sec/step)
step 2620 	 loss = 1.292, train_acc = 0.500 (3.197 sec/step)
step 2630 	 loss = 2.011, train_acc = 0.500 (3.230 sec/step)
step 2640 	 loss = 1.273, train_acc = 0.600 (3.174 sec/step)
step 2650 	 loss = 2.261, train_acc = 0.300 (3.204 sec/step)
step 2660 	 loss = 2.621, train_acc = 0.100 (3.203 sec/step)
step 2670 	 loss = 1.360, train_acc = 0.700 (3.283 sec/step)
step 2680 	 loss = 2.424, train_acc = 0.300 (3.209 sec/step)
step 2690 	 loss = 1.961, train_acc = 0.400 (3.307 sec/step)
step 2700 	 loss = 2.408, train_acc = 0.300 (3.200 sec/step)
step 2710 	 loss = 2.306, train_acc = 0.200 (3.248 sec/step)
step 2720 	 loss = 1.642, train_acc = 0.600 (3.231 sec/step)
step 2730 	 loss = 1.951

step 3900 	 loss = 1.573, train_acc = 0.400 (3.186 sec/step)
step 3910 	 loss = 1.900, train_acc = 0.300 (3.202 sec/step)
step 3920 	 loss = 1.980, train_acc = 0.500 (3.281 sec/step)
step 3930 	 loss = 1.156, train_acc = 0.600 (3.214 sec/step)
