## Todo:
- Experiment with normalization while creating triplets
- Do you need dropout in the regressor network?
- Simultaneous feature learning?
- See when are results bad/equal/better? See which classes they correspond to.. 

# Todo on TF model
- implement data import
- implement data normalization
- debug

In [1]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random

from utils import optimistic_restore, save
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))
import pickle_utils
import cifar_utils

import pdb

In [2]:
'''
HYPERPARAMS
'''
BATCH_SIZE = 10
DATA_PATH = '/media/red/capstone/data/cifar-100/cifar-custom'
LEARNING_RATE = 1e-4
BETA1 = 0.9
BETA2 = 0.99
NUM_CLASSES = 40
NUM_EPOCH = 100
RANDOM_SEED = 1234
SUMMARY_EVERY = 1000
VALIDATION_PERCENTAGE = 0.05
SNAPSHOT_MAX = 10 # Keeps the last best 10 snapshots (best determined by validation accuracy)
SNAPSHOT_DIR = '/media/red/capstone/snapshots/feature_extractor_cnn_lr_augment'

np.random.seed(seed=RANDOM_SEED)

In [3]:
'''
Load custom CIFAR data. 
'''
# cifar_raw = pickle_utils.load(DATA_PATH)
custom_dataset = pickle_utils.load(DATA_PATH)

data_x, data_y = [], []
for label in custom_dataset['training'].keys():
    for item in custom_dataset['training'][label]:
        data_x.append(item) # 28 x 28 x 3
        data_y.append(label) # 0-39
data_x = np.stack(data_x).astype(np.float32)
data_y = np.stack(data_y).astype(np.int32)

# Normalize x
data_x = (data_x / 255.0) - 0.5

def round_to(n, precision):
    return int( n/precision+0.5 ) * precision

n_total_data = data_x.shape[0]
n_validation = round_to(VALIDATION_PERCENTAGE * n_total_data, BATCH_SIZE)
batches_per_epoch = np.round((n_total_data - n_validation) / BATCH_SIZE)
# Shuffle data
random_indices = np.random.permutation(n_total_data)
train_indices = cycle(random_indices[n_validation:])
validation_indices = random_indices[:n_validation]

In [4]:
'''
Declare model
'''
def feature_extractor_cnn(x, y, is_training):
    net = []
    net.append(layers.conv2d(
            input=x,
            shape=(3,3,3,32),
            padding='SAME',
            activation='relu',
            name='conv_1_1'
            )[0])
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_1_2'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,32,32),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_1_2'
            )[0])
    net.append(tf.nn.max_pool(
            value=net[-1],
            ksize=[1, 2, 2, 1],
            strides=[1,2,2,1],
            padding='SAME',
            name='pool_1'
            ))
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_2_1'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,32,64),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_2_1'
            )[0])
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_2_2'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,64,64),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_2_2'
            )[0])
    net.append(tf.nn.max_pool(
            value=net[-1],
            ksize=[1, 2, 2, 1],
            strides=[1,2,2,1],
            padding='SAME',
            name='pool_2'
            ))

    net.append(tf.contrib.layers.flatten(
            inputs=net[-1],
            scope='flat_1'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=1024,
            activation='relu',
            name='fc_1'
            )[0])
    net.append(tf.nn.dropout(
            x=net[-1],
            keep_prob=0.5,
            name='dropout_fc_1'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=1024,
            activation='relu',
            name='fc_2'
            )[0])
    net.append(tf.nn.dropout(
            x=net[-1],
            keep_prob=0.5,
            name='dropout_fc_2'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=NUM_CLASSES,
            activation='linear',
            name='fc_3'
            )[0])
    return net

In [5]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(BATCH_SIZE, 32, 32, 3))
y = tf.placeholder(dtype=tf.int32, shape=(BATCH_SIZE))
is_training = tf.placeholder(dtype=tf.bool)

net = feature_extractor_cnn(x, y, is_training)

'''
Loss, Metrics, and Optimization Setup
'''
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, #GT probability distribution
        logits=net[-1], # unscaled log prob
        name='sparse_softmax_cross_entropy')

reduced_loss = tf.reduce_mean(loss)
train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)

optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE,
        beta1=BETA1,
        beta2=BETA2,
        name='AdamOptimizer')
train_op = optimizer.minimize(reduced_loss)

pred = tf.nn.softmax(
        logits=net[-1],
        name='softmax')
pred_class = tf.cast(tf.argmax(pred, axis=1), tf.int32)
acc = tf.reduce_mean(tf.cast(
            tf.equal(y, pred_class),
        tf.float32))

train_acc_summary = tf.summary.scalar('training_accuracy', acc)


'''
TensorBoard Setup
'''
all_train_summary = tf.summary.merge_all()

summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
        graph=tf.get_default_graph())

'''
Tensorflow Saver Setup
'''
saver = tf.train.Saver(var_list=tf.global_variables(),
                       max_to_keep=SNAPSHOT_MAX)

'''
Tensorflow Session Setup
'''
tf.set_random_seed(RANDOM_SEED)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Declare Validation Loop
'''
def run_validation():
    global best_acc
    
    start_t = time()
    overall_acc = 0
    overall_loss = 0
    for j in range(0, n_validation, BATCH_SIZE):
        # Assemble Batch
        idx = validation_indices[j:(j+BATCH_SIZE)]
        x_batch = data_x[idx,...]
        y_batch = data_y[idx,...]
        
        feed_dict = {x:x_batch,
                 y:y_batch,
                 is_training: False}
        loss_v, acc_v, pred_v = sess.run(
                [reduced_loss, acc, pred],
                feed_dict=feed_dict)
        overall_acc += acc_v
        overall_loss += loss_v
        
        
    duration = time() - start_t
    overall_acc /= (n_validation / BATCH_SIZE)
    overall_loss /= (n_validation / BATCH_SIZE)
    
    overall_acc_summary = tf.Summary()
    overall_acc_summary.value.add(tag='validation_accuracy', simple_value=overall_acc)
    overall_loss_summary = tf.Summary()
    overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)

    summary_writer.add_summary(overall_acc_summary, step)
    summary_writer.add_summary(overall_loss_summary, step)
    
    print('VALIDATION \t acc = {:.3f} ({:.3f} sec)'.format(
                overall_acc, duration))
    if overall_acc > best_acc:
        print('New Best Accuracy {:.3f} > Old Best {:.3f}.  Saving...'.format(
                overall_acc, best_acc))
        best_acc = overall_acc
        save(saver, sess, SNAPSHOT_DIR, step)
        
'''
Main Training Loop
'''
step = 0
epoch = 0
best_acc = 0
while epoch < NUM_EPOCH:
    step += 1
    # Allocate Space For Batch
    x_batch = np.zeros((BATCH_SIZE,) + data_x.shape[1:], dtype=np.float32)
    y_batch = np.zeros((BATCH_SIZE,) + data_y.shape[1:], dtype=np.int32)
    
    # Run Validation
    if step % batches_per_epoch == 0:
        epoch += 1
        run_validation()
        
    # Form Training Batch
    start_t = time()
    for i in range(BATCH_SIZE):
        idx = next(train_indices)
        x_batch[i,...] = data_x[idx, ...]
        y_batch[i,...] = data_y[idx, ...]
    
    # Data Augmentation
    if random.random() < 0.5:
        x_batch = np.fliplr(x_batch)
        
    # Prepare Feed Dictionary
    feed_dict = {x:x_batch,
                 y:y_batch,
                 is_training: True}
    # Run Training Summary
    if step % SUMMARY_EVERY == 0:
        loss_v, _, summary_v, acc_v, pred_v = sess.run(
                [reduced_loss, train_op, all_train_summary, acc, pred],
                feed_dict=feed_dict)
        summary_writer.add_summary(summary_v, step)
        duration = time() - start_t
        print('step {:d} \t loss = {:.3f}, train_acc = {:.3f} ({:.3f} sec/step)'.format(
                step, loss_v, acc_v, duration))
    else: # Run Simple Train
        loss_v, _ = sess.run([reduced_loss, train_op],
                feed_dict=feed_dict)


step 1000 	 loss = 2.958, train_acc = 0.100 (0.165 sec/step)
VALIDATION 	 acc = 0.108 (0.359 sec)
New Best Accuracy 0.108 > Old Best 0.000.  Saving...
The checkpoint has been created.
step 2000 	 loss = 3.456, train_acc = 0.100 (0.129 sec/step)
step 3000 	 loss = 3.319, train_acc = 0.200 (0.126 sec/step)
VALIDATION 	 acc = 0.164 (0.348 sec)
New Best Accuracy 0.164 > Old Best 0.108.  Saving...
The checkpoint has been created.
step 4000 	 loss = 3.048, train_acc = 0.000 (0.126 sec/step)
step 5000 	 loss = 2.610, train_acc = 0.200 (0.128 sec/step)
VALIDATION 	 acc = 0.195 (0.350 sec)
New Best Accuracy 0.195 > Old Best 0.164.  Saving...
The checkpoint has been created.
step 6000 	 loss = 2.943, train_acc = 0.200 (0.130 sec/step)
step 7000 	 loss = 1.999, train_acc = 0.500 (0.128 sec/step)
VALIDATION 	 acc = 0.229 (0.349 sec)
New Best Accuracy 0.229 > Old Best 0.195.  Saving...
The checkpoint has been created.
step 8000 	 loss = 3.693, train_acc = 0.000 (0.127 sec/step)
step 9000 	 loss = 2

KeyboardInterrupt: 