## Todo:
- Experiment with normalization while creating triplets
- Do you need dropout in the regressor network?
- Simultaneous feature learning?
- See when are results bad/equal/better? See which classes they correspond to.. 

# Todo on TF model
- implement data import
- implement data normalization
- debug

In [1]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle

from utils import optimistic_restore, save
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))
import pickle_utils
import cifar_utils

import pdb

In [2]:
'''
HYPERPARAMS
'''
BATCH_SIZE = 10
DATA_PATH = '/media/red/capstone/data/cifar-100/cifar-custom'
LEARNING_RATE = 2.5e-4
BETA1 = 0.9
BETA2 = 0.99
NUM_CLASSES = 40
NUM_STEPS = 20001
RANDOM_SEED = 1234
SAVE_PRED_EVERY = 1000
SNAPSHOT_MAX = 10
SNAPSHOT_DIR = './snapshots/'


In [3]:
'''
Load custom CIFAR data. 
'''
# cifar_raw = pickle_utils.load(DATA_PATH)
custom_dataset = pickle_utils.load(DATA_PATH)

data_x, data_y = [], []
for label in custom_dataset['training'].keys():
    for item in custom_dataset['training'][label]:
        data_x.append(item) # 28 x 28 x 3
        data_y.append(label) # 0-39
data_x = np.stack(data_x)
data_y = np.stack(data_y).astype(np.int32)

NUM_DATA_POINTS = data_x.shape[0]
# Shuffle data
random_indices = np.random.permutation(NUM_DATA_POINTS)
data_indices = cycle(random_indices)

In [4]:
'''
Declare model
'''
def feature_extractor_cnn(x, y, is_training):
    net = []
    net.append(layers.conv2d(
            input=x,
            shape=(3,3,3,32),
            padding='SAME',
            activation='relu',
            name='conv_1_1'
            )[0])
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_1_2'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,32,32),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_1_2'
            )[0])
    net.append(tf.nn.max_pool(
            value=net[-1],
            ksize=[1, 2, 2, 1],
            strides=[1,2,2,1],
            padding='SAME',
            name='pool_1'
            ))
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_2_1'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,32,64),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_2_1'
            )[0])
    net.append(layers.batch_norm(
            x=net[-1],
            phase=is_training,
            name='bn_2_2'))
    net.append(layers.conv2d(
            input=net[-1],
            shape=(3,3,64,64),
            padding='SAME',
            activation='relu',
            bias=False,
            name='conv_2_2'
            )[0])
    net.append(tf.nn.max_pool(
            value=net[-1],
            ksize=[1, 2, 2, 1],
            strides=[1,2,2,1],
            padding='SAME',
            name='pool_2'
            ))

    net.append(tf.contrib.layers.flatten(
            inputs=net[-1],
            scope='flat_1'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=1024,
            activation='relu',
            name='fc_1'
            )[0])
    net.append(tf.nn.dropout(
            x=net[-1],
            keep_prob=0.5,
            name='dropout_fc_1'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=1024,
            activation='relu',
            name='fc_2'
            )[0])
    net.append(tf.nn.dropout(
            x=net[-1],
            keep_prob=0.5,
            name='dropout_fc_2'
            ))
    net.append(layers.fc(
            input=net[-1],
            units=NUM_CLASSES,
            activation='linear',
            name='fc_3'
            )[0])
    return net

In [5]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(BATCH_SIZE, 32, 32, 3))
y = tf.placeholder(dtype=tf.int32, shape=(BATCH_SIZE))
is_training = tf.placeholder(dtype=tf.bool)

net = feature_extractor_cnn(x, y, is_training)

pred = tf.nn.softmax(
        logits=net[-1],
        name='softmax')

'''
Loss and Optimization Setup
'''
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y, #GT probability distribution
        logits=net[-1], # unscaled log prob
        name='sparse_softmax_cross_entropy')

reduced_loss = tf.reduce_mean(loss)

optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE,
        beta1=BETA1,
        beta2=BETA2,
        name='AdamOptimizer')
train_op = optimizer.minimize(reduced_loss)

'''
TensorBoard Setup
'''
all_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
        graph=tf.get_default_graph())

'''
Tensorflow Saver Setup
'''
saver = tf.train.Saver(var_list=tf.global_variables(),
                       max_to_keep=SNAPSHOT_MAX)

'''
Tensorflow Session Setup
'''
tf.set_random_seed(RANDOM_SEED)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Main Training Loop
'''
for step in range(NUM_STEPS):
    start_t = time()
    
    # Create Batch
    x_batch = np.zeros((BATCH_SIZE,) + data_x.shape[1:])
    y_batch = np.zeros((BATCH_SIZE,) + data_y.shape[1:])
    
    for i in range(BATCH_SIZE):
        idx = next(data_indices)
        x_batch[i,...] = data_x[idx, ...]
        y_batch[i,...] = data_y[idx, ...]
        
    feed_dict = {x:x_batch,
                 y:y_batch,
                 is_training: True}
    
    # Run Graph
    if step % SAVE_PRED_EVERY == 0:
        loss_v, _, summary_v  = sess.run([reduced_loss, train_op, all_summary],
                feed_dict=feed_dict)
        summary_writer.add_summary(summary_v, step)
        save(saver, sess, SNAPSHOT_DIR, step)
        duration = time() - start_t
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
        step, loss_v, duration))
    else:
        loss_v, _ = sess.run([reduced_loss, train_op],
                feed_dict=feed_dict)

The checkpoint has been created.
step 0 	 loss = 5.447, (0.572 sec/step)
The checkpoint has been created.
step 1000 	 loss = 3.659, (0.255 sec/step)
The checkpoint has been created.
step 2000 	 loss = 3.537, (0.303 sec/step)
The checkpoint has been created.
step 3000 	 loss = 3.299, (0.259 sec/step)
The checkpoint has been created.
step 4000 	 loss = 2.646, (0.251 sec/step)
The checkpoint has been created.
step 5000 	 loss = 2.476, (0.246 sec/step)
The checkpoint has been created.
step 6000 	 loss = 2.195, (0.246 sec/step)
The checkpoint has been created.
step 7000 	 loss = 1.825, (0.246 sec/step)


KeyboardInterrupt: 