## Todo:


In [1]:
# Experiment identifier; joined onto the snapshot root to form SNAPSHOT_DIR.
EXPERIMENT_NAME = 'experiment_01_stanford40_train'

In [2]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random
import pickle

from utils import optimistic_restore, save, load
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))
import pegasos

import pdb

In [3]:
'''
HYPERPARAMS
'''
TRAIN = False  # When False, the loss/optimizer graph and the training loop are skipped
BATCH_SIZE = 10
PATIENCE = 2  # Non-improving validation epochs tolerated before the learning rate is decayed
TRIPLETS_TRAIN = '/media/red/capstone/data/stanford40_triplets_train.pkl'
TRIPLETS_VALIDATION = '/media/red/capstone/data/stanford40_triplets_val.pkl'
FEATURE_FILE = '/media/red/capstone/data/stanford_40/vgg16_features.pkl'

LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.7  # Multiplier applied to the learning rate when patience runs out
BETA1 = 0.9  # Adam beta1
BETA2 = 0.99  # Adam beta2
NUM_EPOCH = 100
RANDOM_SEED = 1234
SUMMARY_EVERY = 10  # A training summary is written whenever the global step is a multiple of this
VALIDATION_PERCENTAGE = 0.05  # NOTE(review): not referenced in the visible cells
SNAPSHOT_MAX = 3 # Maximum number of checkpoints the Saver retains (passed as max_to_keep)
SNAPSHOT_DIR = os.path.join('/media/red/capstone/snapshots/', EXPERIMENT_NAME)
RESTORE_FROM = os.path.join(SNAPSHOT_DIR, 'model.ckpt-14832')  # Set to None to skip restoring weights


# Network params
NORMALIZE = True  # Divide X1, X2 and Y by their norm (axis 0) before building inputs/targets
N_FEAT = 4096  # Base width used for the dense blocks of the network

np.random.seed(seed=RANDOM_SEED)

In [4]:
'''
Load Triplets
'''
class SVM_Triplet:
    """Plain record holding one (X1, X2, Y) triplet plus its class bookkeeping.

    The triplet files are unpickled into objects exposing these attribute
    names; the loading loop reads X1, X2 and Y from each of them.
    Presumably X1/X2/Y are SVM weight vectors -- confirm against the script
    that generated the triplet pickles.
    """

    def __init__(self, X1, X2, Y, base_classes, pos_class, new_class):
        # The three vectors that make up the triplet.
        self.X1, self.X2, self.Y = X1, X2, Y
        # Which classes the triplet was built from.
        self.base_classes, self.pos_class, self.new_class = (
            base_classes, pos_class, new_class)
        
# Load the pickled triplets for both partitions.
# `with` closes each file handle as soon as it has been read (the previous
# `pickle.load(open(...))` form left the handles open).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
triplets_loadin = {}
for partition, triplet_path in (('train', TRIPLETS_TRAIN),
                                ('validation', TRIPLETS_VALIDATION)):
    with open(triplet_path, "rb") as triplet_file:
        triplets_loadin[partition] = pickle.load(triplet_file)

# Network inputs are the concatenation [X1, X2]; regression targets are Y - X1.
x_data = {
    'train':[],
    'validation':[]
}
y_data = {
    'train':[],
    'validation':[]
}
for partition in ['train', 'validation']:
    for triplet in triplets_loadin[partition]:
        if NORMALIZE:
            # Scale each vector by its norm along axis 0 before combining.
            X1 = triplet.X1 / np.linalg.norm(triplet.X1, axis=0, keepdims=True)
            X2 = triplet.X2 / np.linalg.norm(triplet.X2, axis=0, keepdims=True)
            Y  = triplet.Y  / np.linalg.norm(triplet.Y,  axis=0, keepdims=True)
            x_data[partition].append(np.hstack((X1, X2)))
            y_data[partition].append(Y-X1)
        else:
            x_data[partition].append(np.hstack((triplet.X1, triplet.X2)))
            y_data[partition].append(triplet.Y-triplet.X1)

# Turn the per-triplet lists into arrays with one row per triplet.
for partition in ['train', 'validation']:
    x_data[partition] = np.stack(x_data[partition])
    y_data[partition] = np.stack(y_data[partition])

In [5]:
'''
Declare model
'''

def lrelu(x, alpha=0.1):
    """Leaky ReLU built from two ReLUs: relu(x) - alpha * relu(-x).

    Positive inputs pass through unchanged; negative inputs are scaled by
    `alpha`.
    """
    positive_part = tf.nn.relu(x)
    negative_magnitude = tf.nn.relu(-x)
    return positive_part - alpha * negative_magnitude

def net(x, is_training):
    """Build the regression network and return every intermediate tensor.

    Three dense blocks (widths 2*N_FEAT, 2*N_FEAT, N_FEAT), each made of a
    fully connected layer, batch normalisation and a leaky ReLU, are followed
    by a final linear fully connected layer with 4096 units.

    Args:
        x: input tensor fed to the first block.
        is_training: forwarded to batch_norm to select training/inference mode.

    Returns:
        A list whose first element is `x`, followed by the output of every
        layer in order; the last element is the final linear layer's output.
    """
    stack = [x]

    def dense_block(n_units):
        # layers.fc is a project helper; element [0] of its return value is
        # used as the layer output (presumably the activation tensor).
        fc_out = layers.fc(
            input=stack[-1],
            units=n_units,
            activation='relu',
            name='fc'
            )[0]
        stack.append(fc_out)

        bn_out = tf.contrib.layers.batch_norm(
                fc_out,
                center=True, scale=True,
                is_training=is_training,
                scope='bn')
        stack.append(bn_out)

        stack.append(lrelu(bn_out))

    # Each block lives in its own variable scope so the 'fc' / 'bn' names
    # inside the blocks do not collide.
    for block_idx, block_units in enumerate([2*N_FEAT, 2*N_FEAT, N_FEAT]):
        with tf.variable_scope("block_"+str(block_idx)):
            dense_block(block_units)

    final_out = layers.fc(
            input=stack[-1],
            units=4096,
            activation='linear',
            name='fc_final'
            )[0]
    stack.append(final_out)
    return stack

In [6]:
'''
Model Setup
'''
# Seed the graph BEFORE any op is created. TF1 derives each random op's seed
# from the graph-level seed at op-construction time, so a seed that is only
# set after the network has been built does not reach ops that already exist
# (e.g. any random weight initialisers created inside net()).
tf.set_random_seed(RANDOM_SEED)

# Network input: one row per example, 8192 columns (the [X1, X2] concatenation).
x = tf.placeholder(dtype=tf.float32, shape=(None, 8192))
if TRAIN:
    # Regression target; only needed when the loss is built.
    y = tf.placeholder(dtype=tf.float32, shape=(None, 4096))
# Switches batch normalisation between training and inference behaviour.
is_training = tf.placeholder(dtype=tf.bool)

global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False, initializer=0) #incremented everytime optimizer runs
lr = tf.get_variable('learning_rate', dtype=tf.float32, trainable=False, initializer=LEARNING_RATE)

# NOTE: this rebinds `net` from the builder function to the list of layer
# outputs it returns, so this cell cannot be re-run without re-defining net().
net = net(x, is_training)

'''
Loss, Metrics, and Optimization Setup
'''
# Prediction is the output of the final linear layer (last entry of the stack).
pred = net[-1]
# Row-wise L2 normalisation so the cosine distance reduces to 1 - <a, b>.
pred_normalized = pred / tf.norm(pred, axis=1, keep_dims=True)
if TRAIN:
    y_normalized = y / tf.norm(y,axis=1,keep_dims=True)
    reduced_loss = tf.losses.cosine_distance(
            labels=y_normalized,
            predictions=pred_normalized,
            dim=1,
            reduction=tf.losses.Reduction.MEAN,
            )
    train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)

    optimizer = tf.train.AdamOptimizer(
            learning_rate=lr,
            beta1=BETA1,
            beta2=BETA2,
            name='AdamOptimizer')
    tvars = tf.trainable_variables()
    # Clip the global gradient norm to 5.0 before applying the update.
    grads, _ = tf.clip_by_global_norm(tf.gradients(reduced_loss, tvars), 5.0)
    # tf.contrib.layers.batch_norm registers its moving-mean/variance updates
    # in the UPDATE_OPS collection instead of running them implicitly. Without
    # this dependency the moving statistics are never updated during training,
    # and inference (is_training=False) normalises with their initial values.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

'''
TensorBoard Setup
'''
# Merge every summary registered so far. In the visible code the only one is
# the 'training_loss' scalar, and it is only created when TRAIN is set.
all_train_summary = tf.summary.merge_all()

# Event files (and the graph definition) are written next to the checkpoints.
summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
        graph=tf.get_default_graph())

'''
Tensorflow Saver Setup
'''
# Saves all global variables and retains at most SNAPSHOT_MAX checkpoints.
saver = tf.train.Saver(var_list=tf.global_variables(),
                       max_to_keep=SNAPSHOT_MAX)

'''
Tensorflow Session Setup
'''
# Graph-level seed.
# NOTE(review): at this point the model graph has already been constructed.
tf.set_random_seed(RANDOM_SEED)

# Cap the process at 80% of GPU memory, allocate it lazily, and let TF fall
# back to another device when an op cannot be placed as requested.
config = tf.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.8,
        allow_growth=True))
sess = tf.Session(config=config)

# Initialise global and local variables in a single run call.
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Restore Weights
'''
# Overwrite the freshly initialised variables with a checkpoint, if one is
# configured. `load` comes from the project's utils module; presumably it
# wraps Saver.restore and reports the restored path -- confirm in utils.
if RESTORE_FROM is not None:
    print('Loading Weights')
    loader = tf.train.Saver(var_list=tf.global_variables())
    load(loader, sess, RESTORE_FROM)
'''
Primary Loop
'''
# Training / validation loop. Each epoch runs one pass over the (re-shuffled)
# training partition followed by one pass over the validation partition.
# A checkpoint is written whenever the validation loss improves; after
# PATIENCE consecutive non-improving epochs the learning rate is multiplied
# by LEARNING_RATE_DECAY.
if TRAIN:
    partition_types = ['train', 'validation']
    best_loss = float('inf')
    patience_counter = 0
    step_v = global_step.eval(session=sess)
    for epoch in range(NUM_EPOCH):
        # Running sum of per-example validation losses for this epoch.
        overall_loss = 0.0

        print('Training Epoch {}/{}'.format(
                epoch, NUM_EPOCH))

        for partition in partition_types: # Itr through data partitions
            n_exemplars = x_data[partition].shape[0]
            if partition == 'train':
                # Shuffle inputs and targets with the same permutation.
                shuffle_indices = np.arange(n_exemplars)
                np.random.shuffle(shuffle_indices)
                x_data['train'] = x_data['train'][shuffle_indices, ...]
                y_data['train'] = y_data['train'][shuffle_indices, ...]
            step_v = global_step.eval(session=sess)
            for i in range(0, n_exemplars, BATCH_SIZE):
                # The last batch may be shorter than BATCH_SIZE.
                upper_range = min(i + BATCH_SIZE, n_exemplars)
                x_batch = x_data[partition][i:upper_range, ...]
                y_batch = y_data[partition][i:upper_range, ...]

                feed_dict = {
                    x: x_batch,
                    y: y_batch,
                    # Batch norm uses batch statistics only while training.
                    is_training: partition == 'train',
                }

                # Run the proper sess run command
                if partition == 'train':
                    start_t = time()
                    if step_v % SUMMARY_EVERY == 0:
                        _, loss_v, summary_v = sess.run(
                            [train_op, reduced_loss, all_train_summary],
                            feed_dict=feed_dict)
                        summary_writer.add_summary(summary_v, step_v)
                        duration = time() - start_t
                        print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                                step_v, loss_v, duration))
                    else: # Vanilla Training
                        sess.run([train_op], feed_dict=feed_dict)
                    step_v = global_step.eval(session=sess)
                elif partition == 'validation':
                    loss_v = sess.run(
                            [reduced_loss],
                            feed_dict=feed_dict)[0]
                    # reduced_loss is the MEAN over the batch, so weight it by
                    # the batch length; dividing the sum by n_exemplars below
                    # then yields the true per-example mean (previously the
                    # sum of batch means was divided by the example count).
                    overall_loss += loss_v * (upper_range - i)
            # Post-epoch routine for validation set (saving, stat computation, etc)
            if partition == 'validation' and n_exemplars > 0:
                overall_loss = float(overall_loss) / n_exemplars
                overall_loss_summary = tf.Summary()
                overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)
                summary_writer.add_summary(overall_loss_summary, step_v)

                if overall_loss < best_loss:
                    print('New Best Loss {:.3f} < Old Best {:.3f}.  Saving...'.format(
                            overall_loss, best_loss))
                    best_loss = overall_loss
                    patience_counter = 0
                    save(saver, sess, SNAPSHOT_DIR, step_v)
                else:
                    patience_counter += 1
                # The counter only changes after a validation pass, so the
                # decay check belongs here.
                if patience_counter >= PATIENCE:
                    patience_counter = 0
                    lr_v = lr.eval(session=sess) * LEARNING_RATE_DECAY
                    # NOTE: every decay adds a new assign op to the graph.
                    lr.assign(lr_v).eval(session=sess)

Loading Weights
INFO:tensorflow:Restoring parameters from /media/red/capstone/snapshots/experiment_01_stanford40_train/model.ckpt-14832
Restored model parameters from /media/red/capstone/snapshots/experiment_01_stanford40_train/model.ckpt-14832


# SVM Accuracy Eval

In [7]:
def plot_pr(y_test, y_gt):
    """Draw a filled precision-recall curve and show it.

    Both arguments are forwarded, in this order, to precision_recall_curve
    and average_precision_score; the latter's value is printed in the title.

    NOTE(review): neither metric function is imported in the visible cells
    (they look like sklearn.metrics functions, whose signature would be
    (y_true, y_score)) -- confirm the import and the argument order.
    """
    precision, recall, _ = precision_recall_curve(y_test, y_gt)
    average_precision = average_precision_score(y_test, y_gt)

    # Step outline plus a translucent fill underneath it.
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.5, color='b')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    title_text = 'Precision-Recall curve: AUC={0:0.2f}'.format(average_precision)
    plt.title(title_text)
    plt.show()
    
class SVM_Triplet:
    """Container for one (X1, X2, Y) triplet and the classes it came from.

    NOTE(review): this repeats the SVM_Triplet definition from the
    triplet-loading cell; running this cell rebinds the name to a new class.
    """

    def __init__(self, X1, X2, Y, base_classes, pos_class, new_class):
        self.X1, self.X2, self.Y = X1, X2, Y
        self.base_classes, self.pos_class, self.new_class = (
            base_classes, pos_class, new_class)

def prepare_features(pos_class, neg_classes, feature_vectors, is_train=True,
                     equal_features=False, train_split=0.9):
    '''
    Select the per-image features used to train or evaluate one binary SVM.

    Every class is split at int(train_split * n_images): the leading part is
    the training portion, the remainder the held-out portion.

    inputs :
        pos_class -> key of the positive class in feature_vectors
        neg_classes -> iterable of keys of the negative classes (may be empty)
        feature_vectors -> mapping from class key to an array with one row
                           of features per image
        is_train -> if True return the training portion of every class,
                    otherwise the held-out portion
        equal_features -> if True, randomly subsample the negatives (global
                          NumPy RNG) to at most len(pos_features) rows
        train_split -> fraction of each class assigned to the training portion

    Returns:
        pos_features -> features of images in the positive class
        neg_features -> features of images in the negative classes, always a
                        numpy array (previously a list unless equal_features
                        was set); has 0 rows when neg_classes is empty
    '''
    def _partition(class_key):
        # Slice one class into its train or held-out portion.
        class_features = feature_vectors[class_key]
        split_at = int(train_split * class_features.shape[0])
        if is_train:
            return class_features[:split_at]
        return class_features[split_at:]

    pos_features = _partition(pos_class)    # n x n_features

    neg_chunks = [_partition(neg_class) for neg_class in neg_classes]
    if neg_chunks:
        neg_features = np.concatenate(neg_chunks, axis=0)
    else:
        # Keep the trailing dimensions so the result can still be stacked
        # with pos_features.
        neg_features = np.empty((0,) + pos_features.shape[1:],
                                dtype=pos_features.dtype)

    if equal_features:
        neg_features = np.random.permutation(neg_features)[:pos_features.shape[0]]

    return pos_features, neg_features
    
def compute_accuracy(weight_vector, pos_features, neg_features):
    """Score a given weight vector on positive and negative examples.

    A Pegasos classifier is fitted on dummy data, its weights are replaced
    with `weight_vector`, and it then predicts on the positives stacked above
    the negatives.

    Returns:
        (accuracy, predictions, ground_truth), where ground_truth is 1 for
        every positive row and 0 for every negative row, and accuracy is the
        fraction of predictions equal to the ground truth.

    NOTE(review): the dummy fit uses 1024-dimensional zeros while callers in
    this notebook pass 4096-dimensional weights; presumably the fit only
    exists to initialise the classifier's internal state -- confirm against
    the pegasos implementation.
    """
    svm = pegasos.PegasosSVMClassifier()
    dummy_samples = np.zeros((2, 1024))
    dummy_labels = np.asarray([1, 0])
    svm.fit(dummy_samples, dummy_labels)
    # Replace whatever the dummy fit produced with the weights under test.
    svm.weight_vector.weights = weight_vector

    # Concat data and pass to SVM
    stacked_samples = np.vstack((pos_features, neg_features))
    predictions = svm.predict(stacked_samples)

    n_pos, n_neg = len(pos_features), len(neg_features)
    ground_truth = np.concatenate((np.ones(n_pos), np.zeros(n_neg)))
    accuracy = np.average(np.equal(ground_truth, predictions))
    return accuracy, predictions, ground_truth

def get_svm_weights(x_train, y_train, sklearn_SGD=False):
    """Train a linear SVM on (x_train, y_train).

    Args:
        x_train: training samples, one row per sample.
        y_train: label for each row of x_train.
        sklearn_SGD: selects the backend (see Returns).

    Returns:
        If sklearn_SGD is True, the fitted SGDClassifier object itself (so it
        can be updated further with partial_fit); otherwise the weights of a
        fitted PegasosSVMClassifier.

    NOTE(review): `linear_model` is not imported in the visible cells
    (presumably sklearn.linear_model) -- the sklearn_SGD=True path needs it.
    """
    if sklearn_SGD:
        clf = linear_model.SGDClassifier()
        # partial_fit needs the full label set on its first call.
        clf.partial_fit(x_train, y_train, classes=np.unique(y_train))
        return clf

    svm = pegasos.PegasosSVMClassifier()
    svm.fit(x_train, y_train)
    return svm.weight_vector.weights


def get_x_y(pos_features, neg_features):
    """Stack positives above negatives and build the matching 1/0 labels.

    Returns:
        (samples, labels): samples has the positive rows first, then the
        negative rows; labels holds 1.0 for each positive row followed by
        0.0 for each negative row.
    """
    samples = np.vstack((pos_features, neg_features))
    pos_labels = np.ones(len(pos_features))
    neg_labels = np.zeros(len(neg_features))
    labels = np.concatenate((pos_labels, neg_labels))
    return samples, labels


def online_svm_update(clf, neg_features):
    '''
    Incrementally update a classifier with examples of a novel class.

    We only need the negative features for the novel class.
    (It is negative with respect to the positive class)

    Every row of neg_features is labelled 0 and passed to clf.partial_fit;
    the same classifier object is returned.
    '''
    novel_labels = np.zeros(len(neg_features))
    clf.partial_fit(neg_features, novel_labels)
    return clf
    

def compute_X1(pos_class, base_classes, feature_vectors, is_train=True, sklearn_SGD=False):
    '''
    SVM for the positive class against the other base classes.
    pos_class = pos_class
    neg_classes = base_classes - pos_class

    Returns whatever get_svm_weights returns for the assembled training set.
    '''
    # Drop the positive class from the base classes to get the negatives.
    pos_positions = np.argwhere(base_classes==pos_class)
    neg_classes = np.delete(base_classes, pos_positions)

    pos_features, neg_features = prepare_features(
        pos_class, neg_classes, feature_vectors, is_train=is_train)
    samples, labels = get_x_y(pos_features, neg_features)
    return get_svm_weights(samples, labels, sklearn_SGD=sklearn_SGD)
    


def compute_X2(novel_class, base_classes, feature_vectors, is_train=True, sklearn_SGD=False):
    '''
    SVM for novel class.
    pos_class = novel_class
    neg_classes = base_classes

    Returns whatever get_svm_weights returns for the assembled training set.
    '''
    novel_features, base_features = prepare_features(
        novel_class, base_classes, feature_vectors, is_train=is_train)
    samples, labels = get_x_y(novel_features, base_features)
    return get_svm_weights(samples, labels, sklearn_SGD=sklearn_SGD)
    
def compute_Y(pos_class, novel_class, base_classes, feature_vectors, is_train=True, sklearn_SGD=False):
    '''
    SVM for pos class under the influence of the novel class.
    pos_class = pos_class
    neg_classes = base_classes - pos_class + novel_class

    Returns whatever get_svm_weights returns for the assembled training set.
    '''
    # Negatives: every base class except the positive one, plus the novel class.
    pos_positions = np.argwhere(base_classes==pos_class)
    remaining_base = np.delete(base_classes, pos_positions)
    neg_classes = np.append(remaining_base, novel_class)

    pos_features, neg_features = prepare_features(
        pos_class, neg_classes, feature_vectors, is_train=is_train)
    samples, labels = get_x_y(pos_features, neg_features)
    return get_svm_weights(samples, labels, sklearn_SGD=sklearn_SGD)

In [8]:
# Compare our model with the baseline
# For every (positive base class, unseen test class) pair, measure
#   acc1: accuracy of the base-only SVM weights X1 on a test set that now also
#         contains the unseen class as a negative, and
#   acc2: accuracy after adding the network's predicted update to X1.

# `with` closes the file handle once the features are read.
# NOTE: pickle.load can execute arbitrary code -- only load a trusted file.
with open(FEATURE_FILE, "rb") as feature_file:
    features, file_names = pickle.load(feature_file)

seed = 1234
np.random.seed(seed)

# Splitting classes into train/val/test
labels = np.array(list(features.keys()))
perm = np.random.permutation(len(labels))
labels = labels[perm]

splits = {}
splits['base']=labels[:15]
splits['novel'] = labels[15:25]
splits['test']=labels[25:]

acc1_all, acc2_all = [], []

# Randomly pick 10 of the 15 base classes for this evaluation.
base_class_ind = np.random.permutation(len(splits['base']))[:10]
base_classes = splits['base'][base_class_ind]

for pos_class in base_classes:
    acc1, acc2 = [], []
    X1 = compute_X1(pos_class, base_classes, features, is_train=True)
    X1_norm = X1 / np.linalg.norm(X1, axis=0, keepdims=True)
    neg_classes = np.delete(base_classes, np.argwhere(base_classes==pos_class))

    for new_class in splits['test']:
        neg_classes_extra = np.append(neg_classes, new_class)    # 9 + 1 classes
        pos_features_test, neg_features_test = prepare_features(pos_class,
                                                                neg_classes_extra,
                                                                features,
                                                                is_train=False,
                                                                equal_features=False)
        # Baseline: the weights trained without knowledge of new_class.
        acc1.append(compute_accuracy(X1, pos_features_test, neg_features_test)[0])

        # Test out our incremental hypothesis
        X2 = compute_X2(new_class, base_classes, features, is_train=True)
        X2_norm = X2 / np.linalg.norm(X2, axis=0, keepdims=True)
        # Network input is the normalised [X1, X2] pair as a single row.
        X = np.hstack((X1_norm, X2_norm))
        X = np.reshape(X, (1, 8192))
        feed_dict = {
            x:X,
            is_training: False
        }
        Y_hat = sess.run(
                [pred],
                feed_dict=feed_dict)[0]
        # NOTE(review): the predicted update is added to the un-normalised X1,
        # and the training loss (cosine distance) only constrains the
        # direction of the prediction, not its scale -- confirm this is the
        # intended way to apply the update.
        Y = X1 + Y_hat.reshape((4096))
        acc2.append(compute_accuracy(Y, pos_features_test, neg_features_test)[0])
        print('%.5f %.5f %.5f %s|%s' % (acc1[-1], acc2[-1], acc2[-1] - acc1[-1], pos_class, new_class))

    acc1_all.append(acc1)
    acc2_all.append(acc2)


# Rows are positive classes, columns are test classes; the means are taken
# over the positive classes.
acc1_all = np.array(acc1_all)
acc2_all = np.array(acc2_all)
acc1_mean = np.mean(acc1_all, axis=0)
acc2_mean = np.mean(acc2_all, axis=0)

0.88060 0.88060 0.00000 jumping|cleaning_the_floor
0.87970 0.87970 0.00000 jumping|brushing_teeth
0.86517 0.86517 0.00000 jumping|cutting_trees
0.88364 0.88364 0.00000 jumping|cooking
0.82609 0.82609 0.00000 jumping|climbing
0.87454 0.87454 0.00000 jumping|reading
0.87868 0.87868 0.00000 jumping|drinking
0.87925 0.87925 0.00000 jumping|washing_dishes


KeyboardInterrupt: 