## Adapted from [Magnus Erik Hvass Pedersen](http://www.hvass-labs.org/)
#### Import TensorFlow and inception model helper file.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import tensorflow as tf
import prettytensor as pt
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import KFold
from scipy.misc import imread
import itertools
import numpy as np
import pickle
import os

# Functions and classes for loading and using the Inception model.
import _inception as inception
import _dataset as ds

#### Download the model if necessary, and load it

In [None]:
# Directory the Inception helper module uses for its model files.
inception.data_dir = 'inception_data/'
# Download the model if necessary, then load it.
inception.maybe_download()
model = inception.Inception()

In [None]:
def imshow(image):
    """Display `image` with matplotlib using the 'gray' colormap."""
    plt.imshow(image, cmap='gray')
    # Render the figure immediately.
    plt.show()

In [None]:
def plot_confusion_matrix(cm, classes,
                          title='Confusion matrix',
                          cmap=plt.cm.BuPu,
                          normalize=False):
    """Plot a confusion matrix as an annotated heat map.

    Args:
        cm: 2-D array; rows are drawn as true labels and columns as
            predicted labels (see the axis labels set below).
        classes: sequence of tick labels, one per row/column.
        title: title shown above the plot.
        cmap: matplotlib colormap used for the heat map.
        normalize: when True, every row is divided by its sum before
            plotting, so each cell shows the fraction of that true class.
            Rows that sum to zero are left as zeros.
    """
    cm = np.asarray(cm)
    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True)
        # Guard against empty rows instead of producing NaN/inf cells.
        cm = np.divide(cm, row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Fractional values (normalized or averaged matrices) are shown with two
    # decimals; anything else is printed unchanged.
    fmt = '.2f' if np.issubdtype(cm.dtype, np.floating) else ''

    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out after the axis labels exist so they are not clipped.
    plt.tight_layout()

#### Create the dataset using the `_dataset.py` helper file

In [None]:
# Load the dataset object through the helper module, passing the cache file
# 'tilburg.pkl' and the image directory 'data/'.
# NOTE(review): presumably the helper builds the cache on the first run and
# reuses it afterwards -- confirm against _dataset.load_cached.
dataset = ds.load_cached(cache_path='tilburg.pkl', in_dir='data/')

#### Build the file path for each image in the dataset

In [None]:
def get_filepaths(in_dir, filenames):
    """Return the path of every file in `filenames` joined onto `in_dir`.

    Args:
        in_dir: directory that contains the files.
        filenames: iterable of file names relative to `in_dir`.

    Returns:
        A list with one joined path per entry of `filenames`, in order.
    """
    # One list element per file; extending a list with a string would add
    # the path character by character instead.
    return [os.path.join(in_dir, name) for name in filenames]

In [None]:
# Training images with their class numbers and one-hot labels.
image_paths, cls, labels = dataset.get_training_set()

# Compute the Inception transfer values for every image through the helper,
# which is given this file as its cache.
cache_path = os.path.join('data/', 'tilburg.pkl')
transfer_values = inception.transfer_values_cache(
    cache_path, model, image_paths=image_paths)

In [None]:
def random_batch(num_images, batch_size=64):
    """Draw `batch_size` distinct random indices from `range(num_images)`.

    Sampling is done without replacement, so no index appears twice.
    """
    return np.random.choice(num_images, size=batch_size, replace=False)

#### Split dataset into train and test sets

In [None]:
def split(dataset, idx, data, labels, num_actors=50):
    """Split `data` and `labels` into train and test rows by file name.

    A row goes to the test set when its file name in `dataset.filenames`
    is '<id>.jpg' or '<id>(flipped).jpg' for some id in `idx`; every other
    row goes to the training set.

    Args:
        dataset: object exposing `filenames`, aligned row-for-row with
            `data` and `labels`.
        idx: iterable of ids to hold out for testing.
        data: 2-D array-like of features, one row per file.
        labels: 2-D array-like of labels, one row per file.
        num_actors: unused; kept so existing callers keep working.

    Returns:
        Tuple `(X_train, X_train_labels, X_test, X_test_labels)`.
    """
    # Build the held-out file names once so each file needs a single set
    # lookup; this also keeps a repeated id from duplicating test rows.
    held_out = set()
    for held_id in idx:
        held_out.add(str(held_id) + '.jpg')
        held_out.add(str(held_id) + '(flipped).jpg')

    train_idx = []
    test_idx = []
    for i, name in enumerate(dataset.filenames):
        if name in held_out:
            test_idx.append(i)
        else:
            train_idx.append(i)

    data = np.asarray(data)
    labels = np.asarray(labels)
    # Explicit integer dtype keeps indexing valid when a side is empty.
    train_idx = np.asarray(train_idx, dtype=np.intp)
    test_idx = np.asarray(test_idx, dtype=np.intp)

    X_train = data[train_idx, :]
    X_train_labels = labels[train_idx, :]
    X_test = data[test_idx, :]
    X_test_labels = labels[test_idx, :]
    return (X_train, X_train_labels, X_test, X_test_labels)

#### Build the graph: placeholders, classifier, optimizer and accuracy

In [None]:
# Length of one transfer-value vector and the number of target classes.
_, dim = transfer_values.shape
num_classes = dataset.num_classes

# Inputs: transfer values and their one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, dim], name='x')
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
# True class number, taken as the arg-max over the label columns.
y_true_cls = tf.argmax(y_true, dimension=1)

# Classifier on top of the transfer values: one fully connected layer of
# 1024 units (ReLU by default in this scope) followed by a softmax classifier.
x_pretty = pt.wrap(x)
with pt.defaults_scope(activation_fn=tf.nn.relu):
    fc1 = x_pretty.fully_connected(size=1024, name='layer_fc1')
    y_pred, loss = fc1.softmax_classifier(num_classes=num_classes,
                                          labels=y_true)

# Step counter (excluded from training) and the Adam training op that
# minimizes the loss and is handed the counter.
global_step = tf.Variable(initial_value=0,
                          name='global_step',
                          trainable=False)
adam = tf.train.AdamOptimizer(learning_rate=1e-3)
optimizer = adam.minimize(loss, global_step=global_step)

# Predicted class number and the mean rate of correct predictions.
y_pred_cls = tf.argmax(y_pred, dimension=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

#### Optimization function

In [None]:
def optimize(num_iters, check_every=100):
    """Train on the current split and track the best test-set accuracy.

    Reads the module-level `session`, the graph nodes `x`, `y_true`,
    `optimizer`, `accuracy`, `y_pred_cls`, and the current split
    `x_batch`, `y_batch`, `X_test`, `X_test_labels`.

    Every step feeds the whole training split to the optimizer and then
    evaluates the test split. Training stops after `num_iters` steps, or
    earlier when the test accuracy at two consecutive checks (every
    `check_every` steps) differs by at most 1e-6.

    NOTE: the best result is selected using the test split itself, so the
    returned accuracy is an optimistic estimate.

    Args:
        num_iters: maximum number of optimization steps.
        check_every: number of steps between plateau checks.

    Returns:
        Tuple `(best, best_pred)`: the highest test accuracy observed and
        the predicted class numbers of the test split at that step.
    """
    thresh = 1e-6
    # No baseline yet; the first check only records one. Starting from 0.0
    # would stop training at step 0 whenever the initial accuracy is ~0.
    prev = None

    best = 0.0
    best_pred = None

    # Both feeds are constant for the whole run.
    feed_dict_train = {x: x_batch, y_true: y_batch}
    feed_dict_test = {x: X_test, y_true: X_test_labels}

    for i in range(num_iters):
        session.run(optimizer, feed_dict=feed_dict_train)

        # Accuracy and predictions come from the same forward pass.
        test_acc, test_pred = session.run([accuracy, y_pred_cls],
                                          feed_dict=feed_dict_test)

        # Always keep the first evaluation so the returned predictions have
        # the shape of the test split even if accuracy never exceeds 0.
        if best_pred is None or test_acc > best:
            best = test_acc
            best_pred = test_pred

        if i == num_iters - 1:
            msg = '-- Step: {}, Best Test-Set Accuracy: {:.2f}'
            print(msg.format(i + 1, best))
            break

        if i % check_every == 0:
            # Accuracy unchanged since the previous check: stop early.
            if prev is not None and abs(test_acc - prev) <= thresh:
                msg = '-- Converged after {} steps. ' + \
                      'Best Test-Set Accuracy: {:.2f}'
                print(msg.format(i, best))
                break

            prev = test_acc

    if best_pred is None:
        # No step was run (num_iters <= 0): return a placeholder with one
        # class number per test row.
        best_pred = np.zeros(len(X_test_labels), dtype=np.int64)

    return best, best_pred

#### Train and evaluate on a single random train/test split

In [None]:
# Fresh session with newly initialized variables.
session = tf.Session()
session.run(tf.global_variables_initializer())

# Hold out 5 randomly chosen ids (out of 50) as the test set; everything
# else is used for training.
fold_idx = random_batch(50, batch_size=5)
x_batch, y_batch, X_test, X_test_labels = split(
    dataset, fold_idx, transfer_values, labels)

# Train for at most this many iterations.
num_iters = 500
print('Optimizing...')
_, pred = optimize(num_iters, check_every=25)

# Confusion matrix of the returned predictions against the true classes.
cm = confusion_matrix(y_true=np.argmax(X_test_labels, axis=1), y_pred=pred)
plot_confusion_matrix(cm, dataset.class_names)

# Release the session and the Inception model.
session.close()
model.close()

#### Run 10-fold cross validation on the dataset

In [None]:
# Accumulators for the confusion matrix and accuracy over all folds.
cm_avg = np.zeros((dataset.num_classes, dataset.num_classes))
acc_sum = 0.0

num_folds = 10
num_iters = 400
num_actors = 50

# Split the ids 0..num_actors-1 evenly among the folds.
kf = KFold(num_actors, num_folds, shuffle=True)

# Class numbers come from an arg-max over num_classes label columns, so
# fixing them here keeps every per-fold matrix the same shape even when a
# class is missing from a fold.
class_ids = np.arange(dataset.num_classes)

for i, (_, test_ids) in enumerate(kf):

    # The context manager closes the session even if the fold fails.
    with tf.Session() as session:
        # Re-initialize all variables so every fold trains from scratch.
        session.run(tf.global_variables_initializer())

        # Rows belonging to this fold's held-out ids form the test set.
        x_batch, y_batch, X_test, X_test_labels = split(dataset,
                                                        test_ids,
                                                        transfer_values,
                                                        labels)

        # Train for the set number of iterations or until the test
        # accuracy stops changing.
        print('Optimizing: fold {} of {}'.format(i+1, num_folds))
        acc, pred = optimize(num_iters, check_every=25)

    # Accumulate confusion matrix and accuracy values.
    cm_avg += confusion_matrix(y_true=np.argmax(X_test_labels, axis=1),
                               y_pred=pred,
                               labels=class_ids)
    acc_sum += acc


# Mean accuracy over all folds.
overall_acc = acc_sum / num_folds
print('Mean Test-Set Accuracy After {}-fold Cross Validation: {:.2f}' \
     .format(num_folds, overall_acc))

# Mean confusion matrix over all folds.
cm_avg /= num_folds
plot_confusion_matrix(cm_avg, dataset.class_names,
                      title='Mean Confusion Matrix')

# Close the Inception model.
model.close()