# Problem 3: TensorFlow and Edge Detection using SVM

In [None]:
%matplotlib inline
from __future__ import division
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import shutil
from utils import *
from HOG import *

Your task is to construct an SVM using the `tf.estimator` framework.
We have written most of the `tf.estimator` framework and generated the data. Your task is to fill in the feature extractor section and the **`model_fn`** function below. Please read through the TensorFlow Example IPython Notebook first.


First, we generate the data.

In [None]:
# Cell [a]
# Generate three independent data sets: training, evaluation and prediction.
# NOTE(review): generate_data is not defined in this notebook; presumably it comes
# from one of the star-imports (utils / HOG) -- confirm.
x_train, y_train = generate_data(N=5000)
x_eval, y_eval = generate_data(N=1000)
x_pred, y_true = generate_data(N=1000)
# One colour per training point: label 1 -> powderblue, any other label -> indianred.
c = ['powderblue' if lb == 1 else 'indianred' for lb in y_train]
# Scatter plot of the first two input columns, coloured by label.
plt.scatter(x_train[:,0], x_train[:,1], c = c, alpha=0.5, s=50)

### Edit in Part (iv), but still run for Part (iii)

In [None]:
# Cell [b]
# Here, you need to define your features. You should use numpy operations.
# Feature extractor
def identityFeatureExtractor(x):
    """Return the input unchanged, i.e. use the raw inputs as the features."""
    features = x
    return features
def customFeatureExtractor(x):
    """Expand two-column inputs into second-order polynomial features.

    x is indexed as a 2-D array whose columns 0 and 1 are read explicitly.
    The result stacks, column-wise: the original columns of x, their
    element-wise squares, and the product of column 0 with column 1.
    For an N x 2 input this gives an N x 5 array
    (x1, x2, x1^2, x2^2, x1*x2).
    """
    ### Your code starts here ###
    squared = x**2
    cross_term = x[:, 0] * x[:, 1]
    phi_x = np.column_stack((x, squared, cross_term))
    #############################
    return phi_x

########## TO DO #######################
# Select which feature extractor you want: leave exactly one assignment active.
# The chosen function is applied to x_train / x_eval / x_pred in the cells below.
# featureExtractor = identityFeatureExtractor
featureExtractor = customFeatureExtractor

We start building the Estimator framework.
Make sure you define the regularization weight lambda (the `lam` hyperparameter) and the learning rate `lr` here.

In [None]:
# Cell [c]
## Settings for the estimator
# location to save model information (also deleted before training in Cell [g])
model_dir = "training_checkpoints"
# Run configuration passed to the Estimator. tf_random_seed=None means no fixed
# TensorFlow seed is set here, so results can differ from run to run.
rc = tf.estimator.RunConfig().replace(model_dir=model_dir,
                                      tf_random_seed=None,
                                      save_summary_steps=100,
                                      save_checkpoints_steps=1000,
                                      keep_checkpoint_max=1000,
                                      log_step_count_steps=1000)

# hyperparameters, batch sizes, learning rate
# The batch sizes are read by the input functions; lr and lam are read inside
# model_fn as params.lr (gradient-descent step size) and params.lam
# (weight of the L2 penalty on W).
hps = tf.contrib.training.HParams(
    train_batch_size = 32,
    eval_batch_size = 32,
    ############ Select your learning rate and lambda value here ############
    lr = 0.075,
    lam = 0.825
    #########################################################################
    )


### Part (iii)
```model_fn``` is defined here. Your task is to fill in the missing code.

In [None]:
# Cell [d]
# Defining necessary functions for the estimator
# input data functions for training, evaluation and prediction
# note that x and y are fed in as dictionaries (keys "x" and "y", matching model_fn)
# Training input: shuffled, and num_epochs=None so the data is repeated without an
# epoch limit (training length is bounded by max_steps in the TrainSpec instead).
train_input_function = tf.estimator.inputs.numpy_input_fn(x={"x":featureExtractor(x_train)},
                                                          y={"y":y_train},
                                                          batch_size=hps.train_batch_size,
                                                          num_epochs=None,
                                                          shuffle=True)
# Evaluation input: a single, unshuffled pass over the evaluation set.
eval_input_function = tf.estimator.inputs.numpy_input_fn(x={"x":featureExtractor(x_eval)},
                                                         y={"y":y_eval},
                                                         batch_size=hps.eval_batch_size,
                                                         num_epochs=1,
                                                         shuffle=False)
def predict_input_function(x_dict):
    """Build a prediction input function for the estimator.

    x_dict is passed straight through as the ``x`` argument of
    numpy_input_fn; the data is read once (num_epochs=1) and in its
    original order (shuffle=False). No labels are supplied.
    """
    input_fn = tf.estimator.inputs.numpy_input_fn(x=x_dict,
                                                  num_epochs=1,
                                                  shuffle=False)
    return input_fn


def model_fn(features, labels, mode, params):
    """Model function of a linear soft-margin SVM for tf.estimator.Estimator.

    The decision value is ``y_est = x W - b`` with variables "svm/weights" and
    "svm/bias".

    Args:
        features: dict with key "x" holding the feature batch (cast to float32).
        labels: dict with key "y" holding the targets; only read in TRAIN and
            EVAL modes. The hinge loss and the accuracy below treat them as
            +1 / -1 values.
        mode: one of tf.estimator.ModeKeys.{TRAIN, EVAL, PREDICT}.
        params: hyperparameter object; ``params.lr`` (learning rate) and
            ``params.lam`` (L2 penalty weight) are read.

    Returns:
        A tf.estimator.EstimatorSpec. TRAIN / EVAL populate ``loss`` and
        ``eval_metric_ops`` (plus ``train_op`` in TRAIN); PREDICT populates
        ``predictions`` with "y_est" (raw decision values) and "labels"
        (their sign).
    """
    predictions = None
    loss = None
    train_op = None
    eval_metric_ops = None

    x = tf.to_float(features["x"])
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        y = tf.to_float(labels["y"])
        with tf.variable_scope("svm"):
            W = tf.get_variable("weights", dtype=tf.float32, shape=(x.shape[-1],1))
            b = tf.get_variable("bias", dtype=tf.float32, shape=(1,))
            ### Your code starts here ###
            # You may find the following functions useful: tf.matmul, tf.reduce_mean, tf.maximum, tf.square, tf.nn.l2_loss, tf.reduce_sum
            # use the ?<function name> functionality in jupyter to find out more about the functions.
            # This should take about 2 lines.
            ###########################
            y_est = tf.matmul(x, W) - b
            # Mean hinge loss plus L2 penalty. tf.nn.l2_loss(W) is sum(W**2)/2,
            # so the factor 2 makes the penalty exactly lam * ||W||^2.
            loss = tf.reduce_mean(tf.maximum(0.0, 1.0 - y*y_est)) + 2*params.lam*tf.nn.l2_loss(W)
            #####
            # A sample counts as correct when label and decision value share a sign.
            accuracy = tf.reduce_mean(tf.to_float(y*y_est > 0))
            eval_metric_ops = {"accuracy": tf.metrics.mean(accuracy)}
            if mode == tf.estimator.ModeKeys.TRAIN:
                opt = tf.train.GradientDescentOptimizer(learning_rate=params.lr)
                train_op = opt.minimize(loss, tf.train.get_global_step())
    elif mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {}
        # Same scope and variable names as in training, so the values are
        # restored from the checkpoint.
        with tf.variable_scope("svm"):
            W = tf.get_variable("weights", shape=(x.shape[-1],1), dtype=tf.float32)
            b = tf.get_variable("bias", shape=(1,), dtype=tf.float32)
            ### Your code starts here ###
            # This should take about 1 line
            #############################
            y_est = tf.matmul(x, W) - b
            #####
            predictions["y_est"] = y_est
            # tf.sign already yields the -1 / +1 class labels (0 exactly on the
            # decision boundary). The previous 2*sign - 1 produced -3 / -1 / +1.
            predictions["labels"] = tf.sign(y_est)
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)

In [None]:
# Cell [e]
# Define the estimator object
# model_dir is supplied twice: inside the RunConfig (rc) and directly here.
# Both come from the same model_dir variable, so they agree.
estimator_model = tf.estimator.Estimator(model_fn, 
                                         params=hps, 
                                         config=rc,
                                         model_dir=model_dir,
                                         )

In [None]:
# Cell [f]
# Define the specifications for training and evaluation
# Training stops once the global step reaches max_steps=10000.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_function, max_steps=10000)
# Each evaluation runs at most 100 batches and may start immediately (no start delay).
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_function, steps=100, start_delay_secs=0)

In [None]:
# Cell [g]
# Delete any checkpoints left over from a previous run (a missing directory is
# ignored), then train and evaluate the model from scratch.
shutil.rmtree(model_dir, ignore_errors=True)
tf.estimator.train_and_evaluate(estimator_model, train_spec, eval_spec)

In [None]:
# Cell [g]
# Make predictions given new x data
# predict() yields one dict per example; only the "y_est" entry is requested.
y_predict = estimator_model.predict(predict_input_function({"x":featureExtractor(x_pred)}), predict_keys = "y_est")
# Take element 0 of each per-example "y_est" value to get one decision value per example.
y_pred = [y["y_est"][0] for y in y_predict]
# Predicted class label is the sign of the decision value.
labels_pred = np.sign(np.array(y_pred))

In [None]:
# Cell[i]
# Plot predictions, coloured by predicted label (1 -> powderblue, otherwise indianred)
c = ['powderblue' if lb == 1 else 'indianred' for lb in labels_pred]
plt.scatter(x_pred[:,0], x_pred[:,1], c=c, s=50, alpha=0.5)
# misclassified data: d is non-zero wherever prediction and true label differ
d = labels_pred - y_true[:,0]
misclass_idx = np.where(d!= 0)[0]
# d == 2 (predicted +1, true -1) is drawn in red, every other error in blue
c = ['red' if lb == 2 else 'blue' for lb in d[misclass_idx]]
# Overlay the misclassified points on top of the first scatter plot.
plt.scatter(x_pred[misclass_idx,0], x_pred[misclass_idx,1], c=c, s=50, alpha=0.8)

In [None]:
# misclassification rate
# Uses d and misclass_idx from the plotting cell above.
# The float() casts keep the ratios fractional even without
# `from __future__ import division` being active.
rate = misclass_idx.shape[0] / float(y_true.shape[0])
# d == 2 means predicted +1 while the true label is -1 (reported as "Red").
rate_r = np.count_nonzero(d[misclass_idx] == 2) / float(y_true.shape[0])
rate_b = rate - rate_r
# Parenthesised print works as a statement on Python 2 and a function call on Python 3.
print("Misclassification Rate: %.3f = %.3f (Red) + %.3f (Blue)" % (rate, rate_r, rate_b))

### Part (vii) SVM with Histograms of Gradients


In [None]:
# Cell [j]
# An InteractiveSession lets the cells below call .eval() on tensors directly.
sess = tf.InteractiveSession()
datasets = np.load('pedestrian_dataset.npz')    # extracted from the original Dalal and Triggs paper dataset available here: http://pascal.inrialpes.fr/data/human/
# Show the names of the arrays stored in the archive.
datasets.keys()

In [None]:
# Cell [k]
### EXPLORE THE DATASET!
# Pick which image set to browse (swap the commented line to view the negatives).
data = datasets['train_pos']
# data = datasets['train_neg']
# Slider over every valid image index of the selected set.
@interact(k = (0, data.shape[0]-1))
def view_img(k):
    """Display image number k of the currently selected set."""
    plt.imshow(data[k])

In [None]:
# Cell[l]
### VISUALIZE HOGs!
# Pick which image set to browse (swap the commented line to view the negatives).
data = datasets['train_pos']
# data = datasets['train_neg']

@interact(k = (0, data.shape[0]-1))
def view_img(k):
    """Display image number k with its histogram-of-gradients overlay."""
    plt.figure(figsize = (15,15))    # feel free to change this depending on your screen resolution
    plt.imshow(data[k])
    # we can call .eval() instead of sess.run() below since we're using an InteractiveSession
    # NOTE(review): element [0] of the HOG result is what plot_cell_hogs consumes here;
    # presumably the per-cell histograms -- confirm against HOG.py.
    plot_cell_hogs(tf_histogram_of_oriented_gradients(data[k])[0].eval())

In [None]:
# Cell [m]
### USE TENSORFLOW AS A COMPUTATION FRAMEWORK (NOT JUST FOR BACKPROPAGATION!)
##### Your code starts here #####
############################################################
def get_hogs(data_pos, data_neg):
    """Build a HOG feature matrix and a +1 / -1 label column for two image sets.

    For every image, element [1] of tf_histogram_of_oriented_gradients is used
    as its feature tensor (presumably the flattened descriptor -- confirm in
    HOG.py). Positive images come first, negatives after them; labels are +1
    for data_pos and -1 for data_neg, one row per image.

    Tensors are evaluated with .eval(), so a default session must exist.

    Returns:
        (x, y): the concatenated features and the matching label column,
        both as evaluated arrays.
    """
    def _describe(images):
        # One feature tensor per image, in input order.
        return [tf_histogram_of_oriented_gradients(image)[1] for image in images]

    pos_features = _describe(data_pos)
    neg_features = _describe(data_neg)
    pos_labels = tf.ones((len(data_pos), 1), tf.int32)
    neg_labels = tf.fill((len(data_neg), 1), -1)

    features = tf.concat(pos_features + neg_features, axis = 0).eval()
    targets = tf.concat([pos_labels, neg_labels], axis = 0).eval()
    return features, targets

# Replace the synthetic data from Cell [a] with HOG features of the pedestrian images.
x_train, y_train = get_hogs(datasets["train_pos"], datasets["train_neg"])
x_eval, y_eval = get_hogs(datasets["eval_pos"], datasets["eval_neg"])
x_pred, y_true = get_hogs(datasets["test_pos"], datasets["test_neg"])
############################################################
# The HOG descriptors are used as the features directly, so no further extraction is applied.
featureExtractor = identityFeatureExtractor

In [None]:
### RERUN TRAINING CELLS [c]-[g] ABOVE (INCLUDING THE SECOND CELL LABELLED [g], SO THAT labels_pred IS RECOMPUTED FOR THE NEW DATA)

In [None]:
# Cell [n]
### VISUALIZE WEIGHTED HOGs!
# Pick which image set to browse (swap the commented line to view the negatives).
data = datasets['train_pos']
# data = datasets['train_neg']

# Trained SVM weights as a single row, split into their positive part and the
# magnitude of their negative part (both arrays are non-negative).
weights = np.reshape(estimator_model.get_variable_value('svm/weights'), [1, -1])
pos_weights = np.maximum(weights, 0)
neg_weights = -np.minimum(weights, 0)
@interact(k = (0, data.shape[0]-1), show_image = (False, True))
def view_img(k, show_image):
    """Show image k three times: plain HOG, positively weighted HOG, negatively weighted HOG.

    show_image toggles whether the picture is drawn underneath the HOG glyphs
    (the image is multiplied by show_image, so a falsy value blanks it).
    """
    plt.figure(figsize = (15,15))    # feel free to change this depending on your screen resolution
    plt.imshow(data[k])
    hog = hog_descriptor(data[k]).eval()
    # NOTE(review): the [16, 8, 1, 9] reshape presumably means a 16 x 8 grid of cells
    # with 9 orientation bins each -- confirm against HOG.py. Averaging over axis 2
    # (length 1) just drops that axis.
    unweighted_block_hogs = np.mean(hog.reshape([16, 8, 1, 9]), axis=2)
    pos_weighted_block_hogs = np.mean(pos_weights.reshape([16, 8, 1, 9])*hog.reshape([16, 8, 1, 9]), axis=2)
    neg_weighted_block_hogs = np.mean(neg_weights.reshape([16, 8, 1, 9])*hog.reshape([16, 8, 1, 9]), axis=2)

    # Panel 1: unweighted HOG.
    plt.subplot(1,3,1)
    # Glyph colour: yellow over the picture, white when the picture is hidden.
    color = 'yellow' if show_image else 'white'
    plt.imshow(show_image*data[k])
    plot_cell_hogs(unweighted_block_hogs, pixels_in_cell=8, color=color)
    # Panel 2: HOG scaled by the positive weights.
    plt.subplot(1,3,2)
    plt.imshow(show_image*data[k])
    plot_cell_hogs(pos_weighted_block_hogs, pixels_in_cell=8, color=color)
    # Panel 3: HOG scaled by the magnitudes of the negative weights.
    plt.subplot(1,3,3)
    plt.imshow(show_image*data[k])
    plot_cell_hogs(neg_weighted_block_hogs, pixels_in_cell=8, color=color)

In [None]:
# misclassification rate
# Requires labels_pred to have been recomputed for the HOG data (rerun the prediction cell).
# d is non-zero wherever the prediction differs from the true label.
d = labels_pred - y_true[:,0]
misclass_idx = np.where(d != 0)[0]
# The float() casts keep the ratios fractional even without
# `from __future__ import division` being active.
rate = misclass_idx.shape[0] / float(y_true.shape[0])
# d == 2 means predicted +1 while the true label is -1 (reported as "Positive").
rate_p = np.count_nonzero(d[misclass_idx] == 2) / float(y_true.shape[0])
rate_n = rate - rate_p
# Parenthesised print works as a statement on Python 2 and a function call on Python 3.
print("Misclassification Rate: %.3f = %.3f (Positive) + %.3f (Negative)" % (rate, rate_p, rate_n))