# SVM Classifier for Hard Negative Mining Project


In [2]:
# Run some setup code for this notebook.

import random
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from skimage.feature import hog
from skimage import data, exposure
import time 

from __future__ import print_function

# This is to make matplotlib figures appear inline in the notebook
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# So the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

## Load images

In [9]:
# Root folder holding one sub-folder per split, each laid out the way
# `datasets.ImageFolder` expects (one sub-directory per class).
data_dir = 'facesDataAll'

# Names of the dataset splits; also the sub-folder names under `data_dir`.
SPLITS = ['train', 'dev', 'test']

# Per-channel mean / std passed to `transforms.Normalize`.
# NOTE(review): these look like the usual ImageNet statistics -- confirm.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# `transforms.Scale` was renamed to `transforms.Resize` and has since been
# removed from torchvision, so referencing it unconditionally breaks this cell
# on current installs. Prefer `Resize`; fall back to `Scale` only when the
# installed torchvision predates the rename.
_resize = getattr(transforms, 'Resize', None) or transforms.Scale


def _train_pipeline():
    """Resize to 256, take a random 224 crop with a random horizontal flip,
    convert to a tensor and normalize."""
    return transforms.Compose([
        _resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ])


def _eval_pipeline():
    """Resize to 256, take the central 224 crop, convert to a tensor and
    normalize. Used for both the 'dev' and 'test' splits."""
    return transforms.Compose([
        _resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ])


# One preprocessing pipeline per split (same keys as before).
data_transforms = {
    'train': _train_pipeline(),
    'dev': _eval_pipeline(),
    'test': _eval_pipeline(),
}

# One ImageFolder dataset per split, read from <data_dir>/<split>.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split),
                                data_transforms[split])
    for split in SPLITS
}

# batch_size=1: every iteration yields a single (image, label) pair.
# Every split is shuffled, exactly as in the original configuration.
dataloaders = {
    split: torch.utils.data.DataLoader(image_datasets[split], batch_size=1,
                                       shuffle=True, num_workers=0)
    for split in SPLITS
}

# Number of samples in each split.
dataset_sizes = {split: len(image_datasets[split]) for split in SPLITS}

# Class names as reported by the training ImageFolder.
class_names = image_datasets['train'].classes

def collect_split(loader):
    """Run one pass over ``loader`` and gather its batches into lists.

    Parameters
    ----------
    loader : iterable
        Yields ``(image, label)`` pairs whose elements expose ``.numpy()``
        (here: the DataLoaders built above).

    Returns
    -------
    images : list
        ``image.numpy()`` for every batch, in iteration order.
    labels : list
        ``label.numpy()`` for every batch, in the same order.
    """
    images, labels = [], []
    # Unpack directly in the loop header: the previous loop variable was
    # called `data`, which overwrote the `data` module imported from skimage.
    for image, label in loader:
        images.append(image.numpy())
        # Convert labels the same way as images so that a later np.array()
        # call stacks plain ndarrays instead of tensor objects.
        labels.append(label.numpy())
    return images, labels


# The raw (transformed) pixels are used as features; no HOG descriptor is
# computed here.
X_train, y_train = collect_split(dataloaders['train'])
print('Done processing train set')

X_dev, y_dev = collect_split(dataloaders['dev'])
print('Done processing dev set')

X_test, y_test = collect_split(dataloaders['test'])
print('Done processing test set')


Done processing train set
Done processing dev set
Done processing test set


In [10]:
# Stack the per-sample lists of every split into single ndarrays.
X_train, y_train = np.array(X_train), np.array(y_train)
X_dev, y_dev = np.array(X_dev), np.array(y_dev)
X_test, y_test = np.array(X_test), np.array(y_test)

In [11]:


# Preprocessing: turn every sample into one flat feature row.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)

# Labels become column vectors with one row per sample.
y_train = y_train.reshape(X_train.shape[0], 1)
y_test = y_test.reshape(X_test.shape[0], 1)
y_dev = y_dev.reshape(X_dev.shape[0], 1)

# Sanity check: report the shape of every array.
for caption, arr in (
        ('Train data shape: ', X_train),
        ('Train labels shape: ', y_train),
        ('Test data shape: ', X_test),
        ('Test labels shape: ', y_test),
        ('Dev data shape: ', X_dev),
        ('Dev labels shape: ', y_dev)):
    print(caption, arr.shape)

Train data shape:  (948, 150528)
Train labels shape:  (948, 1)
Test data shape:  (183, 150528)
Test labels shape:  (183, 1)
Dev data shape:  (180, 150528)
Dev labels shape:  (180, 1)


In [12]:
# Mean-centering: the per-feature mean is computed on the training rows only
# and then subtracted, in place, from every split.
mean_image = X_train.mean(axis=0)

for split_rows in (X_train, X_test, X_dev):
    split_rows -= mean_image  # modifies the underlying array in place

In [13]:
# Bias trick: append a constant column of ones to every split.
X_train = np.concatenate([X_train, np.ones((X_train.shape[0], 1))], axis=1)
X_test = np.concatenate([X_test, np.ones((X_test.shape[0], 1))], axis=1)
X_dev = np.concatenate([X_dev, np.ones((X_dev.shape[0], 1))], axis=1)

# Each split should now have exactly one more column than before.
print(X_train.shape, X_test.shape, X_dev.shape)

(948, 150529) (183, 150529) (180, 150529)


### Stochastic Gradient Descent

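Ready to fit an SVM on the training set and measure its accuracy. Note that the cells below use scikit-learn's `SVC` solver rather than a hand-written SGD loop.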

In [14]:
# An earlier version of this cell trained a LinearSVM from utils.classifiers
# (learning_rate=1e-7, reg=2.5e4, num_iters=1500); it is replaced by
# scikit-learn's SVC below.
from sklearn.svm import SVC

# SVC with its default settings; verbose=True only turns on solver logging.
clf = SVC(verbose=True)

# Time the fit. The labels are stored as a column vector, so flatten them.
tic = time.time()
clf.fit(X_train, y_train.ravel())
toc = time.time()
print('That took %fs' % (toc - tic))

# Mean accuracy on the held-out test split and on the training split.
test_acc = clf.score(X_test, y_test.ravel())
train_acc = clf.score(X_train, y_train.ravel())

print('Train acc: ', train_acc)
print('Test acc: ', test_acc)

[LibSVM]That took 163.541493s
Train acc:  0.918776371308
Test acc:  0.573770491803


In [14]:
# Same experiment as the previous cell, but with a linear kernel.
clfLin = SVC(kernel='linear')

# Time the fit. The labels are stored as a column vector, so flatten them.
tic = time.time()
clfLin.fit(X_train, y_train.ravel())
toc = time.time()
print('That took %fs' % (toc - tic))

# Mean accuracy on the held-out test split and on the training split.
test_accLin = clfLin.score(X_test, y_test.ravel())
train_accLin = clfLin.score(X_train, y_train.ravel())

print('Train acc linear: ', train_accLin)
print('Test acc linear: ', test_accLin)

Train acc linear:  1.0
Test acc linear:  0.464480874317
