# Training classifiers on HOG Data
#### For extraction of features, see extractFeatures.ipynb

Conda environment: `faus_dl`

In [1]:
import datetime as dt
import os, sys

import bcolz
import matplotlib.pyplot as plt
import numpy as np
import sklearn
from matplotlib.colors import Normalize
from sklearn import datasets, svm, metrics
from sklearn.model_selection import GridSearchCV, train_test_split

In [2]:
# Root of the DISFA dataset. Set the DISFA_PATH environment variable to point
# the notebook at a different copy; the original author's location is the default.
# os.path.join(..., '') guarantees the trailing slash that the string
# concatenations below rely on.
DISFA_path=os.path.join(os.environ.get('DISFA_PATH','/media/amogh/Stuff/CMU/datasets/DISFA_data/'),'')
# Sub-directories with the action-unit labels and the right-camera videos.
DISFA_AU_path=DISFA_path+'ActionUnit_Labels/'
print(DISFA_AU_path)
Videos_right_path=DISFA_path+'Videos_RightCamera/'
print(Videos_right_path)

/media/amogh/Stuff/CMU/datasets/DISFA_data/ActionUnit_Labels/
/media/amogh/Stuff/CMU/datasets/DISFA_data/Videos_RightCamera/


In [3]:
import mnist_helpers

Helper Functions

## FAU4_1

Loading the saved features:

In [4]:
# Directory holding the saved HOG features for FAU4_1. The trailing empty
# component keeps the trailing slash that later cells concatenate onto.
dir_features_hog_1_fau4_1=os.path.join(DISFA_path,"features","hog_1","fau4_1","")
# exist_ok=True makes this a no-op when the directory is already present.
os.makedirs(dir_features_hog_1_fau4_1,exist_ok=True)

In [5]:
# Open the stored positive-sample HOG features from disk (mode='r').
features_fau4_1_hog_1_positives=bcolz.carray(
    rootdir=os.path.join(dir_features_hog_1_fau4_1,'positives/'),
    mode='r',
)
print("features_fau4_1_hog_1_positives loaded, shape is: ",features_fau4_1_hog_1_positives.shape)

features_fau4_1_hog_1_positives loaded, shape is:  (946, 448)


In [6]:
# Open the stored negative-sample HOG features from disk (mode='r').
features_fau4_1_hog_1_negatives=bcolz.carray(
    rootdir=os.path.join(dir_features_hog_1_fau4_1,'negatives/'),
    mode='r',
)
print("features_fau4_1_hog_1_negatives loaded, shape is : ",features_fau4_1_hog_1_negatives.shape)

features_fau4_1_hog_1_negatives loaded, shape is :  (1000, 448)


Defining the training and testing data

In [7]:
# Balance the classes: keep every positive and take only as many negatives as
# there are positives. The count is read from the data rather than hard-coded,
# so it stays correct if the features are re-extracted.
n_positives=features_fau4_1_hog_1_positives.shape[0]
X=np.concatenate([features_fau4_1_hog_1_positives,features_fau4_1_hog_1_negatives[:n_positives]])
X.shape

(1892, 448)

In [8]:
# Labels follow the row order of X: 1 for the leading positive rows, 0 for the
# remaining (negative) rows. The negative count is taken from X itself so the
# labels always line up with the feature rows.
n_pos=features_fau4_1_hog_1_positives.shape[0]
Y=np.concatenate([np.repeat(1,n_pos),np.repeat(0,X.shape[0]-n_pos)])
Y.shape

(1892,)

In [9]:
# Hold out 15% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.15,random_state=42)

Defining the parameter range for SVM:

In [10]:
# Candidate RBF-SVM hyper-parameters: each power of ten in the range, plus the
# same value multiplied by 5.
#   gamma: 1e-3 .. 1e0 -> 8 values
#   C:     1e-1 .. 1e1 -> 6 values
gamma_range=(np.logspace(-3,0,4)[:,np.newaxis]*np.array([1,5])).ravel()
C_range=(np.logspace(-1,1,3)[:,np.newaxis]*np.array([1,5])).ravel()
parameters=dict(kernel=['rbf'],C=C_range,gamma=gamma_range)

In [11]:
# Base estimator whose kernel, C and gamma are set by the grid search.
svm_clsf=svm.SVC()
# cv=3 is stated explicitly: it matches the 3-fold search recorded in this
# notebook's output, whereas the library default differs between
# scikit-learn releases.
grid_clsf=GridSearchCV(estimator=svm_clsf,param_grid=parameters,cv=3,n_jobs=1,verbose=2)

Start training and parameter search 

In [13]:
# Run the grid search on the training split and report how long it took.
start_time=dt.datetime.now()
print('Start param searching at {}'.format(start_time))
grid_clsf.fit(X_train,y_train)
elapsed_time=dt.datetime.now()-start_time
print('Elapsed time, param searching {}'.format(elapsed_time))
# List the result tables the search produced (sorted key names).
sorted(grid_clsf.cv_results_)

Start param searching at 2018-05-27 16:14:43.992140
Fitting 3 folds for each of 48 candidates, totalling 144 fits
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=0.1, gamma=0.001, kernel=rbf, total=   0.9s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV] ................... C=0.1, gamma=0.001, kernel=rbf, total=   0.9s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=0.1, gamma=0.001, kernel=rbf, total=   0.9s
[CV] C=0.1, gamma=0.005, kernel=rbf ..................................
[CV] ................... C=0.1, gamma=0.005, kernel=rbf, total=   0.6s
[CV] C=0.1, gamma=0.005, kernel=rbf ..................................
[CV] ................... C=0.1, gamma=0.005, kernel=rbf, total=   0.6s
[CV] C=0.1, gamma=0.005, kernel=rbf ..................................
[CV] ................... C=0.1, gamma=0.005, kernel=rbf, total=   0.6s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] .................... C=0.1, gamma=0.01, kernel=rbf, total=   0.4s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] .................... C=0.1, gamma=0.01, kernel=rbf, total=   0.4s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] .

[Parallel(n_jobs=1)]: Done 144 out of 144 | elapsed:  1.1min finished


['mean_fit_time',
 'mean_score_time',
 'mean_test_score',
 'mean_train_score',
 'param_C',
 'param_gamma',
 'param_kernel',
 'params',
 'rank_test_score',
 'split0_test_score',
 'split0_train_score',
 'split1_test_score',
 'split1_train_score',
 'split2_test_score',
 'split2_train_score',
 'std_fit_time',
 'std_score_time',
 'std_test_score',
 'std_train_score']

In [17]:
# Keep the winning hyper-parameters and the model fitted with them.
params=grid_clsf.best_params_
classifier=grid_clsf.best_estimator_

# Arrange the mean cross-validation scores as a grid with one row per C value
# and one column per gamma value.
scores=np.reshape(
    grid_clsf.cv_results_['mean_test_score'],
    (len(C_range),len(gamma_range)),
)
# plot_param_space_scores(scores,C_range, gamma_range)

In [18]:
# Display the mean cross-validation scores, shaped len(C_range) x len(gamma_range).
scores

array([[0.50124378, 0.97885572, 0.97885572, 0.97947761, 0.9800995 ,
        0.98258706, 0.97885572, 0.50124378],
       [0.97885572, 0.97885572, 0.97947761, 0.98258706, 0.98507463,
        0.99626866, 0.99440299, 0.50124378],
       [0.97885572, 0.97947761, 0.98072139, 0.98507463, 0.99502488,
        0.99751244, 0.99689055, 0.95957711],
       [0.97947761, 0.98258706, 0.98569652, 0.99689055, 0.99813433,
        0.99751244, 0.99751244, 0.95957711],
       [0.98134328, 0.98569652, 0.99502488, 0.99813433, 0.99813433,
        0.99751244, 0.99751244, 0.95957711],
       [0.98569652, 0.99689055, 0.99813433, 0.99813433, 0.99813433,
        0.99751244, 0.99751244, 0.95957711]])

In [24]:
# Score the selected classifier on the held-out test split.
expected = y_test
predicted = classifier.predict(X_test)
print(predicted)

# Count of test samples whose prediction matches the true label.
n_matching = np.count_nonzero(expected==predicted)
print("predicted and expected are same in: ", n_matching," out of ", expected.shape[0])

# Per-class precision / recall / f1 as formatted by scikit-learn.
report = metrics.classification_report(expected, predicted)
print("Classification report for classifier %s:\n%s\n" % (classifier, report))

cm = metrics.confusion_matrix(expected, predicted)
print("Confusion matrix:\n%s" % (cm,))

# plot_confusion_matrix(cm)

accuracy = metrics.accuracy_score(expected, predicted)
print("Accuracy={}".format(accuracy))


[1 1 0 0 1 0 0 0 1 0 1 0 1 1 1 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 0 1 0 0
 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 0 1 0 1 1 1 1 0 1 0 1 1 1 1 0 0 1 0 0
 1 1 1 1 1 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 0 0 1 0 1 0
 1 0 0 0 1 0 1 0 1 0 0 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 0 0 0 1 1 0 1 0 0 1 0
 1 1 0 1 1 1 1 0 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 1 1 1 0 1 1 1 0 1 1 1
 1 0 0 0 0 1 0 1 1 1 0 0 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1
 1 0 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 1 0 0 1 1 1 0 1 0 0 1 1 0 0 1 0 1 0 1 0
 0 1 0 1 0 0 1 0 0 0 1 0 1 1 1 1 0 0 0 0 0 0 0 1 1]
predicted and expected are same in:  284  out of  284
Classification report for classifier SVC(C=5.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       1.00      1.00      1.00       144
  

In [25]:
# Sanity check: shape of the label vector built for the FAU4_1 experiment.
Y.shape

(1892,)

In [27]:
# Sanity check on the full positive set.
# NOTE(review): these rows were used to build X, and 85% of X went into the
# training split, so this is not a held-out evaluation.
classifier.predict(features_fau4_1_hog_1_positives)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [36]:
# Shape of the HOG feature matrix, checked before X is reassigned in the next cell.
X.shape

(1892, 448)

In [37]:
# Two-class, two-feature subset of the iris dataset.
# NOTE: this cell reassigns X, X_train, y_train, X_test and y_test, replacing
# the HOG data defined earlier in the notebook.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Drop class 0 and keep only the first two feature columns.
X = X[y != 0, :2]
y = y[y != 0]

n_sample = len(X)

# Shuffle with a fixed seed so the split below is reproducible.
np.random.seed(0)
order = np.random.permutation(n_sample)
X = X[order]
# The builtin float replaces the np.float alias, which recent NumPy releases
# no longer provide.
y = y[order].astype(float)

# First 90% of the shuffled samples for training, the remainder for testing.
n_train = int(.9 * n_sample)
X_train = X[:n_train]
y_train = y[:n_train]
X_test = X[n_train:]
y_test = y[n_train:]

In [14]:
# Standard scientific Python imports
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import numpy as np

def show_some_digits(images, targets, sample_size=24, title_text='Digit {}' ):
    '''
    Visualize randomly chosen digits in a grid plot with 6 columns.

    images - array of flattened digits [:,784]; each row is reshaped to 28x28
    targets - final labels, indexed in parallel with images
    sample_size - number of digits to draw; indices come from np.random.choice,
                  so the same digit may be drawn more than once
    title_text - format string for each subplot title; receives the label
    '''
    nsamples=sample_size
    rand_idx = np.random.choice(images.shape[0],nsamples)
    images_and_labels = list(zip(images[rand_idx], targets[rand_idx]))

    # plt.subplot needs an integer row count, while np.ceil returns a float.
    nrows = int(np.ceil(nsamples/6.0))

    plt.figure(1, figsize=(15, 12), dpi=160)
    for index, (image, label) in enumerate(images_and_labels):
        plt.subplot(nrows, 6, index + 1)
        plt.axis('off')
        #each image is flat, we have to reshape to 2D array 28x28-784
        plt.imshow(image.reshape(28,28), cmap=plt.cm.gray_r, interpolation='nearest')
        plt.title(title_text.format(label))

def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Plots confusion matrix as a colour-coded image with a colorbar.

    cm - confusion matrix (passed straight to plt.imshow)
    title - text shown above the plot
    cmap - matplotlib colormap used to colour the cells
    """
    plt.figure(1, figsize=(15, 12), dpi=160)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run tight_layout last so the axis labels are part of the layout it fits.
    plt.tight_layout()
    


class MidpointNormalize(Normalize):
    """
    Normalize variant that maps vmin -> 0, midpoint -> 0.5 and vmax -> 1,
    interpolating piecewise-linearly in between with np.interp.
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        # Record the midpoint, then let the base class store vmin/vmax/clip.
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # The clip argument is accepted but not used here.
        data_points = [self.vmin, self.midpoint, self.vmax]
        unit_points = [0, 0.5, 1]
        return np.ma.masked_array(np.interp(value, data_points, unit_points))

def plot_param_space_scores(scores, C_range, gamma_range):
    """
    Draw heatmap of the validation accuracy as a function of gamma and C


    Parameters
    ----------
    scores - 2D numpy array with accuracies; rows are labelled with C_range
             and columns with gamma_range
    C_range - values shown as y tick labels
    gamma_range - values shown as x tick labels

    """
    # Scores are colour-coded with the jet colormap. The normalizer pins 0.5 to
    # the bottom of the colour scale and 0.9 to its middle, so half of the
    # colour range is spent on scores above 0.9 and small differences among
    # the best settings stay visible.
    color_norm = MidpointNormalize(vmin=0.5, midpoint=0.9)
    gamma_ticks = np.arange(len(gamma_range))
    C_ticks = np.arange(len(C_range))

    plt.figure(figsize=(8, 6))
    plt.subplots_adjust(left=.2, right=0.95, bottom=0.15, top=0.95)
    plt.imshow(scores, interpolation='nearest', cmap=plt.cm.jet, norm=color_norm)
    plt.xlabel('gamma')
    plt.ylabel('C')
    plt.colorbar()
    plt.xticks(gamma_ticks, gamma_range, rotation=45)
    plt.yticks(C_ticks, C_range)
    plt.title('Validation accuracy')
    plt.show()