# DISFA

## Importing libraries and setting folders

Libraries

In [1]:
import os, sys, random, glob, argparse, math, gc

In [2]:
import cv2
import dlib
import imutils
from imutils import face_utils
import matplotlib
import matplotlib.pyplot as plt
from skimage.feature import hog
from skimage import data, exposure

In [79]:
import sklearn
from sklearn import svm, metrics

In [3]:
import numpy as np
import pandas as pd
from bcolz import carray

In [81]:
from tqdm import tqdm
from time import sleep
import datetime as dt

Folders

In [9]:
# Root folder of the DISFA data (machine-specific absolute path, with trailing slash).
folder_DISFA_data = "/media/amogh/Stuff/CMU/datasets/DISFA_data/"
# Sub-folder of the root; presumably holds the raw action-unit label files - TODO confirm.
folder_DISFA_FAU = "/media/amogh/Stuff/CMU/datasets/DISFA_data/ActionUnit_Labels/"
# Relative folder from which getDISFAFramesDictionary reads "<thresh>/FAU<n>.csv".
folder_DISFA_FAU_summary = "DISFA_FAUs/"

## Helper functions

### Getting a dictionary with positives and negatives for each subject and frame

##### This function returns a dictionary listing all the positive and negative frames of every subject

In [7]:
def getDISFAFramesDictionary(folder_DISFA_FAU_summary, fau_no, fau_thresh):
    """Read the frame summary of one FAU and group the frames per subject.

    The CSV "<folder_DISFA_FAU_summary><fau_thresh>/FAU<fau_no>.csv" is read.
    Columns whose name does not contain "neg" are taken as the subjects'
    positive frames; each subject's negative frames come from the column
    "<subject>_neg". NaN cells are dropped.

    Returns:
        A dictionary of the form
        {'SN001': {'positives': [1, 2, 3], 'negatives': [4, 5, 6, 7]}},
        i.e. for each subject the frame numbers that are positives and the
        frame numbers that are negatives.
    """
    csv_path = folder_DISFA_FAU_summary + "{}/".format(fau_thresh) + "FAU{}.csv".format(fau_no)
    frames_table = pd.read_csv(csv_path)
    positive_columns = frames_table.filter(regex="^((?!neg).)*$", axis=1)
    negative_columns = frames_table.filter(like="neg", axis=1)

    def valid_frames(column):
        # columns are NaN-padded, so drop the NaN cells
        return [frame for frame in column.values if not math.isnan(frame)]

    return {
        subject: {
            'positives': valid_frames(positive_columns[subject]),
            'negatives': valid_frames(negative_columns["{}_neg".format(subject)]),
        }
        for subject in positive_columns.columns.values
    }

##### To balance the classes, build a dictionary in which each subject's negatives are randomly subsampled to match its number of positives.

In [8]:
def equaliseDictionary(fau_dict):
    """Balance positives and negatives of every subject by random subsampling.

    For each subject the larger of the two frame lists is randomly subsampled
    (without replacement) to the size of the smaller one. The dictionary is
    modified in place and also returned.

    Args:
        fau_dict: {subject: {'positives': [...], 'negatives': [...]}}.

    Returns:
        The same dictionary object, with equally long lists per subject.
    """
    for frames in fau_dict.values():
        positives = frames['positives']
        negatives = frames['negatives']
        if len(negatives) >= len(positives):
            frames['negatives'] = random.sample(negatives, len(positives))
        else:
            # random.sample cannot draw more items than the population holds,
            # so with too few negatives the positives are subsampled instead
            frames['positives'] = random.sample(positives, len(negatives))
    return fau_dict

### Get test and train folds of the data

In [9]:
# returns a dictionary with keys as fold_0,fold_1,...,test
# make sure number of folds exactly divide the train subjects
def getTrainTestFolds (fau_dict, no_folds, no_test_subjects):
    """Randomly split the subjects into a test set and equally sized train folds.

    Args:
        fau_dict: {subject: frames} dictionary; the values are reused as-is.
        no_folds: number of training folds to create.
        no_test_subjects: number of subjects held out under the 'test' key.

    Returns:
        {'test': {...}, 'fold_0': {...}, ..., 'fold_<no_folds-1>': {...}} where
        every inner dictionary maps a subject to its entry of `fau_dict`.
        Each fold holds len(train subjects) // no_folds subjects; when the
        division is not exact the leftover train subjects are in no fold.
    """
    # a real list is needed: a dict key view cannot be shuffled on Python 3
    list_subjects = list(fau_dict.keys())
    random.shuffle(list_subjects)
    # explicit split index; slicing with "-0" would put every subject in the test set
    split_index = max(0, len(list_subjects) - no_test_subjects)
    train_subjects = list_subjects[:split_index]
    test_subjects = list_subjects[split_index:]
    dict_folds = {'test': {subj: fau_dict[subj] for subj in test_subjects}}
    # integer division so the result is usable as a slice bound
    fold_size = len(train_subjects) // no_folds
    for fold_no in range(no_folds):
        fold_subjects = train_subjects[fold_no*fold_size : (fold_no+1)*fold_size]
        dict_folds['fold_{}'.format(fold_no)] = {sub: fau_dict[sub] for sub in fold_subjects}
    return dict_folds

### Crop and save images and features

##### Function for cropping given an image path 

In [10]:
def similarityTransform(inPoints, outPoints) :
    """Estimate the transform that maps two input points onto two output points.

    The estimator is fed three point pairs, so a third point is synthesised for
    each pair by rotating the first point by 60 degrees around the second one
    (the third corner of an equilateral triangle).

    Args:
        inPoints: two (x, y) points in the source image.
        outPoints: the two (x, y) positions they should be mapped to.

    Returns:
        The matrix returned by OpenCV's estimator (used by the caller with
        cv2.warpAffine); presumably None when estimation fails - TODO confirm.
    """
    s60 = math.sin(60*math.pi/180)
    c60 = math.cos(60*math.pi/180)

    def with_third_vertex(points):
        pts = np.copy(points).tolist()
        dx = pts[0][0] - pts[1][0]
        dy = pts[0][1] - pts[1][1]
        # builtin int replaces np.int, which was only an alias of it and is
        # no longer available in current NumPy releases
        pts.append([int(c60*dx - s60*dy + pts[1][0]), int(s60*dx + c60*dy + pts[1][1])])
        return pts

    src = np.array([with_third_vertex(inPoints)])
    dst = np.array([with_third_vertex(outPoints)])

    if hasattr(cv2, 'estimateRigidTransform'):
        return cv2.estimateRigidTransform(src, dst, False)
    # OpenCV 4 dropped estimateRigidTransform; estimateAffinePartial2D is the
    # documented replacement for the fullAffine=False case
    tform, _inliers = cv2.estimateAffinePartial2D(src, dst)
    return tform

In [11]:
# dlib's frontal face detector, created once at module level.
detector = dlib.get_frontal_face_detector()
# Landmark predictor; the model file is looked up relative to the current working directory.
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [12]:
#new function, doesnt write landmarks every single time
def detectAndaligncrop(impath, detector, predictor):
    """Detect a face in an image file and return it aligned to a 112x112 crop.

    The first detected face is warped with a similarity transform so that
    landmarks 36 and 45 (used as the eye corners) land at fixed positions
    of the output image.

    Args:
        impath: path of the image, read with cv2.imread.
        detector: callable returning face rectangles for (gray_image, 1).
        predictor: callable returning the landmark shape for (gray_image, rect).

    Returns:
        (rgb_image, points): the warped 112x112 image in RGB channel order,
        as float32 pixel values divided by 255, and a float32 (68, 2) array
        with the landmarks mapped into that image.

    Raises:
        IndexError: if no face is detected in the image.
    """
    image = cv2.imread(impath)
    image_float = np.float32(image)/255.0
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # only the first detection is ever used, so landmarks are predicted for it alone
    first_rect = next(iter(detector(gray, 1)), None)
    if first_rect is None:
        raise IndexError("no face detected in {}".format(impath))
    shape = face_utils.shape_to_np(predictor(gray, first_rect))
    points = [(x, y) for (x, y) in shape]
    # size of the aligned output and target positions of the two eye corners
    w = 112
    h = 112
    eyecornerDst = [(int(0.3 * w), int(h / 3)), (int(0.7 * w), int(h / 3))]
    # Corners of the eye in input image
    eyecornerSrc = [points[36], points[45]]
    # Compute similarity transform
    tform = similarityTransform(eyecornerSrc, eyecornerDst)
    # Apply similarity transformation to the image
    img = cv2.warpAffine(image_float, tform, (w, h))
    # Apply similarity transform on points
    points2 = np.reshape(np.array(points), (68, 1, 2))
    pointsNorm = np.float32(np.reshape(cv2.transform(points2, tform), (68, 2)))
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb_image, pointsNorm

##### Functions for getting features

Getting HOG, given an image path or an image, return features

In [13]:
#takes in rgb images and returns the required HOG descriptor array.
def getHOGFeatures (orientations, pixels_per_cell, cells_per_block, image):
    """Compute the HOG descriptor of an RGB image.

    Args:
        orientations, pixels_per_cell, cells_per_block: forwarded unchanged to
            skimage.feature.hog.
        image: either an RGB image array or a path to an image file; a path is
            read with cv2.imread and converted from BGR to RGB.

    Returns:
        The HOG feature descriptor returned by skimage.feature.hog.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3 has no basestring
    if isinstance(image, string_types):
        im = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB)
    else:
        im = image
    gray_im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)
    # The visualisation image was computed only to be discarded, and its
    # keyword ("visualise") is not accepted by newer scikit-image releases;
    # without it hog() returns just the descriptor.
    fd = hog(gray_im, orientations=orientations, pixels_per_cell=pixels_per_cell, cells_per_block=cells_per_block)
    return fd

Other ways to get features

##### Preprocessing functions and function_dictionary

In [14]:
def FAU4_1(image,landmarks):
    """Return the first 38 rows of `image`; `landmarks` is accepted but not used."""
    return image[:38]

def FAU1_1(image,landmarks):
    """Return the first 38 rows of `image`; `landmarks` is accepted but not used."""
    return image[:38]

def FAU2_1(image,landmarks):
    """Return the first 38 rows of `image`; `landmarks` is accepted but not used."""
    return image[:38]

def FAU5_1(image,landmarkPoints): #includes border
    """Crop a landmark-bounded region and zero-pad it to 32 rows x 64 columns.

    Rows run from the y of landmark 17 to the y of landmark 29, columns from
    the x of landmark 3 to the x of landmark 12. The crop stays in the
    top-left corner; the padding is added below and to the right.

    Args:
        image: image array indexed as [row, column(, channel)].
        landmarkPoints: indexable of (x, y) landmark coordinates.

    Returns:
        The cropped image padded with zeros to 32x64.

    Raises:
        ValueError: if the crop is larger than 32x64.
    """
    target_height, target_width = 32, 64
    rect_top=int(landmarkPoints[17][1])
    rect_bottom=int(landmarkPoints[29][1])
    rect_left=int(landmarkPoints[3][0])
    rect_right=int(landmarkPoints[12][0])
    cropped_im=image[rect_top:rect_bottom,rect_left:rect_right]
    # height/width were never defined before being used for the border size
    height, width = cropped_im.shape[:2]
    if height > target_height or width > target_width:
        raise ValueError("crop of size {}x{} does not fit in {}x{}".format(height, width, target_height, target_width))
    # pad only the two spatial axes; any channel axis is left untouched
    pad_widths = [(0, target_height-height), (0, target_width-width)] + [(0, 0)] * (cropped_im.ndim - 2)
    img_with_border = np.pad(cropped_im, pad_widths, mode='constant', constant_values=0)
    return img_with_border

def FAU12right_1(image,landmarkPoints):
    """Crop the region between landmarks 34 and 11 and zero-pad it to 32x32.

    Landmark 34 gives the top-left corner (its x and y), landmark 11 the
    bottom-right corner. The crop stays in the top-left corner of the output;
    the padding is added below and to the right.

    Args:
        image: image array indexed as [row, column(, channel)].
        landmarkPoints: indexable of (x, y) landmark coordinates.

    Returns:
        The cropped image padded with zeros to 32x32.

    Raises:
        ValueError: if the crop is larger than 32x32.
    """
    target_height, target_width = 32, 32
    rect_top = int(landmarkPoints[34][1])
    rect_bottom = int(landmarkPoints[11][1])
    rect_left = int(landmarkPoints[34][0])
    rect_right = int(landmarkPoints[11][0])
    cropped_im = image[rect_top:rect_bottom,rect_left:rect_right]
    # height/width were never defined before being used for the border size
    height, width = cropped_im.shape[:2]
    if height > target_height or width > target_width:
        raise ValueError("crop of size {}x{} does not fit in {}x{}".format(height, width, target_height, target_width))
    # pad only the two spatial axes; any channel axis is left untouched
    pad_widths = [(0, target_height-height), (0, target_width-width)] + [(0, 0)] * (cropped_im.ndim - 2)
    img_with_border = np.pad(cropped_im, pad_widths, mode='constant', constant_values=0)
    return img_with_border

def FAU12left_1(image,landmarkPoints):
    """Crop the region between landmarks 32 and 5 and zero-pad it to 32x32.

    Rows run from the y of landmark 32 to the y of landmark 5, columns from
    the x of landmark 5 to the x of landmark 32. The crop stays in the
    top-left corner of the output; the padding is added below and to the right.

    Args:
        image: image array indexed as [row, column(, channel)].
        landmarkPoints: indexable of (x, y) landmark coordinates.

    Returns:
        The cropped image padded with zeros to 32x32.

    Raises:
        ValueError: if the crop is larger than 32x32.
    """
    target_height, target_width = 32, 32
    rect_top = int(landmarkPoints[32][1])
    rect_bottom = int(landmarkPoints[5][1])
    rect_left = int(landmarkPoints[5][0])
    rect_right = int(landmarkPoints[32][0])
    cropped_im = image[rect_top:rect_bottom,rect_left:rect_right]
    # height/width were never defined before being used for the border size
    height, width = cropped_im.shape[:2]
    if height > target_height or width > target_width:
        raise ValueError("crop of size {}x{} does not fit in {}x{}".format(height, width, target_height, target_width))
    # pad only the two spatial axes; any channel axis is left untouched
    pad_widths = [(0, target_height-height), (0, target_width-width)] + [(0, 0)] * (cropped_im.ndim - 2)
    img_with_border = np.pad(cropped_im, pad_widths, mode='constant', constant_values=0)
    return img_with_border

# Lookup table from a cropping-function name to the function itself; every entry takes (image, landmarks).
function_dict={'FAU1_1':FAU1_1,'FAU2_1':FAU2_1,'FAU4_1':FAU4_1,'FAU5_1':FAU5_1, 'FAU12right_1':FAU12right_1, 'FAU12left_1':FAU12left_1}

##### Crop and save function

Designed around the parameters that need to be passed: o, ppc, cpb, fau_no, thresh, the function used for cropping, and the folders.

In [15]:
#saves images and HOG features
def cropAndSaveImageHOG (o ,ppc ,cpb ,fau_no , thresh, dict_folds, folder_DISFA_data, cropping_function_name, function_dict, featuresFunction, boolSave=True):
    """Align, crop and save every listed frame together with its features.

    For each fold / subject / category of `dict_folds`, the frame image
    "<folder_DISFA_data>Videos_RightCamera/RightVideo<subj>/<frame>.jpeg" is
    aligned with detectAndaligncrop, cropped with the selected cropping
    function, written as an image, and its features are stored as a bcolz
    carray in a folder named after the frame. Missing image files are skipped;
    a frame whose processing fails is reported and skipped. A KeyboardInterrupt
    stops the whole run and the function returns.

    Args:
        o, ppc, cpb: forwarded to `featuresFunction` as its first three arguments.
        fau_no: not used by this function.
        thresh: used in the output folder names.
        dict_folds: {fold: {subject: {category: [frame numbers]}}}.
        folder_DISFA_data: root data folder; outputs go under "features/".
        cropping_function_name: key into `function_dict`.
        function_dict: maps names to cropping functions taking (image, landmarks).
        featuresFunction: called as featuresFunction(o, ppc, cpb, cropped_image).
        boolSave: not used by this function.

    Raises:
        KeyError: if `cropping_function_name` is not in `function_dict`.
    """
    # fail fast on a wrong name instead of silently skipping every frame
    cropping_function = function_dict[cropping_function_name]
    folder_cropped_images = folder_DISFA_data + "/features/cropped_images/"
    folder_dest = folder_cropped_images +  "/{}/{}/".format(thresh,cropping_function_name)
    folder_features_dest = folder_DISFA_data + "/features/hog/{}/{}/".format(thresh,cropping_function_name)
    print("images go to: ",folder_dest, "\n", "features go to:", folder_features_dest)
    # initialize dlib's face detector (HOG-based) and then create
    # the facial landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    for folder in (folder_dest, folder_features_dest):
        if not os.path.exists(folder):
            os.makedirs(folder)
    for fold in dict_folds.keys():
        print ("inside:", fold)
        for subj in dict_folds[fold]:
            for category in dict_folds[fold][subj]:
                folder_dest_image = folder_dest + "{}/{}/{}/".format(fold,subj,category)
                folder_dest_feature = folder_features_dest + "{}/{}/{}/".format(fold,subj,category)
                if not os.path.exists(folder_dest_image):
                    os.makedirs(folder_dest_image)
                for frame_no, frame in enumerate(dict_folds[fold][subj][category]):
                    im_path = folder_DISFA_data + "Videos_RightCamera/RightVideo{}/{}.jpeg".format(subj,int(frame))
                    if not os.path.exists(im_path):
                        continue
                    im_basename = os.path.basename(im_path)
                    im_dest_path = folder_dest_image + im_basename
                    features_path = folder_dest_feature + os.path.splitext(im_basename)[0]
                    try:
                        #cropping and aligning images
                        im_aligned_cropped,landmarkPoints = detectAndaligncrop(im_path, detector, predictor)
                        cropped_rgb_image = cropping_function(im_aligned_cropped, landmarkPoints)
                        #saving cropped RGB images in BGR(because opencv uses BGR as default)
                        cv2.imwrite(im_dest_path, cv2.cvtColor(cropped_rgb_image,cv2.COLOR_RGB2BGR)*255.)
                        #getting features
                        fd = featuresFunction(o, ppc, cpb, cropped_rgb_image)
                        #saving features
                        if not (os.path.exists(features_path)):
                            os.makedirs(features_path)
                        carray_fd = carray(fd, rootdir=features_path, mode = 'w')
                        carray_fd.flush()
                        if frame_no%100 == 0:
                            print("frames processed: ", frame_no)
                    except KeyboardInterrupt:
                        # abort the whole run; nothing is left to do after the loops
                        return
                    except Exception as err:
                        # best effort: report the failing frame and move on
                        print("skipping frame: ", im_path, repr(err))
                        continue
               

In [5]:
def finalSaveImagesFeatures(o ,ppc ,cpb ,fau_no , thresh, cropping_function_name, no_folds=5, no_test_subjects=2, function_dict=function_dict, featuresFunction=getHOGFeatures, folder_DISFA_FAU_summary=folder_DISFA_FAU_summary, folder_DISFA_data=folder_DISFA_data, boolEqualise=True):
    """Run the whole pipeline: list frames, balance, split into folds, crop and save.

    Args:
        o, ppc, cpb: feature parameters forwarded to `featuresFunction`.
        fau_no, thresh: select the summary CSV read by getDISFAFramesDictionary.
        cropping_function_name: key into `function_dict`.
        no_folds, no_test_subjects: forwarded to getTrainTestFolds.
        function_dict: maps names to cropping functions.
        featuresFunction: feature extractor handed to cropAndSaveImageHOG.
        folder_DISFA_FAU_summary, folder_DISFA_data: input and output folders.
        boolEqualise: when True, balance the frames with equaliseDictionary.
    """
    frames_dict = getDISFAFramesDictionary(folder_DISFA_FAU_summary,fau_no,thresh)
    # honour the flag; balancing used to be applied unconditionally
    if boolEqualise:
        frames_dict = equaliseDictionary(frames_dict)
    dict_folds = getTrainTestFolds(frames_dict,no_folds,no_test_subjects)
    # pass the caller's extractor on instead of always using getHOGFeatures
    cropAndSaveImageHOG(o ,ppc ,cpb ,fau_no ,thresh , dict_folds, folder_DISFA_data,cropping_function_name,function_dict,featuresFunction)

NameError: name 'function_dict' is not defined

Things to do:
    - just calculate the features and save them in the appropriate folder; save only the colored image so that it can also be used for deep learning
    - for training: load the features and build X and Y. Then train on the different folds, report the accuracy for each test fold, and show the average at the end.

## Main Function:

In [None]:
def trainDISFA (fau_no, train_no, fau_thresh, test_subjects_no, boolGetLists=False, boolCalcFeatures=False, boolCrossValidation=True, ):
    """Unfinished entry point: only `boolGetLists` is looked at so far.

    With boolGetLists False nothing happens. With it True the name
    getDISFALists is evaluated (it is referenced, not called).
    """
    if not boolGetLists:
        return
    getDISFALists

### trainSVMGridSearchModel helper function
Using GridSearchCV model

#### Rough functions and ideas(not useful now)

In [95]:
# Search grid: each power of ten is taken at 1x and 5x
# (gamma from 1e-3 to 1e0, C from 1e-1 to 1e1).
gamma_range = np.outer(np.logspace(-3, 0, 4), np.array([1, 5])).flatten()
C_range = np.outer(np.logspace(-1, 1, 3), np.array([1, 5])).flatten()
parameters = dict(kernel=['linear'], C=C_range, gamma=gamma_range)

In [97]:
# Print every parameter combination contained in the `parameters` grid.
for g in sklearn.model_selection.ParameterGrid(parameters):
    print (g)

{'kernel': 'linear', 'C': 0.1, 'gamma': 0.001}
{'kernel': 'linear', 'C': 0.1, 'gamma': 0.005}
{'kernel': 'linear', 'C': 0.1, 'gamma': 0.01}
{'kernel': 'linear', 'C': 0.1, 'gamma': 0.05}
{'kernel': 'linear', 'C': 0.1, 'gamma': 0.1}
{'kernel': 'linear', 'C': 0.1, 'gamma': 0.5}
{'kernel': 'linear', 'C': 0.1, 'gamma': 1.0}
{'kernel': 'linear', 'C': 0.1, 'gamma': 5.0}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.001}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.005}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.01}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.05}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.1}
{'kernel': 'linear', 'C': 0.5, 'gamma': 0.5}
{'kernel': 'linear', 'C': 0.5, 'gamma': 1.0}
{'kernel': 'linear', 'C': 0.5, 'gamma': 5.0}
{'kernel': 'linear', 'C': 1.0, 'gamma': 0.001}
{'kernel': 'linear', 'C': 1.0, 'gamma': 0.005}
{'kernel': 'linear', 'C': 1.0, 'gamma': 0.01}
{'kernel': 'linear', 'C': 1.0, 'gamma': 0.05}
{'kernel': 'linear', 'C': 1.0, 'gamma': 0.1}
{'kernel': 'linear', 'C': 1.0, 'gamma

In [83]:
def trainSVMGridSearchModel(X_train, Y_train, X_test, Y_test, no_jobs=4, kernel_list=['linear']):
    """Grid-search SVC hyper-parameters on the training data.

    Args:
        X_train, Y_train: training features and labels passed to fit().
        X_test, Y_test: accepted but not used by this function.
        no_jobs: forwarded to GridSearchCV as n_jobs.
        kernel_list: kernels to include in the grid.

    Returns:
        The fitted GridSearchCV object.
    """
    #setup parameter search space: each power of ten taken at 1x and 5x
    gamma_range = np.outer(np.logspace(-3,0,4),np.array([1,5])).flatten()
    C_range = np.outer(np.logspace(-1,1,3),np.array([1,5])).flatten()
    parameters = {'kernel': list(kernel_list),'C':C_range,'gamma':gamma_range}
    svm_clsf = svm.SVC()
    # NOTE(review): the single cv split uses the full data both as train and
    # as validation indices, so every candidate is scored on the samples it
    # was fitted on - confirm this is intended.
    grid_clsf = sklearn.model_selection.GridSearchCV(estimator=svm_clsf,param_grid=parameters,n_jobs=no_jobs,verbose=2, cv=[(slice(None), slice(None))])
    #train
    start_time=dt.datetime.now()
    print('Start param searching at {}'.format(str(start_time)))
    grid_clsf.fit(X_train,Y_train)
    elapsed_time=dt.datetime.now()-start_time
    print('Elapsed time, param searching {}'.format(str(elapsed_time)))
    return grid_clsf

### trainManualSVMGridSearchModel helper function
Manually trying different parameters for model

In [None]:
def trainManualSVMGridSearchModel(X_train, Y_train, X_test, Y_test, no_jobs=1, kernel_list=['rbf','linear']):
    """Grid-search SVC hyper-parameters with GridSearchCV's default cross-validation.

    Args:
        X_train, Y_train: training features and labels passed to fit().
        X_test, Y_test: accepted but not used by this function.
        no_jobs: forwarded to GridSearchCV as n_jobs.
        kernel_list: kernels to include in the grid.

    Returns:
        The fitted GridSearchCV object.
    """
    #setup parameter search space: each power of ten taken at 1x and 5x
    gamma_range = np.outer(np.logspace(-3,0,4),np.array([1,5])).flatten()
    C_range = np.outer(np.logspace(-1,1,3),np.array([1,5])).flatten()
    parameters = {'kernel': list(kernel_list),'C':C_range,'gamma':gamma_range}
    svm_clsf = svm.SVC()
    grid_clsf = sklearn.model_selection.GridSearchCV(estimator=svm_clsf,param_grid=parameters,n_jobs=no_jobs,verbose=2)
    #train
    start_time=dt.datetime.now()
    print('Start param searching at {}'.format(str(start_time)))
    # the labels parameter is Y_train; the lower-case y_train used before was undefined
    grid_clsf.fit(X_train,Y_train)
    elapsed_time=dt.datetime.now()-start_time
    print('Elapsed time, param searching {}'.format(str(elapsed_time)))
    return grid_clsf

### Main Train Function for manual fold approach

In [41]:
def _loadFoldFeatures(fold_folders):
    """Load the positive and negative feature arrays stored under the given fold folders."""
    loaded = []
    for category in ("positives", "negatives"):
        feature_folders = []
        for fol in fold_folders:
            feature_folders.extend(glob.glob(fol + "/*/{}/*/".format(category)))
        loaded.append(np.array([carray(rootdir = folder, mode = 'r') for folder in feature_folders]))
    return loaded[0], loaded[1]


def _stackFeaturesAndTargets(positive_features, negative_features):
    """Stack positives then negatives into X and build the matching 1/0 label vector Y."""
    array_X = np.concatenate((positive_features,negative_features))
    array_Y = np.append(np.ones(positive_features.shape[0]), np.zeros(negative_features.shape[0]))
    return array_X, array_Y


# TO BE ABLE TO TAKE IN OTHER DATASETS ALSO, SET THE FOLDER DATA ACCORDINGLY IN THE CORRESPONDING FUNCTION TO THE DATASET
#trainFunction as an argument allows changing choice of function for choosing the model
def train(fau_no, thresh, cropping_function_name,trainFunction, folder_data):
    """Leave-one-fold-out training over the feature folders saved on disk.

    Every folder matching "<folder_data>features/hog/<thresh>/<cropping_function_name>/*"
    is treated as one fold. Each fold in turn is used as test data while the
    remaining folds form the training data.

    Args:
        fau_no: not used by this function.
        thresh, cropping_function_name: select the feature folder.
        trainFunction: called as trainFunction(X_train, Y_train, X_test, Y_test);
            must return a fitted search object exposing best_estimator_,
            best_params_ and cv_results_ (as GridSearchCV does).
        folder_data: root data folder (with trailing slash).

    Returns:
        {fold_no: {'best_estimator': ..., 'best_params': ..., 'scores': ...}}
        where 'scores' is cv_results_['mean_test_score'].
    """
    # NOTE(review): the glob matches every sub-folder; cropAndSaveImageHOG also
    # writes a "test" folder there, which would be treated as one more fold - confirm.
    # sorted() keeps the fold numbering stable between runs
    fold_folder_list = sorted(glob.glob(folder_data + "features/hog/{}/{}/*".format(thresh, cropping_function_name)))
    no_folds = len(fold_folder_list)
    results = {}
    for fold_no in range(no_folds):
        print("In fold number", fold_no)
        # train on every fold except fold_no, test on fold_no
        train_folds_folder_list = fold_folder_list[:fold_no] + fold_folder_list[fold_no+1:]
        print("loading training features")
        positive_features, negative_features = _loadFoldFeatures(train_folds_folder_list)
        print("shape of positive features array is: ", (positive_features).shape)
        print("shape of negative features array is: ", (negative_features).shape)
        train_array_X, train_targets_Y = _stackFeaturesAndTargets(positive_features, negative_features)

        #loading test data
        positive_test_features, negative_test_features = _loadFoldFeatures([fold_folder_list[fold_no]])
        test_array_X, test_array_Y = _stackFeaturesAndTargets(positive_test_features, negative_test_features)
        print("shape of test array X: ", np.array(test_array_X).shape)
        print("shape of test array Y: ", np.array(test_array_Y).shape)

        # training for this fold
        grid_clsf = trainFunction(train_array_X, train_targets_Y, test_array_X, test_array_Y)
        best_classifier = grid_clsf.best_estimator_
        best_params = grid_clsf.best_params_
        # kept flat: the grid dimensions are owned by trainFunction, not known here
        scores = grid_clsf.cv_results_['mean_test_score']
        print("scores are: ", scores)
        print("best classifier is:", "\n", best_classifier)
        print("best parameters are: ", "\n", best_params)
        results[fold_no] = {'best_estimator': best_classifier, 'best_params': best_params, 'scores': scores}
    return results
        
        
        
        
    
        

In [74]:
# Script version of the fold loading: for every fold folder of threshold 3 /
# 'FAU2_1', build the training arrays from the other folds and the test arrays
# from that fold. After the loop the module-level arrays hold the last fold's data.
fold_folder_list = glob.glob(folder_DISFA_data + "features/hog/{}/{}/*".format(3,'FAU2_1'))
no_folds = len(fold_folder_list)
for fold_no in range(no_folds):
    print("In fold number", fold_no)
    # once per fold: train on all the other folds, test on the folder corresponding to fold_no
    train_folds_folder_list = fold_folder_list[:fold_no] + fold_folder_list[fold_no+1:]
    list_positive_feature_folders = []
    list_negative_feature_folders = []
    positive_features = []
    negative_features = []
    # populate the list_positive_feature_folders and list_negative_feature_folders
    for fol in train_folds_folder_list:
        list_positive_feature_folders.extend(glob.glob(fol + "/*/positives/*/"))
        list_negative_feature_folders.extend(glob.glob(fol + "/*/negatives/*/"))
    # populate the positive_features and negative_features array
    print("loading positive features")
    for pos_feat_folder in list_positive_feature_folders:
        # every matched folder is opened read-only as one carray
        pos_feat = carray(rootdir = pos_feat_folder, mode = 'r')
        positive_features.append(pos_feat)
    print("loading negative features")
    for neg_feat_folder in list_negative_feature_folders:
        neg_feat = carray(rootdir = neg_feat_folder, mode = 'r')
        negative_features.append(neg_feat)
    positive_features = np.array(positive_features)
    negative_features = np.array(negative_features)
    print("shape of np positive arrays is: ", (positive_features).shape)
    print("shape of np negative arrays is: ", (negative_features).shape)
    # positives first, then negatives; labels are 1 for positives and 0 for negatives
    train_array_X = np.concatenate((positive_features,negative_features))
    train_targets_Y = np.append(np.ones(positive_features.shape[0]), np.zeros(negative_features.shape[0]))
    #loading test data
    test_folder = fold_folder_list[fold_no]
    list_positive_test_folder = glob.glob(test_folder + "/*/positives/*/")
    list_negative_test_folder = glob.glob(test_folder + "/*/negatives/*/")
    positive_test_features = []
    negative_test_features = []
    for pos_feat_folder in list_positive_test_folder:
        pos_feat = carray(rootdir = pos_feat_folder, mode = 'r')
        positive_test_features.append(pos_feat)
    for neg_feat_folder in list_negative_test_folder:
        neg_feat = carray(rootdir = neg_feat_folder, mode = 'r')
        negative_test_features.append(neg_feat)
    positive_test_features = np.array(positive_test_features)
    negative_test_features = np.array(negative_test_features)
    # same layout as the training arrays: positives first, labels 1 / 0
    test_array_X = np.concatenate((positive_test_features,negative_test_features))
    test_array_Y = np.append(np.ones(positive_test_features.shape[0]), np.zeros(negative_test_features.shape[0]))
    print("shape of test array X: ", np.array(test_array_X).shape)
    print("shape of test array Y: ", np.array(test_array_Y).shape)
#     positives = []
#     for fold_folder in train_folder_list:
#         array_posi=carray(rootdir=dir_features_hog_1_fau4_1+'positives/',mode='r')
#         print(fold_foder)

('In fold number', 0)
loading positive features
loading negative features
('shape of np positive arrays is: ', (1141, 1056))
('shape of np negative arrays is: ', (2120, 1056))
('shape of test array X: ', (302, 1056))
('shape of test array Y: ', (302,))
('In fold number', 1)
loading positive features
loading negative features
('shape of np positive arrays is: ', (874, 1056))
('shape of np negative arrays is: ', (1867, 1056))
('shape of test array X: ', (822, 1056))
('shape of test array Y: ', (822,))
('In fold number', 2)
loading positive features
loading negative features
('shape of np positive arrays is: ', (923, 1056))
('shape of np negative arrays is: ', (1916, 1056))
('shape of test array X: ', (724, 1056))
('shape of test array Y: ', (724,))
('In fold number', 3)
loading positive features
loading negative features
('shape of np positive arrays is: ', (1228, 1056))
('shape of np negative arrays is: ', (2221, 1056))
('shape of test array X: ', (114, 1056))
('shape of test array Y: '

### Train function to use custom cross validation generator

In [166]:
def trainCustomGridSearch():
    """Grid-search an SVM using the fold folders on disk as the cross-validation splits.

    All features of threshold 3 / 'FAU2_1' are loaded fold by fold. Each sample
    remembers the fold it came from, and one (train_indices, test_indices)
    pair per fold is built so that every fold is used once as validation data.

    Returns:
        Whatever trainSVMGridSearchModel returns for the loaded data.
    """
    # NOTE(review): the glob matches every sub-folder; cropAndSaveImageHOG also
    # writes a "test" folder there, which would be treated as one more fold - confirm.
    fold_folder_list = glob.glob(folder_DISFA_data + "features/hog/{}/{}/*".format(3,'FAU2_1'))
    no_folds = len(fold_folder_list)
    # holders spanning all folds
    features = []
    targets = []
    fold_label_list = []

    #processing for each fold:
    for fold_no, fol in enumerate(fold_folder_list):
        print("loading positive features for fold: ", fold_no)
        positive_features = [carray(rootdir = folder, mode = 'r') for folder in glob.glob(fol + "/*/positives/*/")]
        print("loading negative features for fold: ", fold_no)
        negative_features = [carray(rootdir = folder, mode = 'r') for folder in glob.glob(fol + "/*/negatives/*/")]
        no_fold_features = len(positive_features) + len(negative_features)
        print(no_fold_features)
        # positives first, then negatives; labels 1 / 0 in the same order
        features.extend(positive_features)
        features.extend(negative_features)
        targets.extend([1] * len(positive_features))
        targets.extend([0] * len(negative_features))
        # remember which fold every sample belongs to
        fold_label_list.extend([fold_no]*no_fold_features)

    #defining the custom cross validation generator over training data
    fold_labels = np.array(fold_label_list)
    cvIterable = []
    for fold_no in range(no_folds):
        train_indices = np.argwhere(fold_labels != fold_no).flatten()
        test_indices = np.argwhere(fold_labels == fold_no).flatten()
        cvIterable.append((train_indices,test_indices))

    # pass the loaded data (X_train / Y_train were never defined here) and
    # hand the splits over by keyword
    return trainSVMGridSearchModel(features, targets, no_jobs=8, kernel_list=['linear'], custom_fold_iterable=cvIterable)

('loading positive features for fold: ', 0)
('loading negative features for fold: ', 0)
302
('loading positive features for fold: ', 1)
('loading negative features for fold: ', 1)
822
('loading positive features for fold: ', 2)
('loading negative features for fold: ', 2)
724
('loading positive features for fold: ', 3)
('loading negative features for fold: ', 3)
114
('loading positive features for fold: ', 4)
('loading negative features for fold: ', 4)
1601
3563
3563
3563


In [None]:
# Script version of the custom-fold grid search; expects `features`, `targets`
# and `cvIterable` to exist at module level.
gamma_range = np.outer(np.logspace(-3,0,4),np.array([1,5]))
gamma_range = gamma_range.flatten()
C_range = np.outer(np.logspace(-1,1,3),np.array([1,5]))
C_range = C_range.flatten()
parameters = {'kernel': ['linear'],'C':C_range,'gamma':gamma_range}
svm_clsf = svm.SVC()
grid_clsf = sklearn.model_selection.GridSearchCV(estimator=svm_clsf,param_grid=parameters,n_jobs=4,verbose=2,cv=cvIterable)
#train
start_time=dt.datetime.now()
print('Start param searching at {}'.format(str(start_time)))
grid_clsf.fit(features,targets)
elapsed_time=dt.datetime.now()-start_time
print('Elapsed time, param searching {}'.format(str(elapsed_time)))
sorted(grid_clsf.cv_results_.keys())
# a "return" is only valid inside a function; as the last expression of the
# cell the fitted search object is displayed instead
grid_clsf

Start param searching at 2018-06-29 17:31:12.315498
Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   5.8s
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   5.0s
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   6.4s
[CV] kernel=linear, C=0.1, gamma=0.005 ...............................
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   6.4s
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   7.9s
[CV] kernel=linear, C=0.1, gamma=0.005 ...............................
[CV] kernel=linear, C=0.1, gamma=0

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:  3.5min


[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] .................. kernel=linear, C=0.1, gamma=1.0, total=   9.2s
[CV] .................. kernel=linear, C=0.1, gamma=1.0, total=   7.0s
[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] .................. kernel=linear, C=0.1, gamma=5.0, total=   9.6s
[CV] .................. kernel=linear, C=0.1, gamma=5.0, total=   7.0s
[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] .................. kernel=linear, C=0.1, gamma=5.0, total=   8.2s
[CV] kernel=linear, C=0.5, gamma=0.001 ...............................
[CV] .................. kernel=linear, C=0.1, gamma=5.0, total=   9.5s
[CV] .................. kernel=linear, C=0.1, gamma=5.0, total=   7.6s
[CV] kernel=linear, C=0.5, gamma=0.001 ...............................
[CV] .

[CV] kernel=linear, C=1.0, gamma=0.01 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.01, total=   2.6s
[CV] kernel=linear, C=1.0, gamma=0.01 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.01, total=   2.4s
[CV] kernel=linear, C=1.0, gamma=0.01 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.01, total=   1.9s
[CV] kernel=linear, C=1.0, gamma=0.05 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.05, total=   2.4s
[CV] kernel=linear, C=1.0, gamma=0.05 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.05, total=   2.2s
[CV] kernel=linear, C=1.0, gamma=0.05 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.05, total=   2.8s
[CV] kernel=linear, C=1.0, gamma=0.05 ................................
[CV] ................. kernel=linear, C=1.0, gamma=0.05, total=   2.5s
[CV] k

In [None]:
def trainSVMGridSearchModel(X_train, Y_train, no_jobs=1, kernel_list=('rbf', 'linear'), custom_fold_iterable=None):
    """Grid-search SVC hyper-parameters and return the fitted search object.

    The search space is the cross product of:
      * kernel: every entry of ``kernel_list``
      * C:      0.1, 0.5, 1, 5, 10, 50
      * gamma:  0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5

    Note that gamma is enumerated for every kernel, including 'linear',
    although SVC only uses gamma for the 'rbf', 'poly' and 'sigmoid' kernels.

    Parameters
    ----------
    X_train : feature matrix passed unchanged to ``GridSearchCV.fit``.
    Y_train : target labels passed unchanged to ``GridSearchCV.fit``.
    no_jobs : forwarded as ``n_jobs`` (number of parallel workers).
    kernel_list : sequence of SVC kernel names to try.
    custom_fold_iterable : forwarded as ``cv``. ``None`` (the default) lets
        scikit-learn pick its default cross-validation strategy; otherwise
        pass a CV splitter or an iterable of (train_idx, test_idx) pairs.

    Returns
    -------
    The fitted ``GridSearchCV`` instance (inspect ``cv_results_``,
    ``best_params_``, ... on it).
    """
    # Imported locally so the submodule is guaranteed to be loaded: a bare
    # `import sklearn` does not by itself expose `sklearn.model_selection`.
    from sklearn.model_selection import GridSearchCV

    # set up the parameter search space: powers of ten scaled by {1, 5}
    scale = np.array([1, 5])
    gamma_range = np.outer(np.logspace(-3, 0, 4), scale).flatten()
    C_range = np.outer(np.logspace(-1, 1, 3), scale).flatten()
    # copy into a fresh list so the (immutable) default is never shared
    parameters = {'kernel': list(kernel_list), 'C': C_range, 'gamma': gamma_range}

    svm_clsf = svm.SVC()
    grid_clsf = GridSearchCV(
        estimator=svm_clsf,
        param_grid=parameters,
        n_jobs=no_jobs,
        verbose=2,
        cv=custom_fold_iterable,
    )

    # train, reporting wall-clock time of the whole search
    start_time = dt.datetime.now()
    print('Start param searching at {}'.format(str(start_time)))
    grid_clsf.fit(X_train, Y_train)
    elapsed_time = dt.datetime.now() - start_time
    print('Elapsed time, param searching {}'.format(str(elapsed_time)))
    return grid_clsf

In [87]:
# Run the hyper-parameter search and keep the returned search object in `a`.
# NOTE(review): four arrays are passed positionally; confirm they line up with
# the signature of the trainSVMGridSearchModel version that was actually
# executed for this cell (third/fourth positions are no_jobs / kernel_list in
# the definition shown in the preceding cell).
a = trainSVMGridSearchModel(
    train_array_X,
    train_targets_Y,
    test_array_X,
    test_array_Y,
)

Start param searching at 2018-06-29 04:33:43.289561
Fitting 1 folds for each of 48 candidates, totalling 48 fits
[CV] kernel=linear, C=0.1, gamma=0.001 ...............................
[CV] kernel=linear, C=0.1, gamma=0.005 ...............................
[CV] kernel=linear, C=0.1, gamma=0.01 ................................
[CV] kernel=linear, C=0.1, gamma=0.05 ................................
[CV] ................ kernel=linear, C=0.1, gamma=0.001, total=   6.2s
[CV] kernel=linear, C=0.1, gamma=0.1 .................................
[CV] ................ kernel=linear, C=0.1, gamma=0.005, total=   6.1s
[CV] kernel=linear, C=0.1, gamma=0.5 .................................
[CV] ................. kernel=linear, C=0.1, gamma=0.01, total=   6.1s
[CV] kernel=linear, C=0.1, gamma=1.0 .................................
[CV] ................. kernel=linear, C=0.1, gamma=0.05, total=   6.4s
[CV] kernel=linear, C=0.1, gamma=5.0 .................................
[CV] .................. kernel=line

[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   37.3s


[CV] ................ kernel=linear, C=10.0, gamma=0.01, total=   0.6s
[CV] kernel=linear, C=10.0, gamma=1.0 ................................
[CV] ................ kernel=linear, C=10.0, gamma=0.05, total=   0.6s
[CV] kernel=linear, C=10.0, gamma=5.0 ................................
[CV] ................. kernel=linear, C=10.0, gamma=0.1, total=   0.6s
[CV] kernel=linear, C=50.0, gamma=0.001 ..............................
[CV] ................. kernel=linear, C=10.0, gamma=0.5, total=   0.6s
[CV] kernel=linear, C=50.0, gamma=0.005 ..............................
[CV] ................. kernel=linear, C=10.0, gamma=1.0, total=   0.6s
[CV] kernel=linear, C=50.0, gamma=0.01 ...............................
[CV] ................. kernel=linear, C=10.0, gamma=5.0, total=   0.6s
[CV] kernel=linear, C=50.0, gamma=0.05 ...............................
[CV] ............... kernel=linear, C=50.0, gamma=0.001, total=   0.4s
[CV] kernel=linear, C=50.0, gamma=0.1 ................................
[CV] .

[Parallel(n_jobs=4)]: Done  48 out of  48 | elapsed:   39.6s finished


Elapsed time, param searching 0:00:40.049057


In [89]:
# Display the per-candidate cross-validation results (fit/score times, scores,
# parameters) recorded on the search object returned by the previous cell.
a.cv_results_



{'mean_fit_time': array([2.46489501, 2.41000414, 2.40418506, 2.41879416, 2.45228887,
        2.41051316, 2.40889716, 2.4782989 , 1.51142097, 1.50869703,
        1.47709799, 1.46525478, 1.51642108, 1.4985261 , 1.46640706,
        1.45728183, 1.02990079, 1.01788902, 1.01321983, 0.9977541 ,
        1.02115107, 1.02485085, 1.0022769 , 1.00149083, 0.44751   ,
        0.44570494, 0.44344282, 0.44034481, 0.49199414, 0.49537587,
        0.48502111, 0.47372317, 0.32043719, 0.32652092, 0.33028102,
        0.32609415, 0.32586813, 0.33044004, 0.34925508, 0.34716105,
        0.21487689, 0.20935822, 0.21101189, 0.20752215, 0.20843506,
        0.20616698, 0.22901702, 0.23153996]),
 'mean_score_time': array([3.70184422, 3.72094393, 3.72335505, 3.97275686, 3.88255906,
        3.67291403, 3.66251993, 3.7178719 , 1.39907098, 1.40113211,
        1.40200496, 1.4010911 , 1.39370799, 1.39437103, 1.39174199,
        1.393574  , 0.95028901, 0.94931507, 0.97351408, 0.97901106,
        0.97559786, 0.97592402, 1.