## OpenFace features + classifiers

In [1]:
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image
from random import shuffle
from tqdm.notebook import tqdm
import pandas as pd
from time import time

import pickle

from sklearn.svm import SVC,LinearSVC
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier,ExtraTreesClassifier
from sklearn import svm,metrics,preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

%matplotlib inline

In [2]:
DATA_DIR = r'D:\Users\amira\Documents\datasets\emotions\AudioVideo\openface'
IMG_SIZE = 224

emotion_to_index = {'Angry':0, 'Disgust':1, 'Fear':2, 'Happy':3, 'Neutral':4, 'Sad':5, 'Surprise':6}
device = 'cuda:0'

In [3]:
def create_openface_dataset(data_dir):
    x = []
    y = []
    for class_name in emotion_to_index:
        for filename in tqdm(os.listdir(os.path.join(data_dir,class_name))):
            fn=os.path.splitext(filename)[0] # goes through files names without extension
            if 'of_details' not in fn:
                openface_df = pd.read_csv(os.path.join(data_dir,class_name,filename))
                # fill zeroes with mean values on frames where openface failed to detect faces 
                openface_df.loc[openface_df[' success'] == 0] = openface_df.loc[openface_df[' success'] == 0].replace(0, openface_df.loc[openface_df[' success'] == 1].mean())
                # remove some irrelevant columns
                openface_df = openface_df.loc[:, ~openface_df.columns.isin(['frame', ' face_id', ' timestamp', ' confidence', ' success'])]

                total_features=None
                mean_features = (np.mean(openface_df, axis=0))
                std_features = (np.std(openface_df, axis=0))
                max_features = (np.max(openface_df, axis=0))
                min_features = (np.min(openface_df, axis=0))

                # join several features together
                feature = np.concatenate((mean_features, std_features, min_features, max_features), axis=None)                    
                #feature = np.concatenate((mean_features, std_features, min_features), axis=None)
                #feature = np.concatenate((mean_features, min_features, max_features), axis=None)
                #feature = np.concatenate((max_features, std_features), axis=None)
                #feature=max_features

                total_features=feature
                
                if total_features is not None:
                    x.append(total_features)
                    y.append(emotion_to_index[class_name])

    x=np.array(x)
    y=np.array(y)
    
    print(x.shape,y.shape)
    return x,y

x_train_of, y_train_of = create_openface_dataset(os.path.join(DATA_DIR, 'Train_AFEW'))
x_test_of, y_test_of = create_openface_dataset(os.path.join(DATA_DIR, 'Val_AFEW'))


  0%|          | 0/266 [00:00<?, ?it/s]

  0%|          | 0/148 [00:00<?, ?it/s]

  0%|          | 0/162 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/288 [00:00<?, ?it/s]

  0%|          | 0/234 [00:00<?, ?it/s]

  0%|          | 0/148 [00:00<?, ?it/s]

(773, 1316) (773,)


  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

(383, 1316) (383,)


There are 24 videos from Train and 11 videos from Test where OpenFace completely failed to detect faces. In order to use normalization we have to get rid of them.

In [48]:
mask_x_train_of = np.isnan(x_train_of)
of_has_faces_train = []

for i in range(len(mask_x_train_of)):
    of_has_faces_train.append((len(np.where(mask_x_train_of[i]==1)[0]) == 0))


mask_x_test_of = np.isnan(x_test_of)
of_has_faces_test = []

for i in range(len(mask_x_test_of)):
    of_has_faces_test.append((len(np.where(mask_x_test_of[i]==1)[0]) == 0))

In [49]:
from sklearn import svm,metrics,preprocessing

x_train_of_norm=preprocessing.normalize(x_train_of[of_has_faces_train],norm='l2')
x_test_of_norm=preprocessing.normalize(x_test_of[of_has_faces_test],norm='l2')

In [4]:
import xgboost as xgb
np.random.seed(1)
xgb_clf = xgb.XGBClassifier(n_estimators=1000,use_label_encoder=False)
  
xgb_clf.fit(x_train_of, y_train_of)
y_pred_of = xgb_clf.predict(x_test_of)

print("Complete accuracy:",metrics.accuracy_score(y_test_of, y_pred_of))

Complete accuracy: 0.34986945169712796


In [5]:
of_proba = xgb_clf.predict_proba(x_test_of)

In [10]:
import pickle
model_name = 'xgb'
MODEL2EMOTIW_FEATURES=model_name+'_of_afew.pickle' 

print(MODEL2EMOTIW_FEATURES)

with open(MODEL2EMOTIW_FEATURES, 'wb') as handle:
    pickle.dump(of_proba, handle, protocol=pickle.HIGHEST_PROTOCOL)

xgb_of_afew.pickle


Accuracy is higher when passing all data to XGBoost and the same with and without normalization

## Enet + OpenFace Features

In [51]:
import pickle
model_name = 'enet_b0_8'
MODEL2EMOTIW_FEATURES=model_name+'_afew_torch.pickle' 

print(MODEL2EMOTIW_FEATURES)


enet_b0_8_afew_torch.pickle


In [52]:
with open(MODEL2EMOTIW_FEATURES, 'rb') as handle:
    filename2features_train,filename2features_val=pickle.load(handle)
print(len(filename2features_train),len(filename2features_val))

773 383


In [53]:
def create_dataset(filename2features,data_dir):
    x = []
    y = []
    has_faces=[]
    ind=0
    for class_name in emotion_to_index:
        for filename in tqdm(os.listdir(os.path.join(data_dir,class_name))):
            fn=os.path.splitext(filename)[0] # goes through files names
            if not fn in filename2features:
                continue
            features=filename2features[fn]
            total_features=None
            #print(len(features))
            if True:
                if len(features[0])!=0:
                    cur_features=features[0][features[-1]==1]
                #print(prev,features.shape)
            else:
                cur_features=features[0]
            if len(cur_features)==0:
                has_faces.append(0)
                total_features=np.zeros_like(feature)
            else:
                has_faces.append(1)
                #mean_features=features.mean(axis=0)
                mean_features = (np.mean(cur_features, axis=0))
                std_features = (np.std(cur_features, axis=0))
                max_features = (np.max(cur_features, axis=0))
                min_features = (np.min(cur_features, axis=0))

                # join several features together
                feature = np.concatenate((mean_features, std_features, min_features, max_features), axis=None)                    
                #feature = np.concatenate((mean_features, std_features, min_features), axis=None)
                #feature = np.concatenate((mean_features, min_features, max_features), axis=None)
                #feature = np.concatenate((max_features, std_features), axis=None)
                #feature=max_features

                total_features=feature
            
            if total_features is not None:
                x.append(total_features)
                y.append(emotion_to_index[class_name])
    x=np.array(x)
    y=np.array(y)
    has_faces=np.array(has_faces)
    print(x.shape,y.shape)
    return x,y,has_faces

x_train_enet, y_train_enet, has_faces_train = create_dataset(filename2features_train, os.path.join(DATA_DIR, 'Train_AFEW'))
x_test_enet, y_test_enet, has_faces_test = create_dataset(filename2features_val, os.path.join(DATA_DIR, 'Val_AFEW'))

  0%|          | 0/266 [00:00<?, ?it/s]

  0%|          | 0/148 [00:00<?, ?it/s]

  0%|          | 0/162 [00:00<?, ?it/s]

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/288 [00:00<?, ?it/s]

  0%|          | 0/234 [00:00<?, ?it/s]

  0%|          | 0/148 [00:00<?, ?it/s]

(773, 5120) (773,)


  0%|          | 0/128 [00:00<?, ?it/s]

  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/126 [00:00<?, ?it/s]

  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/92 [00:00<?, ?it/s]

(383, 5120) (383,)


In [54]:
from sklearn import svm,metrics,preprocessing

x_train_enet_norm=preprocessing.normalize(x_train_enet,norm='l2')
x_test_enet_norm=preprocessing.normalize(x_test_enet,norm='l2')

Concatenate features without normalization

In [85]:
x_train_cat = np.concatenate((x_train_enet_norm,x_train_of),axis=1)
x_test_cat = np.concatenate((x_test_enet_norm,x_test_of),axis=1)

valid_train = np.multiply(has_faces_train, of_has_faces_train)
valid_test = np.multiply(has_faces_test, of_has_faces_test)

Concatenate normalized features 

In [113]:
x_train_cat_norm = np.concatenate((x_train_enet_norm[of_has_faces_train],x_train_of_norm),axis=1)
x_test_cat_norm = np.concatenate((x_test_enet_norm[of_has_faces_test],x_test_of_norm),axis=1)
y_train_cat_norm = y_train_enet[of_has_faces_train]
y_test_cat_norm = y_test_enet[of_has_faces_test]

valid_train_cat_norm = np.multiply(has_faces_train[of_has_faces_train], np.array(of_has_faces_train)[of_has_faces_train])
valid_test_cat_norm = np.multiply(has_faces_test[of_has_faces_test], np.array(of_has_faces_test)[of_has_faces_test])

Concatenated features from Enet and OpenFace

In [81]:
import xgboost as xgb
np.random.seed(1)
xgb_clf = xgb.XGBClassifier(n_estimators=2000,use_label_encoder=False)

xgb_clf.fit(x_train_cat, y_train_enet)
y_pred = xgb_clf.predict(x_test_cat)

print("Complete accuracy:",metrics.accuracy_score(y_test_enet, y_pred))

Complete accuracy: 0.5248041775456919


Concatenated normalized features but only that have faces

In [161]:
svc_clf = svm.LinearSVC(C=1.5)

svc_clf.fit(x_train_cat_norm, y_train_cat_norm)
y_pred = svc_clf.predict(x_test_cat_norm)

print("Accuracy:",metrics.accuracy_score(y_test_cat_norm, y_pred))
# fill in with dummy values to get complete accuracy
print("Complete accuracy:",metrics.accuracy_score(np.append(y_test_cat_norm, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=-1)), 
np.append(y_pred, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=100))))

Accuracy: 0.5806451612903226
Complete accuracy: 0.5639686684073107


In [169]:
svc_clf = svm.SVC(C=2, kernel='linear', probability=True)

svc_clf.fit(x_train_cat_norm, y_train_cat_norm)
y_pred = svc_clf.predict(x_test_cat_norm)

print("Accuracy:",metrics.accuracy_score(y_test_cat_norm, y_pred))
# fill in with dummy values to get complete accuracy
print("Complete accuracy:",metrics.accuracy_score(np.append(y_test_cat_norm, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=-1)), 
np.append(y_pred, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=100))))

Accuracy: 0.5698924731182796
Complete accuracy: 0.5535248041775457


In [159]:
rf_clf=RandomForestClassifier(n_estimators=2000,max_depth=10, n_jobs=-1)

rf_clf.fit(x_train_cat_norm, y_train_cat_norm)
y_pred = rf_clf.predict(x_test_cat_norm)

print("Accuracy:",metrics.accuracy_score(y_test_cat_norm, y_pred))
# fill in with dummy values to get complete accuracy
print("Complete accuracy:",metrics.accuracy_score(np.append(y_test_cat_norm, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=-1)), 
np.append(y_pred, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=100))))

Accuracy: 0.5349462365591398
Complete accuracy: 0.5195822454308094


In [160]:
xgb_clf = xgb.XGBClassifier(n_estimators=1500,use_label_encoder=False)

xgb_clf.fit(x_train_cat_norm, y_train_cat_norm)
y_pred = xgb_clf.predict(x_test_cat_norm)

print("Accuracy:",metrics.accuracy_score(y_test_cat_norm, y_pred))
# fill in with dummy values to get complete accuracy
print("Complete accuracy:",metrics.accuracy_score(np.append(y_test_cat_norm, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=-1)), 
np.append(y_pred, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=100))))

Accuracy: 0.5510752688172043
Complete accuracy: 0.5352480417754569


In [170]:
from sklearn.ensemble import VotingClassifier

vote_clf = VotingClassifier(estimators=[('svc', svc_clf), ('rf', rf_clf), ('xgb', xgb_clf)], voting='soft')
vote_clf.fit(x_train_cat_norm, y_train_cat_norm)
y_pred = vote_clf.predict(x_test_cat_norm)

print("Accuracy:",metrics.accuracy_score(y_test_cat_norm, y_pred))
# fill in with dummy values to get complete accuracy
print("Complete accuracy:",metrics.accuracy_score(np.append(y_test_cat_norm, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=-1)), 
np.append(y_pred, np.full((len(y_test_enet)-len(y_test_cat_norm)), fill_value=100))))

Accuracy: 0.5510752688172043
Complete accuracy: 0.5352480417754569


Concatenated features but only that have faces in Openface (without normalization) and Enet 

In [157]:
import xgboost as xgb
xgb_clf = xgb.XGBClassifier(n_estimators=1500,use_label_encoder=False)
 
xgb_clf.fit(x_train_cat[valid_train==1], y_train_enet[valid_train==1])
y_pred = xgb_clf.predict(x_test_cat[valid_test==1])

print("Accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))
print("Complete accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))

Accuracy: 0.553763440860215
Complete accuracy: 0.553763440860215


In [80]:
np.random.seed(1)
rf_clf=RandomForestClassifier(n_estimators=2300,max_depth=12, n_jobs=-1)
    
rf_clf.fit(x_train_cat[valid_train==1], y_train_enet[valid_train==1])
y_pred = rf_clf.predict(x_test_cat[valid_test==1])

print("Accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))
print("Complete accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))

Accuracy: 0.543010752688172
Complete accuracy: 0.543010752688172


Due to different scale of features from Enet and OpenFace, LinearSVC failed to converge

In [82]:
from sklearn.ensemble import VotingClassifier

vote_clf = VotingClassifier(estimators=[('rf', rf_clf), ('xgb', xgb_clf)], voting='soft')
vote_clf.fit(x_train_cat[valid_train==1], y_train_enet[valid_train==1])
y_pred = vote_clf.predict(x_test_cat[valid_test==1])
print("Accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))
print("Complete accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))

Accuracy: 0.5645161290322581
Complete accuracy: 0.5645161290322581


In [83]:
from sklearn.ensemble import StackingClassifier

st_clf = StackingClassifier(estimators=[('rf', rf_clf), ('xgb', xgb_clf)])
st_clf.fit(x_train_cat[valid_train==1], y_train_enet[valid_train==1])
y_pred = st_clf.predict(x_test_cat[valid_test==1])
print("Accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))
print("Complete accuracy:",metrics.accuracy_score(y_test_enet[valid_test==1], y_pred))

Accuracy: 0.553763440860215
Complete accuracy: 0.553763440860215
