In [1]:
%matplotlib inline 

import os
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [2]:
eng_levels = [-2, -1, 0, 1, 2, 3]

## Face Features

In [3]:
person_name = "yagmur"
base_dir = "../face/features/%s" % person_name

In [4]:
labels = pd.read_csv(os.path.join(base_dir, 'levels.tsv'), delimiter='\t', header=None)
scores = pd.read_csv(os.path.join(base_dir, 'scores.tsv'), delimiter='\t', header=None)

In [5]:
# Load data
df = pd.read_csv(os.path.join(base_dir, 'features.csv'))
# Remove empty spaces in column names.
df.columns = [col.replace(" ", "") for col in df.columns]
# Print few values of data.
df.head()

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,1,0,0.0,0.98,1,0.309688,0.217516,-0.925624,-0.078749,0.12452,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2,0,0.0,0.88,1,-0.744108,-0.003582,-0.66805,-0.731003,0.035917,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2,3,0,0.0,0.98,1,-0.730967,-0.056477,-0.680072,-0.739313,-0.004669,...,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
3,4,0,0.0,0.98,1,-0.688489,-0.0797,-0.720854,-0.715481,0.028073,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,5,0,0.0,0.98,1,-0.623707,-0.017633,-0.781459,-0.733528,0.004099,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [6]:
print(len(df), len(labels))

6969 6969


In [7]:
df.describe()

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
count,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,...,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0,6969.0
mean,215.962548,0.0,0.0,0.766288,0.772421,0.130164,0.126324,-0.715448,-0.048672,0.084677,...,0.075477,0.3032,0.13474,0.187975,0.070024,0.11422,0.156263,0.084374,0.004448,0.093988
std,157.337614,0.0,0.0,0.362595,0.4193,0.215496,0.165907,0.392075,0.196111,0.131894,...,0.264179,0.459674,0.34147,0.39072,0.255206,0.318101,0.363131,0.277967,0.066552,0.291832
min,1.0,0.0,0.0,0.0,0.0,-0.799789,-0.46646,-1.0,-0.776111,-0.508339,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,86.0,0.0,0.0,0.88,1.0,0.0,0.0,-0.966496,-0.15022,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,182.0,0.0,0.0,0.98,1.0,0.122265,0.122958,-0.91361,0.0,0.07568,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,327.0,0.0,0.0,0.98,1.0,0.291115,0.25779,-0.789262,0.035816,0.183527,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,666.0,0.0,0.0,0.98,1.0,0.84726,0.578685,0.0,0.776535,0.501084,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [8]:
high_conf_ind = ~np.logical_or(df['confidence'] < 0.5, df['success'] == 0)

df = df.loc[high_conf_ind]
labels = labels.loc[high_conf_ind]
scores = scores.loc[high_conf_ind]

In [9]:
print(len(df), len(labels))

5381 5381


In [10]:
# Define Feature Series Ranges
r_au_intensities = range(df.columns.get_loc("AU01_r"), df.columns.get_loc("AU45_r"))
r_au_class = range(df.columns.get_loc("AU01_c"), df.columns.get_loc("AU45_c"))
r_3d_eye_landmarks = range(df.columns.get_loc("eye_lmk_X_0"), df.columns.get_loc("eye_lmk_Z_55"))
r_gaze_directions = range(df.columns.get_loc("gaze_0_x"), df.columns.get_loc("gaze_angle_y"))
r_pose = range(df.columns.get_loc("pose_Tx"), df.columns.get_loc("pose_Rz"))
r_3d_face_landmarks = range(df.columns.get_loc("X_0"), df.columns.get_loc("Z_67"))

In [11]:
df_au_intensities = df.iloc[:, r_au_intensities]
df_au_class = df.iloc[:, r_au_class]
df_3d_eye_landmarks = df.iloc[:, r_3d_eye_landmarks]
df_gaze_directions = df.iloc[:, r_gaze_directions]
df_pose = df.iloc[:, r_pose]
df_3d_face_landmarks = df.iloc[:, r_3d_face_landmarks]

In [12]:
df_au_intensities['label'] = labels.values
df_au_class['label'] = labels.values
df_3d_eye_landmarks['label'] = labels.values
df_gaze_directions['label'] = labels.values
df_pose['label'] = labels.values
df_3d_face_landmarks['label'] = labels.values

df_au_intensities['score'] = scores.values
df_au_class['score'] = scores.values
df_3d_eye_landmarks['score'] = scores.values
df_gaze_directions['score'] = scores.values
df_pose['score'] = scores.values
df_3d_face_landmarks['score'] = scores.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_au_intensities['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_au_class['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_3d_eye_landmarks['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .lo

In [15]:
df_face_and_pose = pd.concat([df_3d_face_landmarks.iloc[:, :-2],
df_pose],axis=1)

df_all = pd.concat([df_3d_eye_landmarks.iloc[:, :-2], 
df_au_intensities.iloc[:, :-2],
df_gaze_directions.iloc[:, :-2],
df_3d_face_landmarks.iloc[:, :-2],
df_pose],axis=1)


feature_sets = {
    "AU Intensity": df_au_intensities,
    "3D Eye Landmark": df_3d_eye_landmarks,
    "3D Face Landmark": df_3d_face_landmarks,
    "Gaze Directions": df_gaze_directions,
    "Head Pose": df_pose,
    "3D Face and Head Pose": df_face_and_pose,
    "All OpenFace Fts": df_all
}

In [16]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

In [17]:
classifier_names = ['LR', 'knn', 'rbf svm', 'random forest', 'boosted trees', 'adaboost']
classifiers = [LogisticRegression(random_state=42, solver="liblinear"),
                KNeighborsClassifier(n_neighbors=6),
                SVC(gamma=2, C=1),
                RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
                GradientBoostingClassifier(n_estimators=10, learning_rate=1, max_depth=5),
                AdaBoostClassifier(n_estimators=100, random_state=42)]

results = pd.DataFrame(columns= ['LR', 'knn', 'rbf svm', 'random forest', 'boosted trees', 'adaboost', 'title'])

In [18]:
for title in feature_sets:
    s = [0, 0, 0, 0, 0, 0, "title"]
    dfc = feature_sets[title]
    not_zero_ind = ~(dfc == 0).all(axis=1)

    dfc = dfc.loc[not_zero_ind]
    labels = dfc['label'].loc[not_zero_ind]


    scaler = StandardScaler()
    scaled_samples = scaler.fit_transform(dfc.iloc[:,:-2])

    X_train, X_test, y_train, y_test = train_test_split(scaled_samples, labels, test_size=0.2, random_state=42, stratify=labels)

    i = 0
    for model in classifiers:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        res = f1_score(y_test, y_pred, average='weighted')
        print(title, model, res)
        s[i] = res
        i +=1
    s[i] = title
    results.loc[len(results.index)] = s
    #results.head()

In [19]:
results.to_csv('reports/f1_scores_face_%s.csv' % person_name)

In [None]:
### !!!! Also test with kmeans featurizer !!! ###

In [None]:
from sklearn.metrics import roc_curve, auc

def test_roc(y_test, y_pred):
    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=1)
    return fpr, tpr


In [None]:
from sklearn.model_selection import cross_val_score

i = 0
for model in classifiers: 
    scores = cross_val_score(model, scaled_samples, labels, cv=5)
    print("cross val scores of scaled %s:" % classifier_names[i], scores)
    scores = cross_val_score(model, dfc.iloc[:,:-2], labels, cv=5)
    print("cross val scores of %s:" % classifier_names[i], scores)

    i +=1

In [None]:
plt.figure()

for c in classifier_names:
    fpr, tpr = test_roc(y_test.values, y_pred)
    plt.plot(fpr, tpr, label=c)

plt.plot([0, 1], [0, 1], 'k--')
plt.legend()