In [1]:
%matplotlib inline 

import os
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [2]:
eng_levels = [-2, -1, 0, 1, 2, 3]

## Face Features

In [3]:
person_name = "yagmur"
base_dir = "../face/features/%s" % person_name

In [4]:
labels = pd.read_csv(os.path.join(base_dir, 'levels.tsv'), delimiter='\t', header=None)

In [5]:
# Load data
df_face = pd.read_csv(os.path.join(base_dir, 'features.csv'))
# Remove empty spaces in column names.
df_face.columns = [col.replace(" ", "") for col in df_face.columns]
# Print few values of data.
df_face.head()

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,1,0,0.0,0.98,1,0.309688,0.217516,-0.925624,-0.078749,0.12452,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2,0,0.0,0.88,1,-0.744108,-0.003582,-0.66805,-0.731003,0.035917,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2,3,0,0.0,0.98,1,-0.730967,-0.056477,-0.680072,-0.739313,-0.004669,...,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
3,4,0,0.0,0.98,1,-0.688489,-0.0797,-0.720854,-0.715481,0.028073,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,5,0,0.0,0.98,1,-0.623707,-0.017633,-0.781459,-0.733528,0.004099,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [6]:
print(len(df_face), len(labels))

6969 6969


In [7]:
high_conf_ind = ~np.logical_or(df_face['confidence'] < 0.5, df_face['success'] == 0)

df_face = df_face.loc[high_conf_ind]
labels = labels.loc[high_conf_ind]

In [8]:
df_face = df_face.iloc[:5350,:]
labels = labels.iloc[:5350,:]
print(len(df_face), len(labels))

5350 5350


# OpenPose Features

In [9]:
base_dir = "../pose-action/features/"

In [17]:
# Load data
df_pose = pd.read_csv(os.path.join(base_dir, 'pose_keypoints_with_labels.csv'))
df_dist = pd.read_csv(os.path.join(base_dir, 'pose_distances.csv'))
df_pose = df_pose.loc[high_conf_ind]
df_dist = df_dist.loc[high_conf_ind]
print(df_pose.shape, df_dist.shape)

(5350, 301) (5350, 6)


In [11]:
# Define Feature Series Ranges
r_au_intensities = range(df_face.columns.get_loc("AU01_r"), df_face.columns.get_loc("AU45_r"))
r_au_class = range(df_face.columns.get_loc("AU01_c"), df_face.columns.get_loc("AU45_c"))
r_3d_eye_landmarks = range(df_face.columns.get_loc("eye_lmk_X_0"), df_face.columns.get_loc("eye_lmk_Z_55"))
r_gaze_directions = range(df_face.columns.get_loc("gaze_0_x"), df_face.columns.get_loc("gaze_angle_y"))
r_pose = range(df_face.columns.get_loc("pose_Tx"), df_face.columns.get_loc("pose_Rz"))
r_3d_face_landmarks = range(df_face.columns.get_loc("X_0"), df_face.columns.get_loc("Z_67"))

In [12]:
df_au_intensities = df_face.iloc[:, r_au_intensities]
df_au_class = df_face.iloc[:, r_au_class]
df_3d_eye_landmarks = df_face.iloc[:, r_3d_eye_landmarks]
df_gaze_directions = df_face.iloc[:, r_gaze_directions]
df_pose = df_face.iloc[:, r_pose]
df_3d_face_landmarks = df_face.iloc[:, r_3d_face_landmarks]

In [13]:
df_au_intensities['label'] = labels.values
df_au_class['label'] = labels.values
df_3d_eye_landmarks['label'] = labels.values
df_gaze_directions['label'] = labels.values
df_pose['label'] = labels.values
df_3d_face_landmarks['label'] = labels.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_au_intensities['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_au_class['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_3d_eye_landmarks['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .lo

In [15]:
body_keypoints = [
    {0,  "Nose"},
    {1,  "Neck"},
    {2,  "RShoulder"},
    {3,  "RElbow"},
    {4,  "RWrist"},
    {5,  "LShoulder"},
    {6,  "LElbow"},
    {7,  "LWrist"},
    {8,  "MidHip"},
    {9,  "RHip"},
    {10, "RKnee"},
    {11, "RAnkle"},
    {12, "LHip"},
    {13, "LKnee"},
    {14, "LAnkle"},
    {15, "REye"},
    {16, "LEye"},
    {17, "REar"},
    {18, "LEar"},
    {19, "LBigToe"},
    {20, "LSmallToe"},
    {21, "LHeel"},
    {22, "RBigToe"},
    {23, "RSmallToe"},
    {24, "RHeel"},
    {25, "Background"}
]

In [18]:
# Define Feature Series Ranges
# r_p1 = range(0,74)
# r_p2 = range(75,149)
# r_p3 = range(150, 224)
# r_p4 = range(225,299)

# Body Points
r_p1 = range(0,24)
r_p2 = range(75,99)
r_p3 = range(150, 174)
r_p4 = range(225,249)

df_p1 = df_pose.iloc[:, r_p1]
df_p2 = df_pose.iloc[:, r_p2]
df_p3 = df_pose.iloc[:, r_p3]
df_p4 = df_pose.iloc[:, r_p4]

In [19]:
df_p1['label'] = labels.values
df_p2['label'] = labels.values
df_p3['label'] = labels.values
df_p4['label'] = labels.values

df_dist['label'] = labels.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_p1['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_p2['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_p3['label'] = labels.values
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = val

In [26]:
df_face_and_p4_p2_pose = pd.concat([df_3d_face_landmarks.iloc[:, :-1], df_p4.iloc[:, :-1], df_p2],axis=1)

df_eye_and_p2_pose = pd.concat([df_3d_eye_landmarks.iloc[:, :-1], df_p2.iloc[:, :-1], df_pose],axis=1)

df_face_and_pose_dist = pd.concat([df_3d_face_landmarks.iloc[:, :-1], df_dist],axis=1)

In [27]:
feature_sets = {
    "Euclidian Distance and Face": df_face_and_pose_dist,
    "Face - P2 and P4 Body Keypoints": df_face_and_p4_p2_pose,
    "Eye and P2 Body Keypoints": df_eye_and_p2_pose
}

In [31]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

In [32]:
classifier_names = ['LR', 'knn', 'rbf svm', 'random forest', 'boosted trees', 'mlp']
classifiers = [LogisticRegression(random_state=42, solver="liblinear"),
                KNeighborsClassifier(n_neighbors=6),
                SVC(gamma=2, C=1),
                RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
                GradientBoostingClassifier(n_estimators=10, learning_rate=1, max_depth=5),
                MLPClassifier(hidden_layer_sizes=(300, 50, 6), random_state=42, max_iter=300)]

results = pd.DataFrame(columns= ['LR', 'knn', 'rbf svm', 'random forest', 'boosted trees', 'adaboost', 'title'])

In [33]:
for title in feature_sets:
    s = [0, 0, 0, 0, 0, 0, "title"]
    dfc = feature_sets[title]
    not_zero_ind = ~(dfc == 0).all(axis=1)

    dfc = dfc.loc[not_zero_ind]
    labels = dfc['label'].loc[not_zero_ind]


    scaler = StandardScaler()
    scaled_samples = scaler.fit_transform(dfc.iloc[:,:-2])

    X_train, X_test, y_train, y_test = train_test_split(scaled_samples, labels, test_size=0.2, random_state=42, stratify=labels)

    i = 0
    for model in classifiers:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        res = f1_score(y_test, y_pred, average='weighted')
        print(title, model, res)
        s[i] = res
        i +=1
    s[i] = title
    results.loc[len(results.index)] = s
    #results.head()

Euclidian Distance and Face LogisticRegression(random_state=42, solver='liblinear') 0.4895874802847817
Euclidian Distance and Face KNeighborsClassifier(n_neighbors=6) 0.6577751796883542


  warn("Multiclass is still experimental. Subject to change per release.")
  warn("Detected multiclass problem: forcing interactions to 0")


Euclidian Distance and Face ExplainableBoostingClassifier(feature_names=['feature_0001', 'feature_0002',
                                             'feature_0003', 'feature_0004',
                                             'feature_0005', 'feature_0006',
                                             'feature_0007', 'feature_0008',
                                             'feature_0009', 'feature_0010',
                                             'feature_0011', 'feature_0012',
                                             'feature_0013', 'feature_0014',
                                             'feature_0015', 'feature_0016',
                                             'feature_0017', 'feature_0018',
                                             'feature_0019', 'feature_0020',
                                             'feat...
                                             'continuous', 'continuous',
                                             'continuous', 'continuous',
  

ValueError: Shape of passed values is (4280, 250), indices imply (4280, 208)

In [25]:
results.to_csv('reports/f1_scores_combined_2_%s.csv' % person_name)

In [None]:
### !!!! Also test with kmeans featurizer !!! ###

In [None]:
from sklearn.metrics import roc_curve, auc

def test_roc(y_test, y_pred):
    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=1)
    return fpr, tpr


In [None]:
from sklearn.model_selection import cross_val_score

i = 0
for model in classifiers: 
    scores = cross_val_score(model, scaled_samples, labels, cv=5)
    print("cross val scores of scaled %s:" % classifier_names[i], scores)
    scores = cross_val_score(model, dfc.iloc[:,:-2], labels, cv=5)
    print("cross val scores of %s:" % classifier_names[i], scores)

    i +=1

In [None]:
plt.figure()

for c in classifier_names:
    fpr, tpr = test_roc(y_test.values, y_pred)
    plt.plot(fpr, tpr, label=c)

plt.plot([0, 1], [0, 1], 'k--')
plt.legend()