In [None]:
%matplotlib inline 

import os
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [None]:
# Integer levels from -2 through 3 inclusive.
# NOTE(review): presumably the engagement levels used as label values — confirm;
# this list is not referenced in the cells visible here.
eng_levels = list(range(-2, 4))

## OpenPose Features

In [None]:
# TODO: also test with the k-means featurizer.

In [None]:
# Directory (relative to the notebook's working directory) holding the pose feature CSV read below.
base_dir = "../pose-action/features/"

In [None]:
# Read the labelled pose-keypoint table. The 'label' column stays inside `df`
# and is additionally exposed on its own as `labels`.
keypoints_csv = os.path.join(base_dir, 'pose_keypoints_with_labels.csv')
df = pd.read_csv(keypoints_csv)
labels = df.loc[:, 'label']
df.head()

In [None]:
# Split the wide table into one block of columns per person (P1..P4).
# The blocks start at columns 0, 75, 150 and 225 (75 columns apart) and P1 spans a
# full 75 columns. range() excludes its stop value, so the former stops of
# 149 / 224 / 299 silently dropped the last column of P2, P3 and P4.
# NOTE(review): 75 is presumably 25 keypoints x (x, y, confidence) — confirm against the CSV.
COLS_PER_PERSON = 75

r_p1 = range(0 * COLS_PER_PERSON, 1 * COLS_PER_PERSON)
r_p2 = range(1 * COLS_PER_PERSON, 2 * COLS_PER_PERSON)
r_p3 = range(2 * COLS_PER_PERSON, 3 * COLS_PER_PERSON)
r_p4 = range(3 * COLS_PER_PERSON, 4 * COLS_PER_PERSON)

# .copy() makes each block an independent frame, so adding a column to it later
# never writes into a slice of `df`.
df_p1 = df.iloc[:, r_p1].copy()
df_p2 = df.iloc[:, r_p2].copy()
df_p3 = df.iloc[:, r_p3].copy()
df_p4 = df.iloc[:, r_p4].copy()

In [None]:
# Attach the label column to every per-person frame.
# .assign returns a new frame instead of writing into a slice of `df`, which avoids
# pandas' SettingWithCopyWarning. The labels are read straight from `df` (never
# reassigned after loading) rather than from `labels`, which the evaluation loop
# rebinds — so this cell stays re-runnable at any point.
label_values = df['label'].values
df_p1 = df_p1.assign(label=label_values)
df_p2 = df_p2.assign(label=label_values)
df_p3 = df_p3.assign(label=label_values)
df_p4 = df_p4.assign(label=label_values)

In [None]:
# Lookup from display title to the frame evaluated under that title:
# the four per-person frames first, then the complete table.
feature_sets = dict(
    zip(("P1", "P2", "P3", "P4"), (df_p1, df_p2, df_p3, df_p4))
)
feature_sets["All Features"] = df

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

In [None]:
# Benchmark six off-the-shelf classifiers on every feature set and collect the
# weighted F1 score of each: one row per feature set, one column per classifier.
classifier_names = ['LR', 'knn', 'rbf svm', 'random forest', 'boosted trees', 'mlp']

# Same order as classifier_names. Every stochastic estimator is seeded so that
# Restart & Run All reproduces the scores.
classifiers = [LogisticRegression(random_state=42, solver="liblinear"),
               KNeighborsClassifier(n_neighbors=6),
               SVC(gamma=2, C=1),
               RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, random_state=42),
               GradientBoostingClassifier(n_estimators=10, learning_rate=1, max_depth=5, random_state=42),
               MLPClassifier(hidden_layer_sizes=(300, 50, 6), random_state=42, max_iter=300)]

score_rows = []

# NOTE: dfc, labels, scaled_samples, X_test, y_test and y_pred deliberately keep
# these names — later cells read the values left over from the last feature set.
for title, dfc in feature_sets.items():
    # Drop rows whose model inputs are all zero, and rows containing any NaN.
    # The zero test looks only at the columns fed to the models; including
    # 'label' would make the outcome depend on whether the label is 0.
    # NOTE(review): [:-2] removes 'label' plus the column before it. For the
    # per-person frames that second column looks like a real feature — confirm
    # the CSV layout before changing it.
    not_zero_ind = ~(dfc.iloc[:, :-2] == 0).all(axis=1)
    not_nan_index = ~dfc.isna().any(axis=1)
    dfc = dfc.loc[not_zero_ind & not_nan_index]
    labels = dfc['label']
    raw_features = dfc.iloc[:, :-2]

    # Split first, then fit the scaler on the training rows only, so that no
    # statistics of the test rows leak into training.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        raw_features, labels, test_size=0.2, random_state=42, stratify=labels)
    scaler = StandardScaler().fit(X_train_raw)
    X_train = scaler.transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)
    # Full scaled matrix, kept because the cross-validation cell reads it.
    scaled_samples = scaler.transform(raw_features)

    row = {}
    for name, model in zip(classifier_names, classifiers):
        # fit() re-trains the shared estimator instance from scratch for this feature set.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        res = f1_score(y_test, y_pred, average='weighted')
        print(title, model, res)
        row[name] = res
    row['title'] = title
    score_rows.append(row)

# Build the table once instead of growing it row by row.
results = pd.DataFrame(score_rows, columns=classifier_names + ['title'])

In [None]:
# Persist the F1 table. to_csv does not create missing directories, so make sure
# the target folder exists first.
os.makedirs('reports', exist_ok=True)
results.to_csv('reports/f1_scores_pose.csv')

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# 5-fold cross-validation of every classifier, with and without standardisation.
# `dfc` and `labels` are the values left behind by the evaluation loop, i.e. the
# filtered rows of the last entry in feature_sets.
cv_features = dfc.iloc[:, :-2]

for name, model in zip(classifier_names, classifiers):
    # Scaling lives inside the pipeline so it is re-fitted on the training part
    # of each fold; scaling the whole matrix up front would leak held-out rows.
    scaled_model = make_pipeline(StandardScaler(), model)
    scores = cross_val_score(scaled_model, cv_features, labels, cv=5)
    print("cross val scores of scaled %s:" % name, scores)
    scores = cross_val_score(model, cv_features, labels, cv=5)
    print("cross val scores of %s:" % name, scores)

In [None]:
plt.figure()

# One curve per classifier. Each model predicts on X_test itself; reusing the
# single leftover `y_pred` would draw the same curve once per name.
# The models, X_test and y_test are those left by the evaluation loop (last feature set).
# NOTE(review): test_roc is not defined in the cells visible here — it must be
# defined or imported before this cell runs; it is assumed to return (fpr, tpr).
for name, model in zip(classifier_names, classifiers):
    model_pred = model.predict(X_test)
    fpr, tpr = test_roc(y_test.values, model_pred)
    plt.plot(fpr, tpr, label=name)

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend();