In [25]:
import pandas as pd
import os
import ujson
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

from imblearn.over_sampling import SMOTE

from config import *

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import sampler

from detector_validation_helpers import calc_prf

torch.manual_seed(1)

<torch._C.Generator at 0x1a396a37b0>

# load/preprocess openpose data into train, val, test

In [2]:
SAMPLE_DATAFRAME_PATH = 'data/gold_sample.json'
new_gold = pd.read_json(SAMPLE_DATAFRAME_PATH)
new_gold.head()

Unnamed: 0,face_openpose,face_openpose_nose,face_present,frame,hand_openpose,hand_openpose_wrist,hand_present,vid_name,vid_path,face_keypoints,pose_keypoints,hand_left_keypoints,hand_right_keypoints,tuples
0,0,1,1,3515,0,,1,S_20141112_2426_03.mp4,/scratch/groups/mcfrank/Home_Headcam_new/Samca...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0.471533, 0.0980919, 0.37534799999999996, 0....","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,0,0,0,4925,0,,1,S_20131127_1310_04.mp4,/scratch/groups/mcfrank/Home_Headcam_new/Samca...,[],[],[],[],"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
10,1,1,1,8785,1,,1,S_20141228_2611_08.mp4,/scratch/groups/mcfrank/Home_Headcam_new/Samca...,"[[0.281036, 0.524218, 0.0931592, 0.281939, 0.5...","[[0.295931, 0.558211, 0.7980659999999999, 0.37...","[[0.319839, 0.901076, 0.47536000000000006, 0.3...","[[0.183019, 0.7725029999999999, 0.0520404, 0.1...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
100,0,0,1,14425,0,,0,S_20130619_0802_03.mp4,/scratch/groups/mcfrank/Home_Headcam_new/Samca...,[],[],[],[],"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1000,0,0,1,1470,0,,0,S_20141115_2429_01.mp4,/scratch/groups/mcfrank/Home_Headcam_new/Samca...,[],[],[],[],"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [3]:
new_gold.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24000 entries, 0 to 9999
Data columns (total 14 columns):
face_openpose           24000 non-null int64
face_openpose_nose      24000 non-null int64
face_present            24000 non-null int64
frame                   24000 non-null int64
hand_openpose           24000 non-null int64
hand_openpose_wrist     12000 non-null float64
hand_present            24000 non-null int64
vid_name                24000 non-null object
vid_path                24000 non-null object
face_keypoints          24000 non-null object
pose_keypoints          24000 non-null object
hand_left_keypoints     24000 non-null object
hand_right_keypoints    24000 non-null object
tuples                  24000 non-null object
dtypes: float64(1), int64(6), object(7)
memory usage: 2.7+ MB


Folded below: utility functions for face presence calculations

In [4]:
import ntpath

def get_op_xyconf(keypt_lists):
    x = []
    y = []
    conf = []
    for keypt in keypt_lists:
        x.append(keypt[0::3]) 
        y.append(keypt[1::3])
        conf.append(keypt[2::3])
    if x == [] or y == [] or conf == []:
        return [], [], []
    
    return x, y, conf

def get_pose_keypoints(vid_path, frame):
    vid_name = ntpath.basename(vid_path)[:-4]
    frame_num = str(frame).zfill(12)
    filename = f'{vid_name}_{frame_num}_keypoints.json'
    fp = os.path.join('/scratch/users/agrawalk/headcam-algo-output/gold_sample_openpose/', vid_name, filename)
    if not os.path.exists(fp):
        print('near start or end of video')
        return []
    keypts = ujson.load(open(fp, 'r'))
    return [person['pose_keypoints'] for person in keypts['people']]

def get_face_keypoints(vid_path, frame):
    vid_name = ntpath.basename(vid_path)[:-4]
    frame_num = str(frame).zfill(12)
    filename = f'{vid_name}_{frame_num}_keypoints.json'
    fp = os.path.join('/scratch/users/agrawalk/headcam-algo-output/gold_sample_openpose/', vid_name, filename)
    if not os.path.exists(fp):
        print('near start or end of video')
        return []
    keypts = ujson.load(open(fp, 'r'))
    return [person['face_keypoints'] for person in keypts['people']]

def get_hand_left_keypoints(vid_path, frame):
    vid_name = ntpath.basename(vid_path)[:-4]
    frame_num = str(frame).zfill(12)
    filename = f'{vid_name}_{frame_num}_keypoints.json'
    fp = os.path.join('/scratch/users/agrawalk/headcam-algo-output/gold_sample_openpose/', vid_name, filename)
    if not os.path.exists(fp):
        print('near start or end of video')
        return []
    keypts = ujson.load(open(fp, 'r'))
    return [person['hand_left_keypoints'] for person in keypts['people']]

def get_hand_right_keypoints(vid_path, frame):
    vid_name = ntpath.basename(vid_path)[:-4]
    frame_num = str(frame).zfill(12)
    filename = f'{vid_name}_{frame_num}_keypoints.json'
    fp = os.path.join('/scratch/users/agrawalk/headcam-algo-output/gold_sample_openpose/', vid_name, filename)
    if not os.path.exists(fp):
        print('near start or end of video')
        return []
    keypts = ujson.load(open(fp, 'r'))
    return [person['hand_right_keypoints'] for person in keypts['people']]

In [5]:
#Doesn't need to be run; already in the df
# new_gold['face_keypoints'] = new_gold.apply(lambda row: get_face_keypoints(row['vid_path'], row['frame']), axis=1)
# print('pose')
# new_gold['pose_keypoints'] = new_gold.apply(lambda row: get_pose_keypoints(row['vid_path'], row['frame']), axis=1)
# print('left')
# new_gold['hand_left_keypoints'] = new_gold.apply(lambda row: get_hand_left_keypoints(row['vid_path'], row['frame']), axis=1)
# print('right')
# new_gold['hand_right_keypoints'] = new_gold.apply(lambda row: get_hand_right_keypoints(row['vid_path'], row['frame']), axis=1)

In [6]:
#Functions to be applied row-wise to dataframes to calculate columns

def face_openpose(row):
    return 1 if np.sum(row['face_keypoints']) != 0 else 0

def face_openpose_nose(row):
    nose_keypts = [person_pose[0*3+2] for person_pose in row['pose_keypoints']]
    return 1 if np.sum(nose_keypts) != 0 else 0

def hand_openpose(row):
    return 1 if np.sum(row['hand_left_keypoints']) != 0 or np.sum(row['hand_right_keypoints'])  != 0 else 0

def hand_openpose_wrist(row):
    #turns out to be the same as hand_openpose
    hand_keypts = [np.array(person_pose[[4*3+2, 7*3+2]]) for person_pose in row['pose_keypoints']]
    return 1 if np.sum(hand_keypts) != 0 else 0

"""Note: you need the files for this one; coming soon."""
# def get_keypts_tuple(row, keypt_type, tuple_size=5):
#     vid_name = row['vid_name'][:-4]
#     middle_frame = row['frame']
#     keypts_tuple = []
    
#     for frame in range(middle_frame - tuple_size//2, middle_frame + tuple_size//2 + 1):
#         frame = str(frame).zfill(12)
#         filename = f'{vid_name}_{frame}_keypoints.json'
#         fp = os.path.join('/scratch/users/agrawalk/headcam-algo-output/gold_sample_openpose/', vid_name, filename)
        
#         if not os.path.exists(fp):
#             if frame == middle_frame:
#                 return -1 #if the center frame doesn't exist, mark it for discarding
#             keypts_tuple.append([0]*70*3)
#             continue 
            
#         keypts = ujson.load(open(fp, 'r'))
#         keypts = [person[f'{keypt_type}_keypoints'] for person in keypts['people']]
#         keypts_tuple.append([0]*70*3 if len(keypts) == 0 else keypts[0])
    
#     return keypts_tuple

'Note: you need the files for this one; coming soon.'

In [7]:
# new_gold['face_openpose'] = new_gold.apply(face_openpose, axis=1)
# new_gold['face_openpose_nose'] = new_gold.apply(face_openpose_nose, axis=1)
# new_gold['hand_openpose'] = new_gold.apply(hand_openpose, axis=1)
# new_gold['hand_openpose_wrist'] = new_gold.apply(hand_openpose_wrist, axis=1)

In [8]:
print('Face PRF Scores: Raw')
prf = calc_prf(new_gold['face_openpose'], new_gold['face_present'])
print(f'face_openpose: {prf}')
prf = calc_prf(new_gold['face_openpose_nose'], new_gold['face_present'])
print(f'face_openpose_nose: {prf}')
print()

print('Hand PRF Scores: Raw')
prf = calc_prf(new_gold['hand_openpose'], new_gold['hand_present'])
print(f'hand_openpose: {prf}')
prf = calc_prf(new_gold['hand_openpose_wrist'], new_gold['hand_present'])
print(f'hand_openpose_wrist: {prf}')

Face PRF Scores: Raw
face_openpose: (0.7270251872021783, 0.5531767955801105, 0.6282968918521423)
face_openpose_nose: (0.6190910472716218, 0.7032113259668509, 0.6584754668175572)

Hand PRF Scores: Raw
hand_openpose: (0.7432731293770733, 0.3930416138777897, 0.5141837190029961)
hand_openpose_wrist: (0.719921875, 0.17961212357470033, 0.2874970751111458)


In [9]:
"""Code coming soon for this"""
# print('Getting face tuples...')
# new_gold['face_tuple'] = new_gold.apply(lambda row: get_keypts_tuple(row, 'face'), axis=1)
# print('Getting pose tuples...')
# new_gold['pose_tuple'] = new_gold.apply(lambda row: get_keypts_tuple(row, 'pose'), axis=1)

'Code coming soon for this'

In [10]:
#Next up: (maybe later: tuple of xy+conf, xy+conf) tuple of conf, conf
# X = new_gold['face_tuple'].values #NOTE: need the tuples data for this one-- too big for Github, coming soon.
X = new_gold['tuples'].values 
print(X.shape)

(24000,)


In [11]:
new_X = []
for i, x in enumerate(X):
    #if i % 1000 == 0: print(i)
    new_X.append(np.array(x))
new_X = np.array(new_X)
X = new_X

In [None]:
# each element of tuple has 130 confidences-- the confidences for pose (18), face (70), L hand (21), 
# and R hand (21) keypoints, in that order. If you want to only keep the pose and face keypoints, for example, 
# you could say X = X[.., :88] to cut off the hand keypoints.
# Q: are these the sum all confidences for all detected people per frame?

In [12]:
# faces
y = new_gold['face_present'].values
y = np.array([np.array(yi) for yi in y])

# hands
yh = new_gold['hand_present'].values
yh = np.array([np.array(yi) for yi in yh])

print(X.shape, y.shape, yh.shape)

(24000, 5, 130) (24000,) (24000,)


In [13]:
X = X.reshape(X.shape[0], -1) # flatten the five frames of confidences

In [61]:
# scale feature values?
#from sklearn import preprocessing
#Xsc = preprocessing.scale(X) 

In [21]:
# we will do cross-validated hyperparameter fitting on 80% of the data, and then evaluate on the final 20%

# faces
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# hands
Xh_train, Xh_test, yh_train, yh_test = train_test_split(X, yh, test_size=0.2, random_state=1)

#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape) # X_val.shape, y_val.shape,

(19200, 650) (19200,) (4800, 650) (4800,)


In [22]:
print("Faces train/test:")
print(pd.DataFrame(y_train)[0].value_counts())
print(pd.DataFrame(y_test)[0].value_counts())
print("Hands train/test:")
print(pd.DataFrame(yh_train)[0].value_counts())
print(pd.DataFrame(yh_test)[0].value_counts())

Faces train/test:
0    14577
1     4623
Name: 0, dtype: int64
0    3631
1    1169
Name: 0, dtype: int64
Hands train/test:
0    10923
1     8277
Name: 0, dtype: int64
0    2816
1    1984
Name: 0, dtype: int64


In [23]:
sm = SMOTE(sampling_strategy=1.0, random_state=12)
X_train, y_train = sm.fit_sample(X_train, y_train) # faces

Xh_train, yh_train = sm.fit_sample(Xh_train, yh_train) # hands

# upsample detections

In [130]:
def middle_frame(X):
    return X[:, X.shape[1]//2, :]

In [162]:
allTrain = pd.DataFrame(X_train) 
trainClass = pd.DataFrame(data=y_train, columns=['present'])
allTrain = pd.concat([allTrain, trainClass], axis=1)

In [163]:
from sklearn.utils import resample


# separate minority and majority classes
no_detections = allTrain[allTrain.present==0] # majority
detections = allTrain[allTrain.present==1] # minority

# upsample minority
X_upsampled = resample(detections,
                          replace=True, # sample with replacement
                          n_samples=len(no_detections), # match number in majority class
                          random_state=27) # reproducible results

# combine majority and upsampled minority
upsampled = pd.concat([no_detections, X_upsampled])

# check new class counts
upsampled.present.value_counts()

1    14577
0    14577
Name: present, dtype: int64

In [164]:
y_train = upsampled.present
X_train = upsampled.drop('present', axis=1)

# create classifiers

In [48]:
def rfc_model(X, y):
    # Perform Grid-Search
    gsc = GridSearchCV(
        estimator=RandomForestClassifier(),
        param_grid={
            'max_depth': range(2,7),
            'n_estimators': (10, 50, 100, 1000),
        }, 
        cv=5, 
        scoring='f1_weighted', 
        verbose=0, n_jobs=-1)
    
    grid_result = gsc.fit(X, y)
    best_params = grid_result.best_params_
    
    rfc = RandomForestClassifier(max_depth=best_params["max_depth"], n_estimators=best_params["n_estimators"], 
                                 random_state=False, verbose=False)
    # K-Fold CV
    scores = cross_val_score(rfc, X, y, cv=5, scoring='f1_weighted')
    return (rfc, best_params, scores)


In [None]:
face_rf_model, face_rf_params, face_rf_scores = rfc_model(X_train, y_train)

In [None]:
hand_rf_model, hand_rf_params, hand_rf_scores = rfc_model(Xh_train, yh_train)

In [28]:
print(face_rf_scores) # CV faces getting .785 with upsampling, SMOTE = .7945

face_rf_model.fit(X_train, y_train)
f_pred = face_rf_model.predict(X_test)
print(classification_report(y_test,f_pred))

[0.77421262 0.78330646 0.81004933 0.80290346 0.80219998]


In [35]:
print(hand_rf_scores) # CV hands with SMOTE getting .63

hand_rf_model.fit(Xh_train, yh_train)
h_pred = hand_rf_model.predict(Xh_test)
print(classification_report(yh_test, h_pred))

[0.63235099 0.62244092 0.63154637 0.65064885 0.63453031]
              precision    recall  f1-score   support

           0       0.67      0.88      0.76      2816
           1       0.69      0.39      0.49      1984

    accuracy                           0.67      4800
   macro avg       0.68      0.63      0.63      4800
weighted avg       0.68      0.67      0.65      4800



In [37]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
dt = DecisionTreeClassifier(max_depth=5)

print(cross_val_score(dt, X_train, y_train, cv=5)) # .76-.80
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test) 
print(classification_report(y_test, dt_pred)) # .84

[0.7638484  0.77396673 0.7942034  0.79523238 0.78833619]
              precision    recall  f1-score   support

           0       0.88      0.91      0.89      3631
           1       0.68      0.63      0.65      1169

    accuracy                           0.84      4800
   macro avg       0.78      0.77      0.77      4800
weighted avg       0.83      0.84      0.84      4800



In [39]:
dt.fit(Xh_train, yh_train)
dt_pred = dt.predict(Xh_test) 
print("Hands:")
print(classification_report(yh_test, dt_pred))

Hands:
              precision    recall  f1-score   support

           0       0.67      0.86      0.75      2816
           1       0.66      0.40      0.50      1984

    accuracy                           0.67      4800
   macro avg       0.67      0.63      0.62      4800
weighted avg       0.67      0.67      0.65      4800



In [38]:
ab = AdaBoostClassifier()
print(cross_val_score(ab, X_train, y_train, cv=5)) # .77-.80
ab.fit(X_train, y_train)
ab_pred = ab.predict(X_test) 
print(classification_report(y_test, ab_pred)) 

[0.77070828 0.77242326 0.80243526 0.79608986 0.79725557]
              precision    recall  f1-score   support

           0       0.89      0.89      0.89      3631
           1       0.65      0.65      0.65      1169

    accuracy                           0.83      4800
   macro avg       0.77      0.77      0.77      4800
weighted avg       0.83      0.83      0.83      4800



In [40]:
ab.fit(Xh_train, yh_train)
ab_pred = ab.predict(Xh_test) 
print(classification_report(yh_test, ab_pred)) 

              precision    recall  f1-score   support

           0       0.68      0.85      0.75      2816
           1       0.67      0.42      0.51      1984

    accuracy                           0.67      4800
   macro avg       0.67      0.64      0.63      4800
weighted avg       0.67      0.67      0.66      4800



In [45]:
mlp = MLPClassifier(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', alpha=0.0001, 
                    batch_size='auto', learning_rate='constant', learning_rate_init=0.001, 
                    power_t=0.5, max_iter=300, shuffle=True, random_state=None, tol=0.0001, 
                    verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, 
                    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, 
                    epsilon=1e-08, n_iter_no_change=10)

In [42]:
logreg = LogisticRegression(solver='lbfgs', max_iter=500, random_state=1)

In [167]:
class OpenposeLSTM(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        super(OpenposeLSTM, self).__init__()
        self.hidden_dim = hidden_dim

#         self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim*2*2, tagset_size)

    def init_hidden(self):
        # This is what we'll initialise our hidden state as
        return (torch.zeros(1, BATCH_SIZE, self.hidden_dim),
                torch.zeros(1, BATCH_SIZE, self.hidden_dim))
    
    def forward(self, keypts):
        keypts = torch.Tensor(keypts)
#         embeds = self.word_embeddings(sentence)
#         print(keypts.shape)
        lstm_out, _ = self.lstm(keypts)
#         print(lstm_out[:, 0, :].shape)

        # concatenating the first and last sequence element outputs 
        # (the ends of the reverse and forward chains, respectively)
        lstm_out = torch.cat((lstm_out[:, 0], lstm_out[:, -1]), dim=1)
#         print(lstm_out.shape)
        tag_space = self.hidden2tag(lstm_out)
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_space

In [171]:
EMBEDDING_DIM = 70
HIDDEN_DIM = 64
lstm = OpenposeLSTM(EMBEDDING_DIM, HIDDEN_DIM, 2)

# train classifiers on openpose data

In [43]:
logreg.fit(X_train, y_train)

print('Faces: Logistic regression:')
y_pred = logreg.predict(X_test)
print(classification_report(y_test, y_pred))

Faces: Logistic regression:
              precision    recall  f1-score   support

           0       0.88      0.90      0.89      3631
           1       0.67      0.63      0.65      1169

    accuracy                           0.83      4800
   macro avg       0.78      0.77      0.77      4800
weighted avg       0.83      0.83      0.83      4800



In [44]:
logreg.fit(Xh_train, yh_train)

print('Hands: Logistic regression:')
y_pred = logreg.predict(X_test)
print(classification_report(y_test, y_pred))

Hands: Logistic regression:
              precision    recall  f1-score   support

           0       0.86      0.87      0.87      3631
           1       0.59      0.57      0.58      1169

    accuracy                           0.80      4800
   macro avg       0.73      0.72      0.73      4800
weighted avg       0.80      0.80      0.80      4800



In [46]:
mlp.fit(X_train, y_train)

print('Faces: MLP:')
y_pred = mlp.predict(X_test)
print(classification_report(y_test, y_pred))

Faces: MLP:
              precision    recall  f1-score   support

           0       0.86      0.90      0.88      3631
           1       0.63      0.55      0.59      1169

    accuracy                           0.81      4800
   macro avg       0.75      0.72      0.73      4800
weighted avg       0.80      0.81      0.81      4800



In [47]:
mlp.fit(Xh_train, yh_train)

print('Hands: MLP:')
y_pred = mlp.predict(Xh_test)
print(classification_report(yh_test, y_pred))

Hands: MLP:
              precision    recall  f1-score   support

           0       0.67      0.81      0.73      2816
           1       0.62      0.44      0.51      1984

    accuracy                           0.66      4800
   macro avg       0.65      0.62      0.62      4800
weighted avg       0.65      0.66      0.64      4800



In [175]:
X_train.shape

(29154, 650)

In [176]:
# random forests are less affected by class imbalance
rfc = RandomForestClassifier(n_estimators=50).fit(X_train, y_train)

rfc_pred = rfc.predict(X_test)

print(classification_report(y_test, rfc_pred))

              precision    recall  f1-score   support

           0       0.87      0.94      0.91      3631
           1       0.76      0.57      0.65      1169

   micro avg       0.85      0.85      0.85      4800
   macro avg       0.81      0.75      0.78      4800
weighted avg       0.84      0.85      0.84      4800



In [177]:
rfc.feature_importances_ 

array([0.01533374, 0.0056569 , 0.00460523, 0.00241395, 0.00106702,
       0.00699311, 0.0021666 , 0.00114083, 0.00238593, 0.00122409,
       0.00114047, 0.00256548, 0.00166596, 0.00121426, 0.00448479,
       0.01359379, 0.00325627, 0.00474551, 0.00151747, 0.00054069,
       0.00082325, 0.00052289, 0.00073893, 0.00124199, 0.00032308,
       0.00066082, 0.00044664, 0.00059804, 0.00062037, 0.00141899,
       0.00043511, 0.00105366, 0.0004223 , 0.00062543, 0.00056016,
       0.00065422, 0.00040565, 0.00071695, 0.00044249, 0.00047334,
       0.00046042, 0.00040078, 0.00049588, 0.00040274, 0.00052837,
       0.00038384, 0.00051384, 0.00054823, 0.00061692, 0.00049124,
       0.00134706, 0.00046779, 0.00049189, 0.00042151, 0.001411  ,
       0.00042987, 0.00039325, 0.00063405, 0.00055551, 0.00055367,
       0.00061449, 0.00053225, 0.00044723, 0.00064338, 0.0005472 ,
       0.00033611, 0.00057768, 0.00052449, 0.00037011, 0.00055051,
       0.00058788, 0.0005277 , 0.00041728, 0.00702212, 0.00039

In [178]:
# what are the predictions?
print(pd.DataFrame(rfc_pred)[0].value_counts())

0    3929
1     871
Name: 0, dtype: int64
0    3776
1    1024
Name: 0, dtype: int64


In [179]:
# try yet another classifier...
from sklearn.datasets import load_iris
from sklearn.svm import SVC

clf = SVC(gamma='scale')
clf.set_params(kernel='rbf').fit(X_train, y_train)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [181]:
clf_pred = clf.predict(X_test)

print(classification_report(y_test, clf_pred))

              precision    recall  f1-score   support

           0       0.89      0.92      0.90      3631
           1       0.71      0.63      0.67      1169

   micro avg       0.85      0.85      0.85      4800
   macro avg       0.80      0.77      0.78      4800
weighted avg       0.84      0.85      0.84      4800



In [182]:
def train_part34(model, optimizer, epochs=1):
    """
    Train a model on CIFAR-10 using the PyTorch Module API.
    
    Inputs:
    - model: A PyTorch Module giving the model to train.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: (Optional) A Python integer giving the number of epochs to train for
    
    Returns: Nothing, but prints model accuracies during training.
    """
#     loss_fn = nn.MSELoss(size_average=False)
    model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        print(f'Epoch: {e}')
#             t, x, y = e, X_train, y_train
        for t, (x, y) in enumerate(loader_train):
#             print(x, y)
#             if i == 1:
#                 break
#             else:
#                 i+=1
            model.train()  # put model to training mode
            # Clear stored gradient
            model.zero_grad()

            # Initialise hidden state
            # Don't do this if you want your LSTM to be stateful
            model.hidden = model.init_hidden()
            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            print(x.shape)
            print(y.shape)

            scores = model(x)
#             loss_fn = nn.LLoss()
            print(scores.shape)
#             loss = loss_fn(scores, y)
#             loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % 10 == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
#                 print(f'Val acc: {(model(X_val).max(1)[1] == torch.Tensor(y_val).to(dtype=torch.long)).sum() / len(y_val)}')
                check_accuracy_part34(loader_val, model)
                print()

In [183]:
class RandomDataset(Dataset):
    def __init__(self):
        self.train = True
        X_r = np.random.random((10000, 5, 210))
        y_r = np.random.random((10000,))
        self.len = len(X_r)
        self.x_data = X_r
        self.y_data = y_r
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len
    
class OpenposeTrainDataset(Dataset):
    def __init__(self):
        self.train = True
        self.len = len(X_train)
        self.x_data = X_train
        self.y_data = y_train
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

class OpenposeValDataset(Dataset):
    def __init__(self):
        self.train = True
        self.len = len(X_val)
        self.x_data = X_val
        self.y_data = y_val
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

class OpenposeTestDataset(Dataset):
    def __init__(self):
        self.train = False
        self.len = len(X_test)
        self.x_data = X_test
        self.y_data = y_test
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [184]:
BATCH_SIZE = 100
train_data = OpenposeTrainDataset()
test_data = OpenposeTestDataset()
val_data = OpenposeValDataset()
loader_train = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=sampler.SubsetRandomSampler(range(len(train_data))))
loader_test = DataLoader(test_data, batch_size=BATCH_SIZE, sampler=sampler.SubsetRandomSampler(range(len(test_data))))
loader_val = DataLoader(val_data, batch_size=BATCH_SIZE, sampler=sampler.SubsetRandomSampler(range(len(val_data))))

In [185]:
USE_GPU = True

print_every = 100
dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [186]:
learning_rate = 1e-3

betas = (0.9, 0.999)

# optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=betas)
optimizer = optim.SGD(lstm.parameters(), lr=learning_rate)

In [34]:
train_part34(lstm, optimizer, epochs=10)

Epoch: 0


KeyError: 14714

In [None]:
#Now try on the sequence of frames
logreg.fit((X_train.reshape(X_train.shape[0], -1)), y_train)
mlp.fit((X_train.reshape(X_train.shape[0], -1)), y_train)

In [None]:
print('Logistic regression:')
y_pred = logreg.predict((X_val.reshape(X_val.shape[0], -1)))
print(classification_report(y_val, y_pred))

print('MLP:')
y_pred = mlp.predict((X_val.reshape(X_val.shape[0], -1)))
print(classification_report(y_val, y_pred))

#THe F-scores for the positive (1) detections are in the second column.

In [None]:
df.to_json('/scratch/users/agrawalk/headcam-algo-output/alice_sample.json')

In [None]:
df.head()

In [None]:
#Sklearn LSTM

In [None]:
#Sanity checking to see if there's even any extra non-zero info to be gained from looking at surrounding frames (esp. in FN cases)
def extra_info(row):
    return 1 if np.sum(np.array(row['face_tuple'])[[0,1,3,4], :]) != 0 else 0

In [None]:
alice_fp = df.query('face_present == 0 and face_openpose == 1')
len(alice_fp)

In [None]:
extra = alice_fp.apply(extra_info, axis=1).values
extra.sum()/len(extra)

In [None]:
alice_fn = df.query('face_present == 1 and face_openpose == 0')
len(alice_fn)

In [None]:
#OK, so there's certainly some to be gained on the FN frames. Why aren't the classifiers picking up on it, then?
extra = alice_fn.apply(extra_info, axis=1).values
extra.sum()/len(extra)