In [1]:
import numpy as np
import math
import os.path
import csv
import glob
import h5py as h5py
from mlxtend.image import extract_face_landmarks
import cv2
import matplotlib.pyplot as plt
from scipy.spatial import distance
import pandas as pd

In [2]:
def getFrame(sec, cap=None, start=100):
    """Seek a video capture to a timestamp and read one frame.

    Parameters
    ----------
    sec : int or float
        Position, in seconds, of the frame to read (before the offset).
    cap : cv2.VideoCapture, optional
        Capture to read from. When omitted, the module-level ``vidcap``
        is used, which is how the extraction loop in this notebook calls it.
    start : int or float, optional
        Offset in milliseconds added to every requested position
        (default 100).

    Returns
    -------
    (hasFrames, image)
        The pair returned by ``read()`` on the capture.
    """
    # Fall back to the notebook-global capture so existing calls
    # ``getFrame(sec)`` keep working unchanged.
    capture = vidcap if cap is None else cap
    capture.set(cv2.CAP_PROP_POS_MSEC, start + sec*1000)
    hasFrames, image = capture.read()
    return hasFrames, image


def eye_aspect_ratio(eye):
    """Return (|p1-p5| + |p2-p4|) / (2 * |p0-p3|) for a sequence of points.

    Only indices 0..5 of ``eye`` are read; each entry must be a coordinate
    vector accepted by ``scipy.spatial.distance.euclidean``.
    Presumably these are the six contour points of one eye - confirm
    against the slice passed by the caller.
    """
    span_1_5 = distance.euclidean(eye[1], eye[5])
    span_2_4 = distance.euclidean(eye[2], eye[4])
    span_0_3 = distance.euclidean(eye[0], eye[3])
    return (span_1_5 + span_2_4) / (2.0 * span_0_3)


def mouth_aspect_ratio(mouth):
    """Return |p14-p18| / |p12-p16| for a sequence of points.

    Indices are relative to whatever slice the caller passes in, so
    ``mouth`` must contain at least 19 points.
    NOTE(review): confirm that these four indices map to the intended
    mouth landmarks for the slice used by the caller.
    """
    numerator = distance.euclidean(mouth[14], mouth[18])
    denominator = distance.euclidean(mouth[12], mouth[16])
    return numerator / denominator

def circularity(eye):
    """Return 4*pi*Area / perimeter**2 for the six points ``eye[0..5]``.

    ``Area`` is that of a circle whose diameter is the distance between
    points 1 and 4; the perimeter is the length of the closed polygon
    0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 0.
    """
    diameter = distance.euclidean(eye[1], eye[4])
    half = diameter/2.0
    disc_area = math.pi * (half ** 2)

    # Walk the closed hexagon edge by edge, in index order.
    perimeter = 0
    for k in range(6):
        perimeter += distance.euclidean(eye[k], eye[(k + 1) % 6])

    return 4 * math.pi * disc_area /(perimeter**2)
     

def mouth_over_eye(eye):
    """Return mouth_aspect_ratio(eye) divided by eye_aspect_ratio(eye).

    The same point sequence is handed to both helpers, so it must cover
    every index either of them reads.
    """
    eye_ratio = eye_aspect_ratio(eye)
    mouth_ratio = mouth_aspect_ratio(eye)
    return mouth_ratio/eye_ratio
     

In [3]:
# Extract facial landmarks from one frame per second of every clip.
# One entry is appended to each accumulator per frame in which a face
# was detected.
data = []
labels = []
respondant = []

# Folder containing one sub-folder per respondant, each holding "<label>.mp4".
# NOTE(review): absolute, machine-specific path - move to a configurable
# location before sharing the notebook.
video_dir = 'C:/Users/Antonin/OneDrive - Imperial College London/ME3/ML Projects/Demystifying ML/data_test/'
#video_dir = 'drive/My Drive/Fold5_part2/'

for j in [1,2,3]:        # respondant sub-folders
    for i in [0,10]:     # clip file names, also stored as the label
        vidcap = cv2.VideoCapture(video_dir + str(j) + "/" + str(i) + '.mp4')
        try:
            sec = 0
            frameRate = 1    # seconds between two sampled frames
            success, image  = getFrame(sec)
            count = 0
            # Stop at the end of the clip or after 240 frames with a face.
            while success and count < 240:
                landmarks = extract_face_landmarks(image)
                # Depending on the mlxtend version, "no face" is reported
                # either as None or as an all-zero array; treat both alike
                # instead of crashing on sum(sum(None)).
                if landmarks is not None and sum(sum(landmarks)) != 0:
                    count += 1
                    data.append(landmarks)
                    labels.append(i)
                    respondant.append(j)
                    print(count)
                else:
                    print("not detected")
                # Advance to the next sampled position in either case.
                sec = round(sec + frameRate, 2)
                success, image = getFrame(sec)
        finally:
            # Free the decoder / file handle even if landmark extraction fails.
            vidcap.release()

data = np.array(data)
labels = np.array(labels)
respondant = np.array(respondant)

1
2
3
4
5
6
7
1
2
3
4
5
6
7
8
9
1
2
3
4
5
6
7
8
9
10
11
12
1
2
3
4
5
6
7
8
9
10
11
12
13


In [7]:
# Sanity check: one respondant entry is appended per frame kept by the
# extraction loop, so this is the number of usable frames.
print(len(respondant))

41


In [4]:
# Build one [EAR, MAR, CIR, MOE] row per frame, then transpose so that
# each feature occupies one row of `features`.
rows = []
for frame_landmarks in data:
    # Landmarks 36..67; every helper indexes relative to this slice.
    points = frame_landmarks[36:68]
    rows.append([
        eye_aspect_ratio(points),
        mouth_aspect_ratio(points),
        circularity(points),
        mouth_over_eye(points),
    ])

features = np.array(rows).T
features.shape


(4, 41)

In [5]:
def normalization(series, ref_series):
    """Standardise ``series`` with the mean and std of ``ref_series``.

    Parameters
    ----------
    series : pandas.Series (or subclass)
        Values to normalise.
    ref_series : pandas.Series
        Reference sample providing ``mean()`` and ``std()``.

    Returns
    -------
    pandas.Series
        ``(series - ref_series.mean()) / ref_series.std()``.

    Raises
    ------
    TypeError
        If ``series`` is not a pandas Series.

    Notes
    -----
    No guard is applied to the reference statistics: an empty or constant
    ``ref_series`` yields NaN / infinite values in the result.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # Series subclasses.
    if not isinstance(series, pd.Series):
        raise TypeError('Input is not a Pandas Series')
    mean = ref_series.mean()
    std = ref_series.std()
    return (series - mean)/std

In [6]:
# Raw per-frame features together with their label and respondant id.
df = pd.DataFrame(data = {'EAR' : features[0],'MAR' : features[1],'CIR' : features[2],'MOE' : features[3], 'Label':labels, 'Respondant':respondant})

feature_names = ['EAR', 'MAR', 'CIR', 'MOE']

# Label whose frames provide the per-subject reference mean/std.
# need to change the 10 to 0 after: see labels of data from dataset
reference_label = 10

# Copy of df extended with empty (NaN) columns for the normalised features.
df_norm = df.copy()
df_norm = df_norm.reindex(columns = df_norm.columns.tolist() + ['norm' + name for name in feature_names])

for subject in df['Respondant'].unique():
    # Filter once per subject instead of once per feature.
    subject_rows = df[df['Respondant']==subject]
    reference_rows = subject_rows[subject_rows["Label"]==reference_label]

    for feature in feature_names:
        # If the subject has no reference-label frames the result is all NaN.
        norm_series = normalization(subject_rows[feature], reference_rows[feature])

        # Label-based assignment: norm_series.index holds index *labels*,
        # so .loc is the correct accessor (.iloc only happened to work
        # because df has a default 0..n-1 index).
        df_norm.loc[norm_series.index, 'norm' + feature] = norm_series
            
            

In [8]:
'''
This block is for NO repeating values in the dataset: the sequence moves by the full length of the sequence for each different sequence
'''

# Non-overlapping sequences: for every (respondant, label) group the frames
# are cut into consecutive chunks of 5 rows x 8 features.
feat_name = ['EAR', 'MAR', 'CIR', 'MOE', 'normEAR', 'normMAR', 'normCIR', 'normMOE']

# Zero-length float seeds fix the output shape/dtype even when no group
# contributes a sequence.
feature_chunks = [np.empty((0, 5, 8))]
label_chunks = [np.empty((0, 5, 1))]

for subject in df_norm['Respondant'].unique():
    df_norm_subject = df_norm[df_norm['Respondant']==subject]

    for unique_label in df_norm_subject['Label'].unique():
        group = df_norm_subject[df_norm_subject['Label']==unique_label]
        curr_features = np.array(group.drop(columns=['Label', 'Respondant']))
        curr_labels = np.array(group['Label'])

        # Drop leading rows so the group length is a multiple of 5.
        surplus = curr_features.shape[0] % 5
        curr_features = curr_features[surplus:]
        curr_labels = curr_labels[surplus:]

        feature_chunks.append(curr_features.reshape((-1, 5, 8)))
        label_chunks.append(curr_labels.reshape((-1, 5, 1)))

LSTMinput = np.concatenate(feature_chunks, axis = 0)
# NOTE(review): this rebinds `labels`, which earlier held the per-frame
# labels used to build df; re-running that earlier cell afterwards will
# pick up this 3-D array instead.
labels = np.concatenate(label_chunks, axis = 0)

# Keep one label per sequence: the label of its first frame.
new_labels = np.empty((labels.shape[0], 1))
for row, sequence in enumerate(labels):
    new_labels[row] = sequence[0]

# The one we need to save are LSTMinput and new_labels
np.save("features.npy", LSTMinput)
# NOTE(review): savetxt writes plain text despite the .npy extension, so the
# file must be read back with np.loadtxt, not np.load.
np.savetxt("labels.npy", new_labels)

In [36]:
'''
This block is for repeating values in the dataset: the sequence moves by only one for each different sequence
'''

def appending(extracted:np.ndarray, output:np.ndarray, sequence_lenght: int, features:int):
    """Append every stride-1 window of ``extracted`` to ``output``.

    Parameters
    ----------
    extracted : np.ndarray
        Rows to window along axis 0.
    output : np.ndarray
        Existing array of shape (k, sequence_lenght, features) to extend.
    sequence_lenght : int
        Number of consecutive rows per window.
    features : int
        Size of the last axis each window is reshaped to.

    Returns
    -------
    np.ndarray
        ``output`` followed by one (sequence_lenght, features) block per
        window. ``output`` itself is returned when ``extracted`` is shorter
        than one window.
    """
    n_windows = len(extracted) - sequence_lenght + 1
    if n_windows <= 0:
        return output

    windows = [
        extracted[first:first + sequence_lenght].reshape((-1, sequence_lenght, features))
        for first in range(n_windows)
    ]
    # Single concatenation instead of re-copying `output` for every window.
    return np.concatenate([output] + windows, axis = 0)

def relabel(labels:np.ndarray):
    """Collapse per-step labels to one label per sequence.

    For each entry along axis 0 of ``labels`` the first element
    (``labels[i][0]``) is copied into row ``i`` of a new float array of
    shape (len(labels), 1), which is returned.
    """
    first_steps = np.empty((labels.shape[0], 1))
    for row, sequence in enumerate(labels):
        first_steps[row] = sequence[0]
    return first_steps

# Sliding-window sequences (stride 1, length 5, 8 features), collected
# separately for label 10 ("tired") and label 0 ("alert").
# Zero-length float seeds give `appending` something of the right shape
# to extend.
LSTMinput_long_tired = np.empty((0, 5, 8))
labels_long_tired = np.empty((0, 5, 1))

LSTMinput_long_alert = np.empty((0, 5, 8))
labels_long_alert = np.empty((0, 5, 1))

for subject in df_norm['Respondant'].unique():
    subject_frame = df_norm[df_norm['Respondant']==subject]

    # Have to watch out the number of different labels,
    # For now we only have two
    for unique_label in subject_frame['Label'].unique():
        label_frame = subject_frame[subject_frame['Label']==unique_label]
        curr_features = np.array(label_frame.drop(columns=['Label', 'Respondant']))
        curr_labels = np.array(label_frame['Label'])

        if unique_label == 0:
            LSTMinput_long_alert = appending(curr_features, LSTMinput_long_alert, 5, 8)
            labels_long_alert = appending(curr_labels, labels_long_alert, 5, 1)
        elif unique_label == 10:
            LSTMinput_long_tired = appending(curr_features, LSTMinput_long_tired, 5, 8)
            labels_long_tired = appending(curr_labels, labels_long_tired, 5, 1)

# One label per window (the label of its first frame).
new_labels_long_alert = relabel(labels_long_alert)
new_labels_long_tired = relabel(labels_long_tired)

# NOTE(review): the label files are written as text by savetxt even though
# they carry a .npy extension; read them back with np.loadtxt.
np.save("features_long_tired.npy", LSTMinput_long_tired)
np.savetxt("labels_long_tired.npy", new_labels_long_tired)

np.save("features_long_alert.npy", LSTMinput_long_alert)
np.savetxt("labels_long_alert.npy", new_labels_long_alert)

In [35]:
# Inspect the sliding-window tensor built for label 0 and the table of raw
# and normalised features.
# NOTE(review): this prints the complete array; consider showing only
# .shape or a small slice to keep the notebook output short.
print(LSTMinput_long_alert)
df_norm

[[[ 2.68437746e-01  1.05013227e+00  3.26536763e-01  3.91201417e+00
    7.47834870e-02  3.49706870e-01 -1.40670530e+00 -1.40028611e-01]
  [ 1.23299240e-01  9.65003674e-01  2.69216013e-01  7.82651759e+00
   -3.00600241e+00 -1.44552362e+00 -2.11535073e+00  4.67583104e+00]
  [ 1.76502832e-01  8.54680402e-01  3.28003279e-01  4.84230417e+00
   -1.87667512e+00 -3.77207099e+00 -1.38857505e+00  1.00447060e+00]
  [ 1.72005229e-01  7.37864787e-01  1.83476556e-01  4.28978114e+00
   -1.97214360e+00 -6.23553182e+00 -3.17533117e+00  3.24723238e-01]
  [ 1.15044748e-01  9.63267676e-01  2.36570971e-01  8.37298260e+00
   -3.18121658e+00 -1.48213315e+00 -2.51893512e+00  5.34812546e+00]]

 [[ 1.23299240e-01  9.65003674e-01  2.69216013e-01  7.82651759e+00
   -3.00600241e+00 -1.44552362e+00 -2.11535073e+00  4.67583104e+00]
  [ 1.76502832e-01  8.54680402e-01  3.28003279e-01  4.84230417e+00
   -1.87667512e+00 -3.77207099e+00 -1.38857505e+00  1.00447060e+00]
  [ 1.72005229e-01  7.37864787e-01  1.83476556e-01  4

Unnamed: 0,EAR,MAR,CIR,MOE,Label,Respondant,normEAR,normMAR,normCIR,normMOE
0,0.268438,1.050132,0.326537,3.912014,0,1,0.074783,0.349707,-1.406705,-0.140029
1,0.123299,0.965004,0.269216,7.826518,0,1,-3.006002,-1.445524,-2.115351,4.675831
2,0.176503,0.85468,0.328003,4.842304,0,1,-1.876675,-3.772071,-1.388575,1.004471
3,0.172005,0.737865,0.183477,4.289781,0,1,-1.972144,-6.235532,-3.175331,0.324723
4,0.115045,0.963268,0.236571,8.372983,0,1,-3.181217,-1.482133,-2.518935,5.348125
5,0.197964,1.103524,0.273733,5.574358,0,1,-1.421123,1.475654,-2.059502,1.905088
6,0.191741,1.030619,0.273733,5.375053,0,1,-1.553217,-0.061791,-2.059502,1.65989
7,0.207614,1.050132,0.311069,5.058107,10,1,-1.2163,0.349707,-1.59793,1.269964
8,0.269231,0.977194,0.439287,3.629576,10,1,0.091617,-1.188459,-0.012792,-0.487501
9,0.25,1.111789,0.521645,4.447155,10,1,-0.316586,1.649947,1.00538,0.518334
