This notebook performs the preprocessing step:
- First, we fill all `NaN` values with -1.
- We select the rows where a face is detected.
- We also keep only the rows where the drowsiness value is either 0 or 1.

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
import pickle

In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/gdrive so the
# pickle files referenced below can be read and written.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Load the selected feature set and mark every missing value with the
# sentinel -1 in a single chained expression.
df_original = (
    pd.read_pickle('/content/gdrive/MyDrive/Project/Data/whole_set_selected.pkl')
    .fillna(-1)
)

In [None]:
# Display the loaded frame (after NaN -> -1 filling) for a visual sanity check.
df_original

Unnamed: 0,subject,external_factors,facial_actions,frame_no,face_detected,left_ear,right_ear,avg_ear,mar,moe,left_ec,right_ec,avg_ec,left_leb,right_leb,avg_leb,left_sop,right_sop,avg_sop,closeness,reserved_for_calibration,perclos,drowsiness
0,026,noglasses,mix,1.0,1.0,0.312348,0.390935,0.351641,0.019936,0.056695,0.505212,0.572679,0.538945,32.479984,32.845461,32.662722,0.552866,0.485899,0.519382,0.0,True,-1.0,-1
1,026,noglasses,mix,2.0,1.0,0.312348,0.353860,0.333104,0.019936,0.059850,0.505212,0.502060,0.503636,32.479984,32.605777,32.542880,0.501298,0.485899,0.493598,0.0,True,-1.0,-1
2,026,noglasses,mix,3.0,1.0,0.281113,0.369565,0.325339,0.019936,0.061279,0.469984,0.543537,0.506761,32.010095,32.605777,32.307936,0.523548,0.464338,0.493943,0.0,True,-1.0,0
3,026,noglasses,mix,4.0,1.0,0.290172,0.369565,0.329868,0.020341,0.061662,0.496282,0.534732,0.515507,32.605777,32.605777,32.605777,0.523548,0.479301,0.501424,0.0,True,-1.0,0
4,026,noglasses,mix,5.0,1.0,0.290172,0.353860,0.322016,0.040681,0.126332,0.544743,0.502060,0.523401,31.846369,32.348789,32.097579,0.501298,0.503625,0.502462,0.0,True,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567935,002,noglasses,nonsleepyCombination,2675.0,1.0,0.322438,0.366702,0.344570,0.046619,0.135296,0.519428,0.501465,0.510447,40.153337,44.334435,42.243886,0.500635,0.496381,0.498508,0.0,False,0.0,0
567936,002,noglasses,nonsleepyCombination,2676.0,1.0,0.305574,0.360779,0.333177,0.046619,0.139923,0.483056,0.487115,0.485085,39.996081,43.546340,41.771211,0.491398,0.473926,0.482662,0.0,False,0.0,0
567937,002,noglasses,nonsleepyCombination,2677.0,1.0,0.312124,0.393912,0.353018,0.057551,0.163025,0.461127,0.519349,0.490238,40.193898,43.382725,41.788312,0.518089,0.464807,0.491448,0.0,False,0.0,0
567938,002,noglasses,nonsleepyCombination,2678.0,1.0,0.321034,0.377428,0.349231,0.059428,0.170167,0.480536,0.521687,0.501112,39.862095,44.040659,41.951377,0.515470,0.475986,0.495728,0.0,False,0.0,0


In [None]:
# Keep only frames where a face was found AND the label is a valid class (0 or 1);
# rows labelled -1 (the NaN sentinel) are discarded.
face_found = df_original['face_detected'] == 1
valid_label = (df_original['drowsiness'] == 1) | (df_original['drowsiness'] == 0)
df = df_original.loc[face_found & valid_label].reset_index(drop=True)

We notice there are abnormal 0’s in the eye
features. This occurs when the subject turns and one eye is not detected, hence all
of that eye's feature values become 0. To detect this scenario, the condition
RIGHT_EAR == 0 and LEFT_EAR != 0 is searched for in the dataframe. The LEFT_EAR
value is selected and copied into a newly defined EAR column. Similarly, the mirrored
case (LEFT_EAR == 0 and RIGHT_EAR != 0) is checked and RIGHT_EAR is copied into the EAR column. This process is repeated
for all the eye features (EC, SOP, LEB). The MOE value is updated according to the
new EAR value. After this process we drop all columns we don't need and finally
we have eight features (EAR, MAR, MOE, EC, LEB, SOP, PERCLOS, CLOSENESS)
and one target class (DROWSINESS).

In [None]:
# Row positions where exactly one eye produced a zero EAR.
# np.where on a 1-D mask returns a 1-tuple holding the position array.
left_ear_is_zero = df['left_ear'] == 0
right_ear_is_zero = df['right_ear'] == 0
a = np.where(left_ear_is_zero & ~right_ear_is_zero)   # left eye lost, right eye seen
b = np.where(~left_ear_is_zero & right_ear_is_zero)   # right eye lost, left eye seen

In [None]:
# Show the EAR columns of the rows found above: first the left-eye-zero
# positions (`a`), then the right-eye-zero positions (`b`).
ear_columns = ['left_ear', 'right_ear', 'avg_ear']
for positions in (*a, *b):
    print(df.iloc[positions][ear_columns])

Empty DataFrame
Columns: [left_ear, right_ear, avg_ear]
Index: []
        left_ear  right_ear   avg_ear
46491   0.045038        0.0  0.022519
46492   0.045038        0.0  0.022519
47113   0.047405        0.0  0.023702
47114   0.047405        0.0  0.023702
47259   0.045038        0.0  0.022519
330908  0.082980        0.0  0.041490


In [None]:
# Build the consolidated EAR column.
# Default is the two-eye average. When exactly one eye reads 0 (eye not
# detected), the average is halved by the bogus zero, so use the other eye's
# value instead.
# The previous version looped over `b` with variable `j` but indexed with `i`
# (the positions from `a`), so right-eye-missing rows were never corrected.
# Boolean masks are computed here so the cell does not depend on `a`/`b`.
only_right_seen = (df['left_ear'] == 0) & (df['right_ear'] != 0)
only_left_seen = (df['left_ear'] != 0) & (df['right_ear'] == 0)

df['EAR'] = (
    df['avg_ear']
    .mask(only_right_seen, df['right_ear'])   # left eye lost -> take right eye
    .mask(only_left_seen, df['left_ear'])     # right eye lost -> take left eye
)

In [None]:
def _fill_from_detected_eye(frame, feature):
    """Return the consolidated per-frame value of one eye feature.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns ``left_<feature>``, ``right_<feature>`` and
        ``avg_<feature>``.
    feature : str
        Lower-case feature stem, e.g. ``'ec'``, ``'leb'`` or ``'sop'``.

    Returns
    -------
    pandas.Series
        ``avg_<feature>``, except on rows where exactly one eye reads 0; there
        the non-zero eye's value is used instead of the average.
    """
    left = frame['left_' + feature]
    right = frame['right_' + feature]
    only_right_seen = (left == 0) & (right != 0)
    only_left_seen = (left != 0) & (right == 0)
    return (
        frame['avg_' + feature]
        .mask(only_right_seen, right)
        .mask(only_left_seen, left)
    )


# Same correction as for EAR, applied to the remaining eye features.
# The earlier copy-pasted loops iterated `for j in b` but indexed with `i`,
# so rows with a missing right eye were never updated.
for feature in ('ec', 'leb', 'sop'):
    df[feature.upper()] = _fill_from_detected_eye(df, feature)


In [None]:
# Positions of rows flagged as outliers (z_score > 3) whose average EAR is below 0.3.
# NOTE(review): `z_score` is not defined in any cell visible in this notebook, so this
# cell raises NameError on "Restart & Run All". It presumably held a per-row z-score
# of an EAR column computed in a since-deleted cell -- restore that definition or
# remove this cell.
np.where(np.logical_and(z_score>3, df['avg_ear']<0.3))

(array([ 44919,  52375,  83546,  83556,  83558, 127467, 173814, 201987,
        213629, 213630, 288206, 288219, 311750, 312780, 312915, 321892,
        330908, 346841, 346842, 346843, 346845, 346846, 346847, 346848,
        346849, 346850, 346851, 346852, 346853, 346856, 346857, 346858,
        346860, 346861, 346863, 346865, 346866, 346867, 347107, 347108,
        347111, 353189, 353193, 353194, 384814, 441721, 441733, 441749,
        487102, 487104, 487105, 487107, 487109, 487111, 487112, 487117,
        487118, 501933, 501950, 502437, 502567, 502971, 506835, 514577]),)

In [None]:
# For each non-eye feature print its summary statistics followed by the
# number of rows whose value is exactly zero.
for column in ('mar', 'moe', 'perclos', 'closeness'):
    values = df[column]
    print(values.describe())
    zero_positions = np.where(values == 0)[0]
    print(len(zero_positions))

count    515271.000000
mean          0.116582
std           0.174289
min           0.000000
25%           0.028571
50%           0.050000
75%           0.119178
max           1.880425
Name: mar, dtype: float64
28916
count    515271.000000
mean          0.456035
std           0.731721
min           0.000000
25%           0.100265
50%           0.188473
75%           0.456484
max          19.954631
Name: moe, dtype: float64
28916
count    515271.000000
mean         16.079448
std          21.815062
min           0.000000
25%           0.662252
50%           6.622517
75%          22.516556
max         100.000000
Name: perclos, dtype: float64
124050
count    515271.000000
mean          0.169425
std           0.375128
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           1.000000
Name: closeness, dtype: float64
427971


In [None]:
# Upper-case the final feature/target column names (done in place on `df`).
final_column_names = {
    "mar": "MAR",
    "moe": "MOE",
    "perclos": "PERCLOS",
    "closeness": "CLOSENESS",
    "drowsiness": "DROWSINESS",
}
df.rename(columns=final_column_names, inplace=True)

In [None]:
# Recompute mouth-over-eye using the consolidated EAR column instead of the
# value that was shipped with the raw data.
df['MOE'] = df['MAR'].div(df['EAR'])


Two normalisation methods are used to normalise the dataset: subject-wise normalisation and column-wise normalisation. For subject-wise normalisation, we calculate the mean and standard deviation of the first 90 frames of the alert video, then use these values to normalise all of the relevant subject's videos; in this way all values in the dataframe adapt to the subject. For column-wise normalisation, we calculate the mean and standard deviation of each feature.

In [None]:
# Number of leading alert-video rows per subject used to fit the scaler.
CALIBRATION_FRAMES = 90

# Subjects in order of first appearance in `df`.
user_list = list(df['subject'].unique())

# Group once and reuse the GroupBy object; the previous version rebuilt the
# groupby for every subject inside each loop.
rows_by_subject = df.groupby('subject')
list_of_all_original = [rows_by_subject.get_group(user) for user in user_list]

# Rows recorded during the 'nonsleepyCombination' and 'mix' facial-action videos.
df_nonsleepycombination_and_mix = df[df['facial_actions'].isin(['nonsleepyCombination','mix'])]
alert_rows_by_subject = df_nonsleepycombination_and_mix.groupby('subject')

# get_group would fail with a bare KeyError on the first such subject; report
# all of them at once with an explanation instead (same exception type).
subjects_without_alert_rows = [
    user for user in user_list if user not in alert_rows_by_subject.groups
]
if subjects_without_alert_rows:
    raise KeyError(
        "No 'nonsleepyCombination'/'mix' rows for subject(s): "
        f"{subjects_without_alert_rows}"
    )

list_of_all_groups = [alert_rows_by_subject.get_group(user) for user in user_list]

# First CALIBRATION_FRAMES rows of each subject's alert data (fewer if the
# subject has fewer rows); these are what the per-subject scaler is fitted on.
list_of_all_first_90 = [alert_rows[:CALIBRATION_FRAMES] for alert_rows in list_of_all_groups]

In [None]:
# Columns that are standardised per subject, and the names of their
# normalised counterparts ("n_" + original name). Defined once instead of
# repeating the same list for fit, transform and the output header.
FEATURE_COLUMNS = ["left_ear", "right_ear", "avg_ear",
                   "left_ec", "right_ec", "avg_ec", "left_leb", "right_leb",
                   "avg_leb", "left_sop", "right_sop", "avg_sop",
                   "EAR", "EC", "LEB", "SOP", "MAR", "MOE"]
NORMALIZED_COLUMNS = ["n_" + name for name in FEATURE_COLUMNS]

# Leading rows of every subject's normalised frame that are discarded.
# NOTE(review): the scaler is fitted on 90 rows but 91 rows are dropped here,
# and the dropped rows are the first rows of the subject's *full* data, not
# necessarily the calibration rows -- confirm the intended value.
ROWS_SKIPPED_PER_SUBJECT = 91

en_son_df_list = []
for user, subject_rows, calibration_rows in zip(user_list,
                                                list_of_all_original,
                                                list_of_all_first_90):
    # Fit on the subject's calibration rows only, then apply to all their rows.
    scaler = StandardScaler()
    scaler.fit(calibration_rows.loc[:, FEATURE_COLUMNS])

    # The result gets a fresh 0..n-1 index, i.e. the row position inside
    # this subject's block of `df`.
    son_df = pd.DataFrame(scaler.transform(subject_rows.loc[:, FEATURE_COLUMNS]),
                          columns=NORMALIZED_COLUMNS)
    son_df['subject'] = user

    # .copy() makes the stored frame independent of `son_df`, so adding columns
    # to it later does not raise SettingWithCopyWarning.
    en_son_df_list.append(son_df[ROWS_SKIPPED_PER_SUBJECT:].copy())

In [None]:
# Raw EAR value below which the eye is treated as closed.
EAR_CLOSED_THRESHOLD = 0.21

# Express that raw threshold on each subject's normalised EAR scale and store
# it as the constant column `n_021` of the subject's normalised frame.
for i, calibration_rows in enumerate(list_of_all_first_90):
    mean = calibration_rows.loc[:, 'EAR'].mean()
    # StandardScaler divides by the population standard deviation (ddof=0);
    # pandas defaults to ddof=1, which would put the threshold on a slightly
    # different scale than n_EAR.
    std = calibration_rows.loc[:, 'EAR'].std(ddof=0)
    if std == 0:
        # StandardScaler uses a scale of 1 for zero-variance features.
        std = 1.0
    val = EAR_CLOSED_THRESHOLD
    n_val = (val - mean) / std
    print (mean, std, val, n_val)
    # assign() returns a new frame, so this never writes into a slice of
    # another DataFrame (the source of the SettingWithCopyWarning).
    en_son_df_list[i] = en_son_df_list[i].assign(n_021=n_val)


0.35745161284348964 0.014626693719880127 0.21 -10.080994082967512
0.3945510201395345 0.03020155188684142 0.21 -6.110646924072202
0.2894098238516249 0.013548116784783912 0.21 -5.861318226959132
0.3136138886383511 0.019765966091898002 0.21 -5.2420351303153385
0.42981178615095966 0.043482331523236696 0.21 -5.055197788405012
0.28575420815603636 0.0390060163881354 0.21 -1.9421159905752075
0.3566746475362328 0.015601553860940174 0.21 -9.401284567138235
0.2681638503598719 0.04498556110173819 0.21 -1.2929448679839746
0.2907671210510941 0.017794115315760643 0.21 -4.538979298372697
0.3513509799995524 0.02424849756564409 0.21 -5.829267550160393
0.3078986680892205 0.032566575159377296 0.21 -3.006108797443852
0.26047707760135463 0.019173367515308074 0.21 -2.632666252344748
0.3091624268269047 0.023737524118798006 0.21 -4.177454494859339
0.341465479412411 0.01741255689096828 0.21 -7.550038758558247
0.36848770148874316 0.033479463267001686 0.21 -4.733878205417742
0.2532594769290076 0.03025084254629510

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [None]:
# Attach the normalised features to the raw rows they were computed from.
#
# Each frame in `en_son_df_list` is indexed by row position inside its
# subject's block (`list_of_all_original[k]`) and is missing that subject's
# leading rows. Concatenating them and pasting the result next to `df` by
# plain row order therefore shifts every value onto the wrong raw row and
# leaves the tail of `merged` empty. Translate positions back to `df`'s own
# row labels first, then align on those labels.
aligned_frames = []
for subject_rows, normalized_rows in zip(list_of_all_original, en_son_df_list):
    aligned = normalized_rows.copy()
    aligned.index = subject_rows.index.take(normalized_rows.index.to_numpy())
    aligned_frames.append(aligned)

con_df = pd.concat(aligned_frames)

# Rows without normalised values (the skipped leading rows of each subject)
# become NaN in the normalised columns. Both inputs keep their own 'subject'
# column, as before.
merged = pd.concat([df, con_df.reindex(df.index)], axis=1).reset_index(drop=True)

In [None]:
# Display the concatenated per-subject normalised features for a visual check.
con_df

Unnamed: 0,n_left_ear,n_right_ear,n_avg_ear,n_left_ec,n_right_ec,n_avg_ec,n_left_leb,n_right_leb,n_avg_leb,n_left_sop,n_right_sop,n_avg_sop,n_EAR,n_EC,n_LEB,n_SOP,n_MAR,n_MOE,subject,n_021
91,0.564157,2.799494,2.117066,0.125826,1.504543,1.196686,0.321208,0.297037,0.362667,2.028623,0.267216,1.621183,2.117066,1.196686,0.362667,1.621183,-0.231568,-0.365698,026,-10.080994
92,0.618872,-0.517150,-0.025824,1.746788,-0.291571,0.667961,-0.825043,-1.278797,-1.270528,-0.325066,1.691583,0.585689,-0.025824,0.667961,-1.270528,0.585689,1.432530,1.437968,026,-10.080994
93,0.564157,1.606928,1.336608,1.721671,0.143878,0.983046,-0.746462,-1.278797,-1.229559,0.692123,1.691583,1.333265,1.336608,0.983046,-1.229559,1.333265,-0.231568,-0.317612,026,-10.080994
94,0.618872,0.623665,0.720766,0.102693,-0.782418,-0.536902,1.046612,-1.278797,-0.294733,-0.325066,0.267216,-0.108646,0.720766,-0.536902,-0.294733,-0.108646,0.492313,0.425032,026,-10.080994
95,0.618872,0.623665,0.720766,0.102693,-0.563624,-0.372174,-1.134845,-0.489410,-0.913282,-0.325066,0.267216,-0.108646,0.720766,-0.372174,-0.913282,-0.108646,-1.967199,-1.962194,026,-10.080994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26333,1.822746,1.980293,1.948455,1.407113,0.920969,1.245612,2.713426,8.197572,5.017064,1.245417,1.489753,1.432223,1.948455,1.245612,5.017064,1.432223,-0.947946,-1.307220,002,-1.117563
26334,1.392107,1.862187,1.688336,0.768489,0.685833,0.782230,2.650546,7.631427,4.756427,1.018687,0.861984,0.995884,1.688336,0.782230,4.756427,0.995884,-0.947946,-1.288132,002,-1.117563
26335,1.559361,2.522866,2.141315,0.383476,1.214020,0.876383,2.729644,7.513891,4.765857,1.673883,0.607064,1.237828,2.141315,0.876383,4.765857,1.237828,-0.801257,-1.192818,002,-1.117563
26336,1.786895,2.194171,2.054865,0.724248,1.252332,1.075052,2.596970,7.986532,4.855773,1.609581,0.919574,1.355670,2.054865,1.075052,4.855773,1.355670,-0.776074,-1.163353,002,-1.117563


In [None]:
# Replace the shipped CLOSENESS flag with one derived from the normalised data:
# 1 when the normalised EAR is below the subject's normalised 0.21 threshold,
# otherwise 0. The old column is dropped first, so the new one is appended as
# the last column.
eye_closed = merged['n_EAR'] < merged['n_021']
merged.drop(columns=['CLOSENESS'], inplace=True)
merged['CLOSENESS'] = np.where(eye_closed, 1, 0)


In [None]:
# Replace every remaining NaN in the merged frame with the sentinel -1, then display it.
# NOTE(review): -1 is also a plausible value for the standardised n_* columns, so
# after this step a missing normalised value cannot be told apart from a real one.
merged = merged.fillna(-1)
merged

Unnamed: 0,subject,external_factors,facial_actions,frame_no,face_detected,left_ear,right_ear,avg_ear,MAR,MOE,left_ec,right_ec,avg_ec,left_leb,right_leb,avg_leb,left_sop,right_sop,avg_sop,reserved_for_calibration,PERCLOS,DROWSINESS,EAR,EC,LEB,SOP,n_left_ear,n_right_ear,n_avg_ear,n_left_ec,n_right_ec,n_avg_ec,n_left_leb,n_right_leb,n_avg_leb,n_left_sop,n_right_sop,n_avg_sop,n_EAR,n_EC,n_LEB,n_SOP,n_MAR,n_MOE,subject.1,n_021,CLOSENESS
0,026,noglasses,mix,3.0,1.0,0.281113,0.369565,0.325339,0.019936,0.061279,0.469984,0.543537,0.506761,32.010095,32.605777,32.307936,0.523548,0.464338,0.493943,True,0.0,0,0.325339,0.506761,32.307936,0.493943,0.564157,2.799494,2.117066,0.125826,1.504543,1.196686,0.321208,0.297037,0.362667,2.028623,0.267216,1.621183,2.117066,1.196686,0.362667,1.621183,-0.231568,-0.365698,026,-10.080994,0
1,026,noglasses,mix,4.0,1.0,0.290172,0.369565,0.329868,0.020341,0.061662,0.496282,0.534732,0.515507,32.605777,32.605777,32.605777,0.523548,0.479301,0.501424,True,0.0,0,0.329868,0.515507,32.605777,0.501424,0.618872,-0.517150,-0.025824,1.746788,-0.291571,0.667961,-0.825043,-1.278797,-1.270528,-0.325066,1.691583,0.585689,-0.025824,0.667961,-1.270528,0.585689,1.432530,1.437968,026,-10.080994,0
2,026,noglasses,mix,5.0,1.0,0.290172,0.353860,0.322016,0.040681,0.126332,0.544743,0.502060,0.523401,31.846369,32.348789,32.097579,0.501298,0.503625,0.502462,True,0.0,0,0.322016,0.523401,32.097579,0.502462,0.564157,1.606928,1.336608,1.721671,0.143878,0.983046,-0.746462,-1.278797,-1.229559,0.692123,1.691583,1.333265,1.336608,0.983046,-1.229559,1.333265,-0.231568,-0.317612,026,-10.080994,0
3,026,noglasses,mix,6.0,1.0,0.312348,0.395490,0.353919,0.030029,0.084846,0.505212,0.534889,0.520050,32.141548,33.754747,32.948147,0.529870,0.485899,0.507885,True,0.0,0,0.353919,0.520050,32.948147,0.507885,0.618872,0.623665,0.720766,0.102693,-0.782418,-0.536902,1.046612,-1.278797,-0.294733,-0.325066,0.267216,-0.108646,0.720766,-0.536902,-0.294733,-0.108646,0.492313,0.425032,026,-10.080994,0
4,026,noglasses,mix,7.0,1.0,0.296730,0.390935,0.343833,0.029361,0.085393,0.466081,0.583190,0.524635,31.513423,32.795714,32.154569,0.552866,0.464338,0.508602,True,0.0,0,0.343833,0.524635,32.154569,0.508602,0.618872,0.623665,0.720766,0.102693,-0.563624,-0.372174,-1.134845,-0.489410,-0.913282,-0.325066,0.267216,-0.108646,0.720766,-0.372174,-0.913282,-0.108646,-1.967199,-1.962194,026,-10.080994,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515266,002,noglasses,nonsleepyCombination,2675.0,1.0,0.322438,0.366702,0.344570,0.046619,0.135296,0.519428,0.501465,0.510447,40.153337,44.334435,42.243886,0.500635,0.496381,0.498508,False,0.0,0,0.344570,0.510447,42.243886,0.498508,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1,-1.000000,0
515267,002,noglasses,nonsleepyCombination,2676.0,1.0,0.305574,0.360779,0.333177,0.046619,0.139923,0.483056,0.487115,0.485085,39.996081,43.546340,41.771211,0.491398,0.473926,0.482662,False,0.0,0,0.333177,0.485085,41.771211,0.482662,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1,-1.000000,0
515268,002,noglasses,nonsleepyCombination,2677.0,1.0,0.312124,0.393912,0.353018,0.057551,0.163025,0.461127,0.519349,0.490238,40.193898,43.382725,41.788312,0.518089,0.464807,0.491448,False,0.0,0,0.353018,0.490238,41.788312,0.491448,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1,-1.000000,0
515269,002,noglasses,nonsleepyCombination,2678.0,1.0,0.321034,0.377428,0.349231,0.059428,0.170167,0.480536,0.521687,0.501112,39.862095,44.040659,41.951377,0.515470,0.475986,0.495728,False,0.0,0,0.349231,0.501112,41.951377,0.495728,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1,-1.000000,0


In [None]:
# Persist the preprocessed and subject-normalised frame to Google Drive.
merged.to_pickle('/content/gdrive/MyDrive/Project/Data/whole_set_selected_preprocessed_normalized_fixed.pkl')