In [1]:
# Importing the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from math import ceil

In [2]:
# Reading data files
# Every trip folder holds four sensor recordings plus the ground-truth labels.
# Frames are stored under keys of the form 'trip<N>_<suffix>', e.g. 'trip1_acc'.
SENSOR_FILES = [
    ('acc', "Accelerometer.csv"),
    ('lac', "Linear_Acceleration.csv"),
    ('gyr', "Gyrometer.csv"),
    ('mag', "Magnetometer.csv"),
    ('lbl', "Truth.csv"),
]

trips = {}
for trip_no in (1, 2, 3, 4):
    for suffix, file_name in SENSOR_FILES:
        frame_key = 'trip{}_{}'.format(trip_no, suffix)
        trips[frame_key] = pd.read_csv("data/{}/{}".format(trip_no, file_name))

In [3]:
# Clean up every loaded frame and measure the labelled events:
#   * the 'timestamp' column is not used, so it is removed;
#   * 'uptimeNanos' is replaced by 'timeInSec', the seconds elapsed since the
#     first sample of that frame;
#   * frames with an 'event' column contribute their shortest and longest
#     (finish - start) duration to min_window / max_window.
min_window = 1000000
max_window = 0

for name, trip in trips.items():
    if 'timestamp' in trip.columns:
        del trip['timestamp']

    if 'uptimeNanos' in trip.columns:
        nanos = np.array(trip['uptimeNanos'])
        trip['timeInSec'] = (nanos - nanos[0]) / 1000000000
        del trip['uptimeNanos']

    if 'event' in trip.columns:
        durations = trip.finish - trip.start
        # max()/min() keep the running value unless the new one is strictly beyond it
        max_window = max(max_window, max(durations))
        min_window = min(min_window, min(durations))

In [4]:
# A window spans whole seconds, so each extreme event duration is rounded up
# to the next integer before it replaces the raw duration.
min_window_size = ceil(min_window)
max_window_size = ceil(max_window)
print("Minimum event duration: ", min_window, "s   Minimum Window Size: ", min_window_size, "s")
print("Maximum event duration: ", max_window, "s   Maximum Window Size: ", max_window_size, "s")
min_window, max_window = min_window_size, max_window_size

Minimum event duration:  1.5999999999999979 s   Minimum Window Size:  2 s
Maximum event duration:  4.899999999999977 s   Maximum Window Size:  5 s


In [5]:
# Storing the indices of 1sec frames
# For every sensor frame (frames carrying an 'event' column are label frames and
# are skipped) record, for each one-second boundary of 'timeInSec', the row
# index of the sample just before the boundary was crossed.
allTripPerSecIndex = {}
for name, trip in trips.items():
    if 'event' in trip.columns:
        continue

    elapsed = np.asarray(trip['timeInSec'])
    boundaries = []
    count = 0
    # The scan stops one sample short of the end (range(len - 1)), which keeps
    # the existing behaviour of this cell.
    for i in range(len(elapsed) - 1):
        if elapsed[i] - count > 1:
            boundaries.append(i - 1)
            count = count + 1

    # Build the frame in one go: DataFrame.append was removed in pandas 2.0,
    # and appending row by row copies the whole frame on every iteration.
    allTripPerSecIndex[name + '_perSecIndex'] = pd.DataFrame(
        {'perSecIndex': pd.Series(boundaries, dtype='int64')}
    )

In [6]:
# Feature-name builder for one sensor and one window size
def getFeatureNames(s, w):
    """Build the feature names of a w-frame window for one sensor.

    Parameters
    ----------
    s : str
        Sensor name; it prefixes every generated feature name.
    w : int
        Window size, i.e. the number of frames (numbered 1..w).

    Returns
    -------
    list
        ``[names, defaults]``: ``names`` is the ordered list of feature names
        and ``defaults`` is a dict mapping each of those names to ``0.0``.

    Notes
    -----
    Names have the form ``<s>_<stat>_<axis><frame>``. For every frame the
    statistics 'mean', 'median' and 'sd' are generated for the x, y and z axes.
    'td' names are generated for every frame except the last one (frame w).
    """
    stats = ('mean', 'median', 'sd', 'td')
    axes = ('x', 'y', 'z')

    names = [
        s + '_' + stat + '_' + axis + str(frame)
        for frame in range(1, w + 1)
        for stat in stats
        if stat != 'td' or frame != w
        for axis in axes
    ]
    defaults = {feature: 0.0 for feature in names}

    return [names, defaults]

In [7]:
# Considering window_size = 3
# Slide a window of w one-second frames over every sensor recording and build
# one feature row per window position. Each row holds, per frame and axis, the
# mean, median and standard deviation, plus 'td' = mean(frame n+1) / mean(frame n).
w = 3
INDEX_SUFFIX = '_perSecIndex'
dataSensorwise = {}
for name, tripIndex in allTripPerSecIndex.items():
    tripKey = name[:-len(INDEX_SUFFIX)]   # e.g. 'trip1_acc'
    sensor = tripKey[6:]                  # e.g. 'acc'
    featureNames, featureVector = getFeatureNames(sensor, w)
    tripRawData = trips[tripKey]
    frameEnds = tripIndex.perSecIndex.values

    key_mean = sensor + '_mean_'
    key_median = sensor + '_median_'
    key_sd = sensor + '_sd_'
    key_td = sensor + '_td_'

    rows = []
    setLastInd = 0
    # One window per start position i; the window covers frame ends i .. i+w-1.
    for i in range(0, len(tripIndex) - w + 1):
        lastInd = setLastInd

        for frameCount in range(1, w + 1):
            ind = int(frameEnds[i + frameCount - 1])

            # NOTE(review): lastInd is not advanced inside the window, so the
            # statistics of frame n cover the samples from the window start up
            # to the end of frame n (cumulative), not frame n alone. The existing
            # behaviour is kept here; confirm whether per-frame (disjoint)
            # statistics were intended.
            for axis in ('x', 'y', 'z'):
                samples = tripRawData[axis].iloc[lastInd:ind + 1]
                current = axis + str(frameCount)

                featureVector[key_mean + current] = samples.mean()
                featureVector[key_median + current] = samples.median()
                featureVector[key_sd + current] = samples.std()

                # Set td only if it is not first frame
                if frameCount != 1:
                    previous = axis + str(frameCount - 1)
                    featureVector[key_td + previous] = (
                        featureVector[key_mean + current] / featureVector[key_mean + previous]
                    )

            if frameCount == 1:
                # The next window starts where the first frame of this one ended.
                setLastInd = ind

        # featureVector is reused for every window, so store a snapshot of it.
        rows.append(dict(featureVector))

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row copies the whole frame on every iteration.
    dataSensorwise[tripKey] = pd.DataFrame(rows, columns=featureNames)
            

In [8]:
# Merging all sensor attribute vectors into a common attribute vector:
# one wide frame per trip, with the per-sensor feature frames placed side by side.
dataConsolidated = {tripKey: pd.DataFrame() for tripKey in ('trip1', 'trip2', 'trip3', 'trip4')}

for name, vec in dataSensorwise.items():
    prefix = name[:5]
    # Every name that does not start with trip1..trip3 is filed under 'trip4'.
    target = prefix if prefix in ('trip1', 'trip2', 'trip3') else 'trip4'
    dataConsolidated[target] = pd.concat([dataConsolidated[target], vec], axis=1)

In [9]:
# Give every label frame ('trip<N>_lbl') a constant 'key' column
for name in trips:
    if name[6:] == 'lbl':
        trips[name]['key'] = 0

In [10]:
# Appending the column with key in data frame
# Label the feature windows of every trip: cross-join the windows with the
# trip's events (both sides carry key == 0) and keep the pairs whose event
# start lies inside the window's [startTime, endTime) span.
for name, tripDataConsolidated in dataConsolidated.items():
    windowCount = len(tripDataConsolidated)
    tripDataConsolidated['key'] = 0
    tripDataConsolidated['startTime'] = np.arange(windowCount)
    # The span is w seconds long, the same window size the features were built
    # with, instead of a separately hard-coded 3.
    tripDataConsolidated['endTime'] = np.arange(start=w, step=1, stop=windowCount + w)

    labelled = pd.merge(tripDataConsolidated, trips[name + '_lbl'], on='key', how='inner')
    # Rows are removed only when the start is known to fall outside the window.
    outside = (labelled.start < labelled.startTime) | (labelled.start >= labelled.endTime)
    dataConsolidated[name] = labelled[~outside].drop(
        ['key', 'start', 'finish', 'startTime', 'endTime'], axis=1
    )

In [14]:
# Display the labelled feature windows built for trip 2
dataConsolidated['trip2']

Unnamed: 0,acc_mean_x1,acc_mean_y1,acc_mean_z1,acc_median_x1,acc_median_y1,acc_median_z1,acc_sd_x1,acc_sd_y1,acc_sd_z1,acc_td_x1,...,mag_mean_x3,mag_mean_y3,mag_mean_z3,mag_median_x3,mag_median_y3,mag_median_z3,mag_sd_x3,mag_sd_y3,mag_sd_z3,event
196,-0.117049,0.055384,9.724258,-0.069059,0.164277,9.770286,0.537207,0.638735,0.344404,0.238467,...,7.887233e-09,17.153253,-9.766648,1.862645e-08,16.64349,-9.984594,4.636499e-07,1.255869,1.126857,Exchange Aggressive Right Lane
210,0.076253,-0.479333,9.740968,0.098108,-0.550414,9.778791,0.376199,0.503249,0.371611,-4.863501,...,-4.509823e-08,17.40448,-10.141401,-5.215406e-08,16.803752,-10.329916,5.014392e-07,1.334846,1.342795,Exchange Aggressive Right Lane
224,-0.802893,0.661716,9.95032,-0.981835,1.068665,10.125567,1.549357,2.483017,0.623928,-0.342481,...,-4.375434e-08,16.953142,-10.731239,-3.72529e-08,16.420886,-11.418682,5.073667e-07,1.690599,1.597195,Exchange Aggressive Right Lane
323,-0.319001,0.069271,9.749618,-0.315301,0.145275,9.732222,0.562297,0.574463,0.3621,-0.093389,...,1.304913e-08,16.444904,-9.632901,1.676381e-08,15.931213,-10.00158,4.572256e-07,1.347852,1.464008,Exchange Aggressive Right Lane
337,0.387401,-0.6045,9.585968,0.335186,-0.559949,9.661123,0.570634,0.669937,0.552071,-1.409838,...,-1.068139e-08,16.990079,-9.551535,7.450581e-09,16.217625,-10.006996,4.715196e-07,1.557791,1.607449,Exchange Aggressive Right Lane
351,-1.447909,1.806653,10.334931,-1.734154,2.11732,10.344237,1.520748,1.862377,0.735733,-0.213345,...,1.302486e-08,16.986718,-10.090074,3.678724e-08,16.259067,-10.772544,4.919913e-07,1.564839,1.908481,Exchange Aggressive Right Lane
1948,-0.011369,0.017481,9.785534,0.041638,0.057235,9.763113,0.564159,0.844189,0.371372,-21.554165,...,-2.056035e-08,15.698099,-11.485863,-2.328306e-08,15.055883,-12.14283,4.633025e-07,1.36052,1.671535,Aggressive Breaking
1962,0.511269,0.446687,9.591219,0.359663,0.371019,9.507555,1.116786,1.262143,0.447008,2.598652,...,-1.473731e-08,16.907252,-9.83797,-3.352761e-08,17.273237,-9.351838,4.730659e-07,1.646075,2.385431,Aggressive Breaking
1976,2.180812,2.732962,10.320303,2.194775,2.481399,10.346748,0.894915,0.794834,0.534724,0.185941,...,-1.509655e-08,17.819242,-8.907614,-3.352761e-08,17.968228,-8.939788,4.910148e-07,0.847569,1.581213,Aggressive Breaking
2089,0.12666,0.258429,9.621932,0.140714,0.234035,9.648055,0.396463,0.487515,0.294858,0.767789,...,-5.279335e-10,15.362859,-12.651118,-1.862645e-09,15.13813,-13.186079,4.716495e-07,0.834771,1.113313,Aggressive Breaking
