In [1]:
# basic
import pandas as pd
import numpy as np

# graphical 
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# interactive
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

# Preprocessing

This notebook contains the steps taken in preprocessing the data for our final model. We begin by finding common trips among device ids as well as isolating specific interactions for use in our model. Our goal is to reduce our dataframes down to one, where each row represents a summary of the trip. 

## Part 1: Loading Individual Dataframes 

We begin by loading each csv into individual dataframes as done in EDA. Since these csv files are quite large, we subset only the necessary columns from each to keep run time and memory requrements down. Below are all of the columns in each of the relevant dataframes that were found in the EDA section. We then select a subset of those columns to load into our dataframes. 

In [2]:
# All Columns
all_col_dict = {
    "EvtWarn" : ['Device', 
                 'Trip', 
                 'Time', 
                 'RvDevice', 
                 'NativeFlag', 
                 'LocalTimeMS', 
                 'PsId', 
                 'RvBasicVehClass', 
                 'RvRandomId', 
                 'AlertLevel', 
                 'EventAppId', 
                 'RvClass', 
                 'RvLatitude', 
                 'RvLongitude', 
                 'RvHeading', 
                 'RvElevation', 
                 'RvSpeed', 
                 'RvBrakeStatus', 
                 'RvYawRate', 
                 'RvLongitudinalAccel', 
                 'RvTurnSignal', 
                 'RvEventFlags', 
                 'RvRange', 
                 'RvRangeRate', 
                 'RvLongOffset', 
                 'RvLatOffset', 
                 'HvLatitude', 
                 'HvLongitude', 
                 'HvHeading', 
                 'HvElevation', 
                 'HvSpeed', 
                 'HvBrakeStatus', 
                 'HvYawRate', 
                 'HvLongitudinalAccel', 
                 'HvTurnSignal'],
    
    "Host" : ['Device', 
              'Trip', 
              'Time', 
              'NativeFlag', 
              'LocalTimeMS', 
              'Latitude', 
              'Longitude', 
              'Elevation', 
              'Heading', 
              'GpsSpeed', 
              'GpsFixType', 
              'GpsSemiMajAxis', 
              'GpsSemiMinAxis', 
              'GpsOrientSemiMaj', 
              'BrakeStatus', 
              'Speed', 
              'YawRate', 
              'LongAccel', 
              'ThrottlePosPct', 
              'SteerAngle', 
              'TurnSignal', 
              'Headlamp', 
              'Wiper', 
              'TransState', 
              'StabilityControlStatus', 
              'ABSStatus', 
              'TracControlStatus', 
              'ClosestIntersectId', 
              'Distance'],
    
    "RvBsm" : ['Device', 
               'Trip', 
               'Time', 
               'RvDevice', 
               'NativeFlag', 
               'LocalTimeMS', 
               'BsmPsId', 
               'BasicVehClass', 
               'RvRandomId', 
               'Latitude', 
               'Longitude', 
               'Elevation', 
               'Heading', 
               'GpsSpeed', 
               'BrakeStatus', 
               'YawRate', 
               'LongAccel'],
    
    "Spat" : ['Device', 
              'Trip', 
              'Time', 
              'NativeFlag', 
              'LocalTimeMS', 
              'IntersectId', 
              'LaneId', 
              'MovementPhase', 
              'SignalGroupId', 
              'MinEndTime'] 
}

for name in all_col_dict:
    print('\nOriginal', name, 'Columns: ( length:', len(all_col_dict[name]), ')\n', all_col_dict[name])


Original EvtWarn Columns: ( length: 35 )
 ['Device', 'Trip', 'Time', 'RvDevice', 'NativeFlag', 'LocalTimeMS', 'PsId', 'RvBasicVehClass', 'RvRandomId', 'AlertLevel', 'EventAppId', 'RvClass', 'RvLatitude', 'RvLongitude', 'RvHeading', 'RvElevation', 'RvSpeed', 'RvBrakeStatus', 'RvYawRate', 'RvLongitudinalAccel', 'RvTurnSignal', 'RvEventFlags', 'RvRange', 'RvRangeRate', 'RvLongOffset', 'RvLatOffset', 'HvLatitude', 'HvLongitude', 'HvHeading', 'HvElevation', 'HvSpeed', 'HvBrakeStatus', 'HvYawRate', 'HvLongitudinalAccel', 'HvTurnSignal']

Original Host Columns: ( length: 29 )
 ['Device', 'Trip', 'Time', 'NativeFlag', 'LocalTimeMS', 'Latitude', 'Longitude', 'Elevation', 'Heading', 'GpsSpeed', 'GpsFixType', 'GpsSemiMajAxis', 'GpsSemiMinAxis', 'GpsOrientSemiMaj', 'BrakeStatus', 'Speed', 'YawRate', 'LongAccel', 'ThrottlePosPct', 'SteerAngle', 'TurnSignal', 'Headlamp', 'Wiper', 'TransState', 'StabilityControlStatus', 'ABSStatus', 'TracControlStatus', 'ClosestIntersectId', 'Distance']

Original Rv

In [3]:
# Selected Columns
evt_cols = ['Device', 
            'Trip', 
            'Time', 
            'RvDevice', 
            'AlertLevel', 
            'EventAppId',
            'RvBasicVehClass', 
            'RvLatitude', 
            'RvLongitude', 
            'RvHeading', 
            'RvSpeed', 
            'RvBrakeStatus', 
            'RvYawRate', 
            'RvLongitudinalAccel', 
            'RvEventFlags', 
            'RvLongOffset', 
            'RvLatOffset', 
            'HvLatitude', 
            'HvLongitude', 
            'HvHeading', 
            'HvSpeed', 
            'HvBrakeStatus']

host_cols = ['Device', 
             'Trip', 
             'Time',  
             'Latitude', 
             'Longitude', 
             'Heading', 
             'BrakeStatus', 
             'GpsSpeed', 
             'YawRate', 
             'LongAccel', 
             'Speed', 
             'Distance']

rvbsm_cols = ['Device', 
              'Trip', 
              'Time',  
              'BasicVehClass',
              'Latitude', 
              'Longitude',
              'Heading', 
              'GpsSpeed', 
              'BrakeStatus', 
              'YawRate', 
              'LongAccel']

spat_cols = ['Device', 
             'Trip', 
             'Time', 
             'IntersectId', 
             'LaneId', 
             'MovementPhase', 
             'SignalGroupId', 
             'MinEndTime']

selected_col_dict = {
    "EvtWarn" : evt_cols,
    "Host" : host_cols,
    "RvBsm" : rvbsm_cols,
    "Spat" : spat_cols
}

for name in selected_col_dict:
    print('\nSelected', name, 'Columns: ( length:', len(selected_col_dict[name]), ')\n', selected_col_dict[name])


Selected EvtWarn Columns: ( length: 22 )
 ['Device', 'Trip', 'Time', 'RvDevice', 'AlertLevel', 'EventAppId', 'RvBasicVehClass', 'RvLatitude', 'RvLongitude', 'RvHeading', 'RvSpeed', 'RvBrakeStatus', 'RvYawRate', 'RvLongitudinalAccel', 'RvEventFlags', 'RvLongOffset', 'RvLatOffset', 'HvLatitude', 'HvLongitude', 'HvHeading', 'HvSpeed', 'HvBrakeStatus']

Selected Host Columns: ( length: 12 )
 ['Device', 'Trip', 'Time', 'Latitude', 'Longitude', 'Heading', 'BrakeStatus', 'GpsSpeed', 'YawRate', 'LongAccel', 'Speed', 'Distance']

Selected RvBsm Columns: ( length: 11 )
 ['Device', 'Trip', 'Time', 'BasicVehClass', 'Latitude', 'Longitude', 'Heading', 'GpsSpeed', 'BrakeStatus', 'YawRate', 'LongAccel']

Selected Spat Columns: ( length: 8 )
 ['Device', 'Trip', 'Time', 'IntersectId', 'LaneId', 'MovementPhase', 'SignalGroupId', 'MinEndTime']


In [4]:
# Load the data (I am leaving the unused csv files in our dataset commented out)

#Summary = pd.read_csv("../Data/Summary.csv")

print("reading 'EvtWarn.csv'...")
evtwarn = pd.read_csv("../Data/EvtWarn.csv", usecols = evt_cols)
print("reading 'Host.csv'...")
host = pd.read_csv("../Data/Host.csv", usecols = host_cols)
print("reading 'RvBsm.csv'...")
rvbsm = pd.read_csv("../Data/RvBsm.csv", usecols = rvbsm_cols)
print("reading 'Spat.csv'...")
spat = pd.read_csv("../Data/Spat.csv")
print("done!")

#RvZone = pd.read_csv("../Data/RvZone.csv")
#RvBasicVehClass = pd.read_csv("../Data/RvBasicVehClass.csv")

reading 'EvtWarn.csv'...
reading 'Host.csv'...
reading 'RvBsm.csv'...
reading 'Spat.csv'...
done!


### Resulting DataFrames
We will first call the interactive function from the EDA, which will be used throughout this notebook as well.  

In [5]:
def df_interact(df):
    '''
    Outputs sliders that show rows and columns of df
    '''
    def peek(row=0, col=0):
        return df.iloc[row:row + 5, col:col + 6]
    interact(peek, row=(0, len(df), 5), col=(0, len(df.columns) - 6))
    print('({} rows, {} columns) total'.format(df.shape[0], df.shape[1]))

#### EvtWarn

In [6]:
df_interact(evtwarn)

interactive(children=(IntSlider(value=0, description='row', max=2461, step=5), IntSlider(value=0, description=…

(2461 rows, 22 columns) total


#### Host

In [7]:
df_interact(host)

interactive(children=(IntSlider(value=0, description='row', max=11418494, step=5), IntSlider(value=0, descript…

(11418494 rows, 12 columns) total


#### RvBsm

In [8]:
df_interact(rvbsm)

interactive(children=(IntSlider(value=0, description='row', max=648149, step=5), IntSlider(value=0, descriptio…

(648149 rows, 11 columns) total


#### Spat

In [9]:
df_interact(spat)

interactive(children=(IntSlider(value=0, description='row', max=56568, step=5), IntSlider(value=0, description…

(56568 rows, 10 columns) total


## Part 2: Filtering Data
In this section, we filter the data down to include only the driving scene we desire. We will:

1. Select only IMA (intersection movement assist) warnings using `EvtWarn`
2. Set a time range for analysis of that warning
3. Filter `RvBsm` and `Host` for data within that time range
 

In [20]:
# Get time range for each event
evtwarn_ima = evtwarn[(evtwarn["EventAppId"] == 2)]
evt_times = evtwarn_ima.loc[:, ["Trip", "Device", "Time"]].groupby(["Device", "Trip"]).agg(['min', 'max'])
evt_times.columns = evt_times.columns.droplevel(0)

evt_times
# Increase range per event to include 2 seconds before and 5 seconds after the events were observed
evt_times['min'] = evt_times['min'] - 200
evt_times['max'] = evt_times['max'] + 500


# Set the unique device, trip, event combination as index
#evt_times.reset_index(inplace = True)
#evt_times['device_trip_event'] = evt_times['Device'].astype(str) +'_'+ evt_times['Trip'].astype(str) +'_'+ evt_times['RvEventFlags'].astype(str)
#evt_times.set_index('device_trip_event', inplace = True)
#evt_times.head()


In [14]:
# Get only IMA warnings
evtwarn_ima = evtwarn[(evtwarn["EventAppId"] == 2)]

# uncomment the following linto note that 356 of these are informs and only 126 are warnings
# evtwarn_ima.groupby(["AlertLevel"]).count()

# for now we will continue with both, but we can get just warnings by changing the first line to 
# evt_warn_ima = evtwarn[(evtwarn["EventAppId"] == 2) & (evtwarn["AlertLevel"] == 3)]

# create groupby object, 
evt_times = evtwarn.loc[:, ["Trip", "Device", "EventAppId", "Time"]].groupby(["Trip", "Device", 'EventAppId'])
#evt_times.groups

Unnamed: 0_level_0,Device,Trip,Time,RvDevice,RvBasicVehClass,EventAppId,RvLatitude,RvLongitude,RvHeading,RvSpeed,...,RvYawRate,RvLongitudinalAccel,RvEventFlags,RvLongOffset,RvLatOffset,HvLatitude,HvLongitude,HvHeading,HvSpeed,HvBrakeStatus
AlertLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,356,356,356,356,356,356,356,356,356,356,...,356,356,356,356,356,356,356,356,356,356
3,126,126,126,126,126,126,126,126,126,126,...,126,126,126,126,126,126,126,126,126,126


In [94]:
# pull that into a min dictionary and a max dictionary
min_dict = {}
max_dict = {}

for event in list(evt_times.index):
    min_dict[event] = evt_times.loc[event, 'min']
    max_dict[event] = evt_times.loc[event, 'max']

def filter_times(df):
    (df['Time'] >  
df.groupby(['Device', 'Trip', 'Event']).filter(lambda x: x['Time'])

SyntaxError: unexpected EOF while parsing (<ipython-input-94-c9d71b613bd6>, line 11)

## Part 3:  Classify Reactive vs. Nonreactive
In this section, we classify a drivers response as reactive versus nonreactive. Do accomplish this we use the `BrakeStatus` feature in the `Host` dataframe.

For our model we use the following classification scheme:

| Time from Warning Issued to Brakes Applied | Reactivity  | Integer Representation |
|--------------------------------------------|-------------|------------------------|
| > 1 second                                 | Nonreactive | 0                      |
| <= 1 second                                | Reactive    | 1                      |

We make that determination by:

1. 
2. 
3. 

__\* _Future Goal_ \*__ 

1. Use __Host__ lane changes as well to determine reactivity of the driver 
2. Remove the situations where __Remote Vehicle__ changes lanes upon issuing warning (utilize the `RvBrakeStatus` feature in `RvBsm`) 

## Part 4: Merge Data

To Merge the data, we first need to create a unique identifier in each dataframe. In our [EDA](../EDA) notebooks, we found that the unique identifier for each row is made up of:

    1. Device: The host vehicle ID
    2. Trip: The trip number for that device
    3. Time: The amount of time since the start of the trip

To get a better idea of how this should be done, we first look at an individual device-trip pair, then generalize to the entire dataset. 

### A) Individual Trip Example

In [62]:
## To see the results for another device-trip pair, change the following values
device = 2004
trip = 12
evtwarn[(evtwarn['Device'] == 2004) & (evtwarn['Trip'] == 12)]
when there is interaction: 10 hz 
with no interaction: 2 hz

Unnamed: 0,Device,Trip,Time,RvDevice,NativeFlag,LocalTimeMS,RvBasicVehClass,RvLatitude,RvLongitude,RvHeading,...,RvYawRate,RvLongitudinalAccel,RvEventFlags,RvLongOffset,RvLatOffset,HvLatitude,HvLongitude,HvHeading,HvSpeed,HvBrakeStatus
0,2004,12,48150,2494,1,80559500,0,40.258754,-83.463172,110.25,...,0.0,0.16,0,-13.039368,-4.904795,40.258686,-83.463096,110.27,31.577778,0
1,2004,12,48250,2494,1,80560500,0,40.258647,-83.46279,110.25,...,0.0,-0.02,0,-10.674012,-5.043336,40.258585,-83.462745,110.43,31.611113,0
2,2004,12,48350,2494,1,80561500,0,40.258539,-83.462409,110.175,...,0.0,0.2,0,-8.518434,-5.195783,40.258485,-83.462393,110.53,31.622221,0
3,2004,12,48450,2494,1,80562500,0,40.258431,-83.462028,110.35,...,0.0,-0.18,0,-0.706004,-5.365892,40.258383,-83.462042,110.57,31.561111,0


In [26]:
# Get data for the specific trip on the specified device
host_data = host[(host['Device'] == device) & (host['Trip'] == trip)].reset_index().drop(columns=['index'])
rvbsm_data = rvbsm[(rvbsm['Device'] == device) & (rvbsm['Trip'] == trip)].reset_index().drop(columns=['index'])
evt_data = evtwarn[(evtwarn['Device'] == device) & (evtwarn['Trip'] == trip)].reset_index().drop(columns=['index'])
spat_data = spat[(spat['Device'] == device) & (spat['Trip'] == trip)].reset_index().drop(columns=['index'])

In [27]:
# Create unique identifier for each record based on Time 
host_data['merge_id'] = host_data['Device'].astype(str) +'_'+ host_data['Trip'].astype(str)  +'_'+ host_data['Time'].astype(str)
rvbsm_data['merge_id'] = rvbsm_data['Device'].astype(str) +'_'+ rvbsm_data['Trip'].astype(str)  +'_'+ rvbsm_data['Time'].astype(str)
evt_data['merge_id'] = evt_data['Device'].astype(str) +'_'+ evt_data['Trip'].astype(str)  +'_'+ evt_data['Time'].astype(str)
spat_data['merge_id'] = spat_data['Device'].astype(str) +'_'+ spat_data['Trip'].astype(str)  +'_'+ spat_data['Time'].astype(str)

In [60]:
# Set unique identifier as index
host_data = host_data.set_index('merge_id')
rvbsm_data = rvbsm_data.set_index('merge_id')
evt_data = evt_data.set_index('merge_id')
spat_data = spat_data.set_index('merge_id')

evt_data

KeyError: 'merge_id'

In [29]:
# Join Tables
ht_ = host_data.join(rvbsm_data,lsuffix='_host',rsuffix='_rvbsm').join(evt_data,rsuffix='_evtwarn').join(spat_data,rsuffix='_spat')

# View Resulting DataFrame
df_interact(ht_)

interactive(children=(IntSlider(value=0, description='row', max=2236, step=5), IntSlider(value=0, description=…

(2236 rows, 59 columns) total


In [30]:
# NOTE TO FIX: So many null values...
ht_.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2236 entries, 2004_12_20450 to 2004_12_136050
Data columns (total 59 columns):
Device_host            2236 non-null int64
Trip_host              2236 non-null int64
Time_host              2236 non-null int64
NativeFlag_host        2236 non-null int64
LocalTimeMS_host       2236 non-null int64
Latitude_host          2236 non-null float64
Longitude_host         2236 non-null float64
Heading_host           2236 non-null float64
GpsSpeed_host          2236 non-null float64
BrakeStatus_host       2236 non-null int64
Speed                  2236 non-null float64
YawRate_host           2236 non-null float64
LongAccel_host         2236 non-null float64
Distance               2236 non-null float64
Device_rvbsm           1503 non-null float64
Trip_rvbsm             1503 non-null float64
Time_rvbsm             1503 non-null float64
NativeFlag_rvbsm       1503 non-null float64
LocalTimeMS_rvbsm      1503 non-null float64
BasicVehClass          1503 non-n

### B) All Trips
Using the same method on all the trips at once would generate a massive dataframe that would be difficult to work with. Instead, we do the following:

    1. Identify the beginning and end times of each individual event
    2. 
    
Erima

In [18]:
# To fit with the data dictionary and to prevent confusion, the names used 
    # for the RvBsm columns will be changed as follows:
rvbsm.columns = ['Device', 'Trip', 'Time', 'NativeFlag', 'LocalTimeMS', 'RvBasicVehClass', 'RvLatitude', 'RvLongitude', 'RvHeading', 'RvGpsSpeed', 'RvBrakeStatus', 'RvYawRate', 'RvLongAccel']

 \* __Move the following cell to EDA__ \*

In [42]:
# These are the event flags in our dataset and what they mean:
evt_flags = {
    'Hazard Lights': [1, 0, 1, 0, 0],
    'Stop Line Violation': [0, 0, 1, 0, 0],
    'ABS Activated': [0, 0, 1, 0, 0],
    'Traction Control Loss': [0, 0, 1, 0, 0],
    'Stability Control Activated': [0, 0, 1, 0, 0],
    'Hazardous Materials': [0, 0, 1, 0, 0],
    'Reserved': [0, 0, 1, 0, 0],
    'Hard Braking': [0, 1, 1, 0, 1],
    'Air Bag Deployed': [0, 0, 0, 1, 1]
    
}
evt_flags = pd.DataFrame(evt_flags, index = [0, 128, 255, 4096, 4224])
evt_flags["Count in EvtWarn"] = evtwarn["RvEventFlags"].value_counts()
evt_flags

Unnamed: 0,Hazard Lights,Stop Line Violation,ABS Activated,Traction Control Loss,Stability Control Activated,Hazardous Materials,Reserved,Hard Braking,Air Bag Deployed,Count in EvtWarn
0,1,0,0,0,0,0,0,0,0,1093
128,0,0,0,0,0,0,0,1,0,30
255,1,1,1,1,1,1,1,1,0,1318
4096,0,0,0,0,0,0,0,0,1,14
4224,0,0,0,0,0,0,0,1,1,6


In [51]:
list(evt_times.index)

['2008_5_255',
 '2008_8_0',
 '2527_9_255',
 '2720_9_0',
 '2720_11_255',
 '2004_12_0',
 '2233_12_255',
 '2533_14_255',
 '2969_16_255',
 '2584_18_0',
 '2008_20_0',
 '2331_20_255',
 '2999_20_0',
 '2999_24_255',
 '2008_25_0',
 '2008_26_0',
 '2496_29_255',
 '2527_30_255',
 '2627_33_255',
 '2348_35_0',
 '2720_36_255',
 '2858_39_0',
 '2858_39_255',
 '2584_43_0',
 '2627_43_0',
 '2627_43_128',
 '2496_46_255',
 '2858_46_255',
 '2533_47_255',
 '2858_50_0',
 '2858_50_255',
 '2496_52_0',
 '2858_52_255',
 '2004_54_255',
 '2720_54_255',
 '2004_55_0',
 '2004_55_255',
 '2496_55_255',
 '2627_55_255',
 '2936_55_255',
 '2720_56_255',
 '2999_58_0',
 '2008_61_0',
 '2008_61_255',
 '2941_67_0',
 '2533_68_255',
 '2533_70_255',
 '2017_81_0',
 '2496_83_255',
 '2496_94_255',
 '2496_95_255',
 '2527_96_0',
 '2496_99_255',
 '2999_99_0',
 '2496_100_0',
 '2496_100_255',
 '2496_102_0',
 '2804_111_0',
 '2999_113_0',
 '2720_115_255',
 '2004_119_128',
 '2588_122_255',
 '2527_123_255',
 '2941_124_0',
 '2969_129_255',
 '233

In [53]:
print("working on host...")
host['unique_trip'] = host['Device'].astype(str) +'_'+ host['Trip'].astype(str) +'-'+ host['Time'].astype(str)
print(' column added. \nresetting index...')
host = host.set_index('unique_trip')
print(' index reset')

print("working on rvbsm ...")
rvbsm['unique_trip'] = rvbsm['Device'].astype(str) +'_'+ rvbsm['Trip'].astype(str)+'-'+ rvbsm['Time'].astype(str)
print(' column added. \nresetting index...')
rvbsm = rvbsm.set_index('unique_trip')
print(' index reset')

print("working on evtwarn...")
evtwarn['unique_trip'] = evtwarn['Device'].astype(str) +'_'+ evtwarn['Trip'].astype(str)+'-'+ evtwarn['Time'].astype(str)
print(' column added. \nresetting index...')
evtwarn = evtwarn.set_index('unique_trip')
print(' index reset')

print("working on  spat...")
spat['unique_trip'] = spat['Device'].astype(str) +'_'+ spat['Trip'].astype(str)+'-'+ spat['Time'].astype(str)
print(' column added. \nresetting index...')
spat = spat.set_index('unique_trip')
print(' index reset')

working on host...
 column added. 
resetting index...
 index reset
working on rvbsm ...
 column added. 
resetting index...
 index reset
working on evtwarn...
 column added. 
resetting index...
 index reset
working on  spat...
 column added. 
resetting index...
 index reset


In [38]:
# find the min and max times of an event
host['is_event'] = 

Index(['Device', 'Trip', 'Time', 'RvDevice', 'NativeFlag', 'LocalTimeMS',
       'RvBasicVehClass', 'RvLatitude', 'RvLongitude', 'RvHeading', 'RvSpeed',
       'RvBrakeStatus', 'RvYawRate', 'RvLongitudinalAccel', 'RvEventFlags',
       'RvLongOffset', 'RvLatOffset', 'HvLatitude', 'HvLongitude', 'HvHeading',
       'HvSpeed', 'HvBrakeStatus', 'unique_trip'],
      dtype='object')

def col_added_message(df, col):
    if col in list(df.columns):
        print(col, 'successfully added')

print("working on host...")
host['merge_id'] = host['Device'].astype(str) +'_'+ host['Trip'].astype(str)  +'_'+ host['Time'].astype(str)
col_added_message(host, 'merge_id')

rvbsm['merge_id'] = rvbsm['Device'].astype(str) +'_'+ rvbsm['Trip'].astype(str)  +'_'+ rvbsm['Time'].astype(str)
col_added_message(rvbsm, 'merge_id')

evtwarn['merge_id'] = evtwarn['Device'].astype(str) +'_'+ evtwarn['Trip'].astype(str)  +'_'+ evtwarn['Time'].astype(str)
col_added_message(rvbsm, 'merge_id')

spat['merge_id'] = spat['Device'].astype(str) +'_'+ spat['Trip'].astype(str)  +'_'+ spat['Time'].astype(str)
col_added_message(rvbsm, 'merge_id')

# Set unique identifier as index
print("changing host index...")
host = host.set_index('merge_id')

print("changing rvbsm index...")
rvbsm = rvbsm.set_index('merge_id')

print("changing evtwarn index...")
evtwarn = evtwarn.set_index('merge_id')

print("changing spat index...")
spat = spat.set_index('merge_id')
print('done!')

host.columns

# Join Tables
ht_ = host.join(rvbsm,lsuffix='_host',rsuffix='_rvbsm').join(evtwarn,rsuffix='_evtwarn').join(spat,rsuffix='_spat')

# View Resulting DataFrame
df_interact(ht_)

# NOTE TO FIX: So many null values...
ht_.info()

## Part 3: Analyze Reaction

In this section, we analyze the reactions by the host driver when a warning is sent. First we will define a function that determines whether the host vehicle's brake status changes after a message is recieved. 

In [98]:
# To try on a different trip change these values
test_device = 2004
test_trip = 12

EvtWarn_mini = EvtWarn[(EvtWarn["Device"] == test_device) & (EvtWarn["Trip"] == test_trip)]
Host_mini = Host[(Host["Device"] == test_device) & (Host["Trip"] == test_trip)]
RvBsm_mini = RvBsm[(RvBsm["Device"] == test_device) & (RvBsm["Trip"] == test_trip)]

#### Test EvtWarn

In [47]:
df_interact(EvtWarn_mini)

interactive(children=(IntSlider(value=0, description='row', max=4, step=5), IntSlider(value=0, description='co…

(4 rows, 23 columns) total


#### Test Host

In [42]:
df_interact(Host_mini)

interactive(children=(IntSlider(value=0, description='row', max=2236, step=5), IntSlider(value=0, description=…

(2236 rows, 15 columns) total


#### Test RvBsm

In [43]:
df_interact(RvBsm_mini)

interactive(children=(IntSlider(value=0, description='row', max=1503, step=5), IntSlider(value=0, description=…

(1503 rows, 14 columns) total


In [31]:
EvtWarn["DeviceTrip"] = EvtWarn["Device"] * 10000 + EvtWarn["Trip"]
Host["DeviceTrip"] = Host["Device"] * 10000 + EvtWarn["Trip"]
RvBsm["DeviceTrip"] = RvBsm["Device"] * 10000 + EvtWarn["Trip"]

In [33]:
Evt_BSM = pd.merge(EvtWarn, RvBsm, on = 'DeviceTrip', how = 'inner')
df_interact(Evt_BSM)

interactive(children=(IntSlider(value=0, description='row', max=9817, step=5), IntSlider(value=0, description=…

(9817 rows, 36 columns) total


In [65]:
evtwarn = pd.read_csv("../Data/EvtWarn.csv")
evtwarn.columns

Index(['Device', 'Trip', 'Time', 'RvDevice', 'NativeFlag', 'LocalTimeMS',
       'PsId', 'RvBasicVehClass', 'RvRandomId', 'AlertLevel', 'EventAppId',
       'RvClass', 'RvLatitude', 'RvLongitude', 'RvHeading', 'RvElevation',
       'RvSpeed', 'RvBrakeStatus', 'RvYawRate', 'RvLongitudinalAccel',
       'RvTurnSignal', 'RvEventFlags', 'RvRange', 'RvRangeRate',
       'RvLongOffset', 'RvLatOffset', 'HvLatitude', 'HvLongitude', 'HvHeading',
       'HvElevation', 'HvSpeed', 'HvBrakeStatus', 'HvYawRate',
       'HvLongitudinalAccel', 'HvTurnSignal'],
      dtype='object')

In [75]:
evtwarn[evtwarn['EventAppId']== 1].groupby(['Device', 'Trip']).count()['Time'].sum()


58