-
Notifications
You must be signed in to change notification settings - Fork 3
/
preprocessing_100Car.py
70 lines (58 loc) · 3.98 KB
/
preprocessing_100Car.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
This script preprocesses the 100-Car Naturalistic Driving Study data.
The raw data is publicly available at https://doi.org/10.15787/VTT1/CEU6RB
'''
import pandas as pd
# Folders holding the raw input files and the cleaned output files
path_raw = './RawData/'
path_cleaned = './CleanedData/'

# Meta data processing
## Ego vehicle information
ego_vehicle = pd.read_csv(path_raw + '100CarVehicleInformation.csv')

### Vehicle dimensions as [width, length], keyed by (make, model)
### source: https://www.auto123.com
### NOTE(review): the values look like metres, but the unit is not stated here -- confirm
known_dimensions = {
    ('Ford', 'Taurus'): [1.854, 5.017],
    ('Chevrolet', 'Malibu'): [1.763, 4.836],
    ('Toyota', 'Camry'): [1.780, 4.785],
    ('Toyota', 'Corolla'): [1.695, 4.420],
    ('Chevrolet', 'Cavalier'): [1.744, 4.595],
    ('Ford', 'Explorer'): [1.782, 4.579],
}
size_columns = ['width', 'length']
for (make, model), size in known_dimensions.items():
    is_match = (ego_vehicle['make'] == make) & (ego_vehicle['model'] == model)
    ego_vehicle.loc[is_match, size_columns] = size

### Rows that still have no width fall back to a default size
ego_vehicle.loc[ego_vehicle['width'].isna(), size_columns] = [1.8, 4.5]
## target vehicle information
data_event = pd.read_csv(path_raw + '100CarEventVideoReducedData.csv')

### Width and length per target type (estimated by ChatGPT 4).
### The nested dict is keyed by target type, so it is transposed to get
### one row per target type with 'width' and 'length' columns.
vehicle_dimension = pd.DataFrame({
    'Automobile': {'width': 1.8, 'length': 4.5},
    'Pickup truck': {'width': 1.9, 'length': 5.9},
    'Tractor-trailer: Enclosed box': {'width': 2.6, 'length': 22.5},
    'Sport Utility Vehicles': {'width': 2.0, 'length': 4.8},
    'Van (minivan or standard van)': {'width': 2.1, 'length': 5.5},
    'School bus': {'width': 2.4, 'length': 10.5},
    'Single-unit straight truck: Box': {'width': 2.4, 'length': 7.3},
    'Transit bus': {'width': 2.6, 'length': 11.5},
    'Single-unit straight truck: Flatbed': {'width': 2.4, 'length': 7.3},
    'Single-unit straight truck: Tow truck': {'width': 2.4, 'length': 8.9},
    'Single-unit straight truck: Dump': {'width': 2.4, 'length': 7.3},
    'Single-unit straight truck: Multistop/Step Van': {'width': 2.1, 'length': 6.4},
    'Tractor-trailer: Tank': {'width': 2.6, 'length': 22.5},
    'Other vehicle type': {'width': 1.8, 'length': 4.5},
    'Unknown vehicle type': {'width': 1.8, 'length': 4.5},
}).T

## keep only events whose target type has an entry in the table above
## (drops infrastructure, cyclist, pedestrian, motorcyclist, animal, and unknown target)
has_listed_target = data_event['target type'].isin(vehicle_dimension.index)
data_event = data_event[has_listed_target].copy()

## attach the target dimensions looked up by target type
target_type = data_event['target type']
for new_column, dimension in (('target_width', 'width'), ('target_length', 'length')):
    data_event[new_column] = target_type.map(vehicle_dimension[dimension])

## attach the ego dimensions looked up by vehicle webid, row-aligned with data_event
ego_size_by_webid = ego_vehicle.set_index('vehicle webid')[['width', 'length']]
data_event[['ego_width', 'ego_length']] = ego_size_by_webid.loc[data_event['vehicle webid']].to_numpy()

## correct event start if it's 0 (it is moved to 1)
starts_at_zero = data_event['event start'] == 0
data_event.loc[starts_at_zero, 'event start'] = 1
## save meta data, one file per event severity
severity = data_event['event severity']
crash_event = data_event.loc[severity == 'Crash']
nearcrash_event = data_event.loc[severity == 'Near-Crash']
for events, filename in (
    (crash_event, 'HundredCar_metadata_CrashEvent.csv'),
    (nearcrash_event, 'HundredCar_metadata_NearCrashEvent.csv'),
):
    # index=False: the row index is not written to the file
    events.to_csv(path_cleaned + filename, index=False)
# Time-series data processing
def _missing_to_na(field):
    """Return 'NA' if *field* is exactly the '.' marker, otherwise *field* unchanged."""
    return 'NA' if field == '.' else field


def _clean_column_77(field):
    """Clean one raw text field of column 77.

    Surrounding whitespace and every double quote are removed. A field that
    is then only a '.' becomes 'NA'. Any other value is returned as cleaned,
    so periods inside a value (e.g. a decimal point) are kept rather than
    being rewritten to 'NA'.
    """
    cleaned = field.strip().replace('"', '')
    # Exact match, as for columns 8 and 9; the extra strip() also catches a
    # '.' that was padded with spaces inside the quotes.
    return 'NA' if cleaned.strip() == '.' else cleaned


for crash_type in ['Crash', 'NearCrash']:
    data_raw = pd.read_csv(
        path_raw + 'HundredCar_' + crash_type + '_Public_Compiled.txt',
        sep=',',
        # Integer keys select columns by position; each converter receives
        # the raw field text.
        # NOTE(review): which signals sit at positions 8, 9 and 77 is not
        # visible in this script -- confirm against the data dictionary.
        converters={77: _clean_column_77,
                    8: _missing_to_na,
                    9: _missing_to_na},
    )
    data_raw.to_csv(path_cleaned + 'HundredCar_' + crash_type + '_Public_Cleaned.csv', index=False)