## Crash Data Wrangling Jupyter Notebook

**Author:** Eric Englin and Meredith Raymer

**Date:** 11/12/21

**Purpose:** This notebook will combine IMARS datasets over three time periods and change data to merge with CDS and STARS datasets

In [81]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
from shapely.geometry import Point, LineString, Polygon

In [82]:
myworkingdirectory = r"C:\Users\eric.englin\Desktop\TSP"
os.chdir(myworkingdirectory)

## Step 1: Concatenate Similar Datasets 

**Note:** Datasets have been divided across three time periods, so these must be combined to start joining the tables together. 

In [83]:
path = './data/IMARS'

files = os.listdir(path)

for f in files:
    print(f)

NPS GOccIvPA Command Address 2011-2015.xlsx
NPS GOccIvPA Command Address 2016 to 4-2018.xlsx
NPS GOccIvPA Command Address 4-2018 to 6-2021.xlsx
NPS MVCOccGPersonReport Command 2011 to 2015.xlsx
NPS MVCOccGPersonReport Command 2016 to 4-2018.xlsx
NPS MVCOccGPersonReport Command 4-2018 to 6-2021.xlsx
NPS MVCOccReportCommand_Classification 2011-2015.xlsx
NPS MVCOccReportCommand_Classification 2016 to 4-2018.xlsx
NPS MVCOccReportCommand_Classification 4-2018 to 6-2021.xlsx
NPS MVCOccVehicleReport Command 2011 to 2015.xlsx
NPS MVCOccVehicleReport Command 2016 to 4-2018.xlsx
NPS MVCOccVehicleReport Command 4-2018 to 6-2021.xlsx


In [84]:
imars_crash_1 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2011-2015.xlsx", sheet_name = "Batch1-Result1")


In [85]:
imars_crash_1 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2011-2015.xlsx", sheet_name = "Batch1-Result1")
imars_crash_2 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_crash_3 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_passenger_1 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 2011 to 2015.xlsx", sheet_name = "Batch1-Result1")
imars_passenger_2 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_passenger_3 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_vehicle_1 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 2011 to 2015.xlsx", sheet_name = "Batch1-Result1")
imars_vehicle_2 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_vehicle_3 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_crash_details_1 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 2011-2015.xlsx", sheet_name = "Batch1-Result1")
imars_crash_details_2 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_crash_details_3 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")



In [86]:
imars_crash = pd.concat([imars_crash_1, imars_crash_2, imars_crash_3])
imars_passenger = pd.concat([imars_passenger_1, imars_passenger_2, imars_passenger_3])
imars_vehicle = pd.concat([imars_vehicle_1, imars_vehicle_2, imars_vehicle_3])
imars_crash_details = pd.concat([imars_crash_details_1, imars_crash_details_2, imars_crash_details_3])


In [87]:
imars_crash.shape, imars_crash_1.shape, imars_crash_2.shape, imars_crash_3.shape

((12883, 32), (442, 32), (4454, 32), (7987, 32))

In [88]:
imars_passenger.shape, imars_passenger_1.shape, imars_passenger_2.shape, imars_passenger_3.shape

((19047, 28), (499, 28), (6782, 28), (11766, 28))

In [89]:
imars_vehicle.shape, imars_vehicle_1.shape, imars_vehicle_2.shape,imars_vehicle_3.shape

((16109, 51), (493, 51), (5529, 51), (10087, 51))

In [90]:
imars_crash_details.shape, imars_crash_details_1.shape, imars_crash_details_2.shape, imars_crash_details_3.shape

((11901, 24), (491, 24), (3907, 24), (7503, 24))

In [91]:
imars_crash.head()

Unnamed: 0,IMARS_Record_No,Crash_Date_Time,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,...,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address
0,NP12000378,20120121 00:00:00:000,,,,,,,,,...,,,,,,,,,Highway/road/alley (includes street),
1,NP12000078,20120106 14:30:00:000,,,,,,,,,...,,,,,,,,,,
2,NP12000935,20120215 10:15:00:000,,,,,,,,,...,,,,,,,,,Highway/road/alley (includes street),
3,NP12001003,20120218 10:50:00:000,,,,,,,,,...,,,,,,,,,,
4,NP12000911,20120208 00:00:00:000,,,,,,,,,...,,,,,,,,,Parking lot/garage,


In [92]:
imars_passenger.Injury_Severity.value_counts()

01. No injury                    8398
99. Unknown                      1044
02. Possible injury               870
03. Non-incapacitating injury     724
04. Incapacitating injury         364
05. Fatal                          90
Name: Injury_Severity, dtype: int64

In [93]:
imars_passenger.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Driver_Action',
       'Driver_Condition', 'Driver_Distraction', 'Suspect_Alcohol',
       'Alcohol_Test', 'Alcohol_Test_Result_1', 'Alcohol_Test_Result_2',
       'Suspect_Drugs', 'Drug_Test', 'Violations_Issued', 'Seat_Position',
       'Injury_Severity', 'Air_Bag_Deployed', 'Ejection',
       'Injury_Transported_By', 'Safety_Equipment_Used',
       'Vehicle_number_striking_non_motorist', 'Injured_transported_by',
       'Non_motorist_safety_equipment',
       'Non_motorist_action_circumstance_prior_to_crash',
       'Non_motorist_action_circumstance_at_time_of_crash',
       'Non_motorist_condition_at_time_of_crash', 'Non_motorist_distraction',
       'Non_motorist_location_at_time_of_crash', 'Pedestrian_Type',
       'Pedestrian_Type_Detail'],
      dtype='object')

In [94]:
imars_crash.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Linked_Address_Classification',
       'City_Town_Park_Location', 'State', 'County', 'Direction',
       'Linked_Street_Number', 'Linked_Common_Name', 'Street_Type',
       'Direction.1', 'NEAR_Distance_to_MI', 'NEAR_Direction_To',
       'NEAR_Direction', 'NEAR_route_street_road_name', 'NEAR_Road_Type',
       'NEAR_Direction.1', 'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_Road_Type',
       'AT_Direction', 'Mile_Marker', 'Latitude', 'Longitude', 'Region',
       'State_Zone', 'Park', 'Site', 'Place', 'Point',
       'Road_Type_Classification', 'Linked_Address'],
      dtype='object')

In [95]:
imars_crash_details.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Number_of_Vehicles_Involved',
       'Injury_or_Fatal_Crash', 'Investigated_at_Scene', 'Hit_and_Run',
       'Non_Motor_Vehicl_Property_Damage', 'Amount_of_Property_Damage',
       'First_Harmful_Event_Type', 'First_Harmful_Event',
       'Location_of_First_Harmful_Event', 'Weather', 'Roadway_Condition',
       'Lighting', 'School_Bus_related', 'AS_Road_Circumstance',
       'Environmental_Contributing_Circumstances', 'Work_Zone_Related',
       'Work_Zone_Workers_Present', 'Work_Zone_Location',
       'Law_Enforcement_Present_at_Work_Zone', 'Relation_to_Junction',
       'Type_of_Intersection', 'Manner_of_Collision'],
      dtype='object')

In [96]:
imars_vehicle.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Vehicle_Number',
       'Number_of_Occupants', 'Vehicle_Towed', 'Insurance_verified',
       'Initial_Impact_Point', 'Most_Damaged_Area', 'Extent_of_Damage',
       'Direction_of_Travel_Prior_to_Crash', 'Posted_Speed',
       'First_Event_Type', 'First_Event', 'Second_Event_Type', 'Second_Event',
       'Third_Event_Type', 'Third_Event', 'Fourth_Event_Type', 'Fourth_Event',
       'Motor_Vehicle_Unit_Type', 'Vehicle_Owner', 'Vehicle_Type',
       'Non_Commercial_Trailer_Style', 'Emergency_Vehicle_Use',
       'Emergency_Equipment_Activated', 'Special_Function_of_MV_in_Transport',
       'Motor_Vehicle_Contributing_Circumstance',
       'Vehicle_Maneuver_Action_Prior_to_Crash', 'Road_Surface', 'Grade',
       'Roadway_Alignment', 'Total_Number_of_Lanes', 'Traffic_Control',
       'Traffic_Control_Working_Properly', 'Roadway_Description',
       'Commercial_Non_Commercial', 'Number_of_Axles', 'Gross_Vehicle_Weight',
       'Combination_GVW', 

## Add Parks to Crash Details Dataset

**Note:** IMARS does not have park units with each crash, so these will have to be added using the Latitude and Longitude fields in the imars_crash dataset. 

In [97]:
imars_crash.shape

(12883, 32)

In [98]:
imars_crash.dropna(subset=['Park']).shape

(5419, 32)

In [99]:
imars_crash['Region'].value_counts()

PWR    1763
IMR    1554
SER    1478
NER     369
MWR     178
AKR      74
NCR      18
Name: Region, dtype: int64

In [100]:
imars_crash.dropna(subset=['Park'])['Region'].value_counts()

PWR    1763
IMR    1539
SER    1478
NER     369
MWR     178
AKR      74
NCR      18
Name: Region, dtype: int64

In [101]:
imars_crash_parks = imars_crash.dropna(subset=['Park'])
imars_crash_parks.shape

(5419, 32)

In [103]:
imars_crash.loc[imars_crash['Park'].isnull()==False].shape

(5419, 32)

In [104]:
imars_crash.loc[imars_crash['Park'].isnull()==True].shape

(7464, 32)

In [105]:
imars_crash_coords = imars_crash.loc[imars_crash['Park'].isnull()==True].dropna(subset=['Latitude','Longitude'])
imars_crash_coords.shape

(3261, 32)

In [106]:
3261+5419

8680

In [107]:
imars_crash_coords = imars_crash.loc[imars_crash['Park'].isnull()==True].dropna(subset=['Latitude','Longitude'])
imars_crash_coords_geo=gpd.GeoDataFrame(imars_crash_coords, geometry=gpd.points_from_xy(imars_crash_coords.Longitude, 
                                                                             imars_crash_coords.Latitude))

In [108]:
filename = "./shapefiles/NPS_-_Land_Resources_Division_Boundary_and_Tract_Data_Service.geojson"
file = open(filename)
parks = gpd.read_file(file)

In [109]:
imars_crash_coords_geo.crs = "EPSG:4326"
parks.crs = "EPSG:4326"

In [110]:
parks['geometry']=parks['geometry'].buffer(0.2)

imars_crash_coords_geo_withparknames=gpd.sjoin(imars_crash_coords_geo,parks,how="left", op='intersects')
imars_crash_coords_geo_withparknames.head()


  """Entry point for launching an IPython kernel.
  if (yield from self.run_code(code, result)):


Unnamed: 0,IMARS_Record_No,Crash_Date_Time,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,...,CREATED_BY,METADATA,PARKNAME,CreationDate,Creator,EditDate,Editor,GlobalID,Shape__Area,Shape__Length
410,NP14039836,20140513 13:30:00:000,,,,,,,,,...,,,,,,,,,,
412,NP14049772,20140607 14:42:00:000,,,,,,,,,...,,,,,,,,,,
414,NP14060606,20140625 20:40:00:000,,,,,,,,,...,Lands,Preliminary data. Contact the Land Resources P...,Olympic,2020-01-09T22:16:03,SCarlton@nps.gov_nps,2020-01-09T22:16:03,SCarlton@nps.gov_nps,d568927b-56f4-4f49-a52a-b8ab9f7676a3,8203591000.0,1158605.0
5,NP16029926,20160326 20:00:00:000,,,,,,,,,...,Lands,https://irma.nps.gov/DataStore/Reference/Profi...,Blue Ridge Parkway,2022-01-06T10:41:13,WASO,2022-01-06T10:41:13,WASO,a8ef8bcf-aaa2-4623-a8f6-1d93fe31f945,642257900.0,2287707.0
101,NP16041155,20160425 15:00:00:000,,,,,,,,,...,Lands,https://irma.nps.gov/DataStore/Reference/Profi...,Grand Canyon,2022-01-06T10:42:52,WASO,2022-01-06T10:42:52,WASO,16feb031-8263-422e-8bdc-deb1a65f6153,7487848000.0,2135274.0


In [111]:
imars_crash_coords_geo_withparknames.shape

(5137, 53)

In [112]:
imars_crash_coords_geo_withparknames2  = imars_crash_coords_geo_withparknames.drop_duplicates(subset=['IMARS_Record_No'])

In [113]:
imars_crash_coords_geo_withparknames2.shape

(3102, 53)

In [114]:
imars_crash_coords_geo_withparknames2.REGION.value_counts()

SE    1031
IM     507
PW     460
NE     285
MW      86
NC      52
AK       9
Name: REGION, dtype: int64

In [225]:
imars_crash_withparknames = pd.DataFrame(imars_crash_coords_geo_withparknames2.drop(columns='geometry'))
imars_crash_withparknames['Park']= imars_crash_withparknames['UNIT_CODE']

In [226]:
imars_crash_withparknames.shape

(3102, 52)

In [227]:
imars_crash_withparknames2 = imars_crash_withparknames[list(imars_crash_parks.columns)]
imars_crash_withparknames2.shape

(3102, 32)

In [228]:
imars_crash_expanded = pd.concat([imars_crash_parks,imars_crash_withparknames2])
imars_crash_expanded  = imars_crash_expanded.drop_duplicates(subset=['IMARS_Record_No'])
imars_crash_expanded.shape

(7811, 32)

## Step 2: Filter for Necessary Fields, Group by IMARS_RECORD_NO


#### Creating New Columns for Injury Severity

- Requires passenger dataset

In [230]:
imars_passenger['NUM_OCC'] = 1
imars_passenger['INCID_NO'] = imars_passenger['IMARS_Record_No']
imars_passenger['Crash_Date_Time_report'] = imars_passenger['Crash_Date_Time_person']

imars_passenger_slim = imars_passenger[[
    'INCID_NO', 'NUM_OCC', 'Crash_Date_Time_report','Injury_Severity'
]]

In [231]:
imars_passenger_slim.head()

Unnamed: 0,INCID_NO,NUM_OCC,Crash_Date_Time_report,Injury_Severity
0,NP12000078,1,20120106 14:30:00:000,
1,NP12000378,1,20120121 00:00:00:000,
2,NP12000550,1,20120131 13:51:00:000,
3,NP12000911,1,20120208 00:00:00:000,
4,NP12000935,1,20120215 10:15:00:000,


In [232]:
imars_passenger_slim['No Injury']= np.where(imars_passenger_slim['Injury_Severity']=='01. No injury', 1,0)
imars_passenger_slim['Possible Injury']= np.where(imars_passenger_slim['Injury_Severity']=='02. Possible injury', 1,0)
imars_passenger_slim['Non-incapacitating Injury']= np.where(imars_passenger_slim['Injury_Severity']=='03. Non-incapacitating injury', 1,0)
imars_passenger_slim['Incapacitating Injury']= np.where(imars_passenger_slim['Injury_Severity']=='04. Incapacitating injury', 1,0)
imars_passenger_slim['Fatality']= np.where(imars_passenger_slim['Injury_Severity']=='05. Fatal', 1,0)
imars_passenger_slim['Unknown Injury']= np.where(imars_passenger_slim['Injury_Severity']=='99. Unknown', 1,0)


In [233]:
imars_passenger_slim.Fatality.sum()

90

In [234]:
imars_passenger_slim_agg = imars_passenger_slim.groupby(by=['INCID_NO']).sum()
#imars_slim_agg = imars_slim_agg.drop(columns = ['ACCLASS','DINJ', 'PINJ'])
imars_passenger_slim_agg = imars_passenger_slim_agg.reset_index()


In [235]:
imars_passenger_slim_agg.shape

(11076, 8)

In [236]:
imars_passenger_slim_agg.Fatality.sum()

90

#### Creating New Columns for Accident Class

In [237]:
imars_crash_details['INCID_NO'] = imars_crash_details['IMARS_Record_No']

imars_crash_details_slim = imars_crash_details[[
    'INCID_NO', 'First_Harmful_Event_Type','First_Harmful_Event'
]]



In [238]:
imars_crash_details_slim.First_Harmful_Event_Type.value_counts()

Collision with person, MV or non-fixed object    4611
Collision with fixed object                      3358
Non-collision                                    1639
Collision with animals                           1247
Unknown                                           406
Name: First_Harmful_Event_Type, dtype: int64

In [239]:
imars_crash_details_slim.loc[imars_crash_details_slim['First_Harmful_Event_Type']=="Collision with person, MV or non-fixed object"].First_Harmful_Event.value_counts()

21. Motor vehicle in transport                                           2843
22. Parked motor vehicle                                                 1281
25. Other non-fixed object                                                295
18. Bicycle                                                                57
17. Pedestrian                                                             52
24. Work zone/maintenance equipment                                        25
23. Struck by falling, shifting cargo or anything set in motion by MV      16
20. Railway vehicle                                                         2
99. Unknown                                                                 1
Name: First_Harmful_Event, dtype: int64

In [240]:
pd.options.mode.chained_assignment = None  # default='warn'

imars_crash_details_slim['Collision with Fixed Object']= np.where(imars_crash_details_slim['First_Harmful_Event_Type']=="Collision with fixed object", 1,0)
imars_crash_details_slim['Collision with Animal']= np.where(imars_crash_details_slim['First_Harmful_Event_Type']=="Collision with animals", 1,0)
imars_crash_details_slim['Non-Collision']= np.where(imars_crash_details_slim['First_Harmful_Event_Type']=="Non-collision", 1,0)
imars_crash_details_slim['Other Accident Class']= np.where(imars_crash_details_slim['First_Harmful_Event_Type']=="Unknown",1,0)


imars_crash_details_slim['Collision with Other Motor Vehicle']= np.where(imars_crash_details_slim['First_Harmful_Event']=='21. Motor vehicle in transport', 1,0)
imars_crash_details_slim['Collision with Pedestrian']= np.where(imars_crash_details_slim['First_Harmful_Event']=="17. Pedestrian", 1,0)
imars_crash_details_slim['Collision with Bicycle']= np.where(imars_crash_details_slim['First_Harmful_Event']=="18. Bicycle", 1,0)
imars_crash_details_slim['Collision with Parked Motor Vehicle']= np.where(imars_crash_details_slim['First_Harmful_Event']=="22. Parked motor vehicle", 1,0)
imars_crash_details_slim['Collision with Railway Train']= np.where(imars_crash_details_slim['First_Harmful_Event']=="20. Railway vehicle", 1,0)
imars_crash_details_slim['Collision with Other Object']= np.where(imars_crash_details_slim['First_Harmful_Event']=="25. Other non-fixed object", 1,0)
imars_crash_details_slim['Collision with Unknown']= np.where(imars_crash_details_slim['First_Harmful_Event'].isin(['23. Struck by falling, shifting cargo or anything set in motion by MV',
                                                                                              '24. Work zone/maintenance equipment']), 1,0)


In [241]:
imars_crash_details_slim['Non-Collision'].value_counts()

0    10262
1     1639
Name: Non-Collision, dtype: int64

In [242]:
imars_crash_details_slim_nodups = imars_crash_details_slim.drop_duplicates()
imars_passenger_slim_agg_nodups = imars_passenger_slim_agg.drop_duplicates()

In [244]:
imars_crash_clean = imars_crash_expanded[['IMARS_Record_No','Latitude', 'Longitude', 'Park','Crash_Date_Time']]
imars_crash_clean = imars_crash_clean.rename(columns={"IMARS_Record_No": "INCID_NO"})

imars_crash_clean = imars_crash_clean.drop_duplicates("INCID_NO")

In [245]:
imars_crash_details_slim_nodups.shape, imars_crash_clean.shape, imars_passenger_slim_agg_nodups.shape

((11131, 14), (7811, 5), (11076, 8))

In [257]:
imars_slim_agg=imars_crash_clean.merge(imars_passenger_slim_agg_nodups, how='left', on='INCID_NO')
imars_slim_agg=imars_slim_agg.merge(imars_crash_details_slim_nodups, how='left', on='INCID_NO')


In [258]:
imars_slim_agg.shape, imars_crash_details_slim_nodups.shape, imars_crash_clean.shape, imars_passenger_slim_agg_nodups.shape

((7842, 25), (11131, 14), (7811, 5), (11076, 8))

In [259]:
imars_slim_agg.Fatality.sum()

71

In [260]:
imars_slim_agg.head()

Unnamed: 0,INCID_NO,Latitude,Longitude,Park,Crash_Date_Time,NUM_OCC,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,First_Harmful_Event_Type,First_Harmful_Event,Collision with Fixed Object,Collision with Animal,Non-Collision,Other Accident Class,Collision with Other Motor Vehicle,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Other Object,Collision with Unknown
0,NP13031262,,,BUFF,20130615 14:00:00:000,1,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0
1,NP13054509,,,MORU,20130803 10:56:00:000,1,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0
2,NP13071209,,,SHEN,20130905 12:03:00:000,1,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0
3,NP13082489,,,SHEN,20131006 09:10:00:000,1,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0
4,NP13082534,,,SHEN,20131006 11:42:00:000,1,0,0,0,0,0,0,,,0,0,0,0,0,0,0,0,0,0,0


In [261]:
imars_slim_agg.columns

Index(['INCID_NO', 'Latitude', 'Longitude', 'Park', 'Crash_Date_Time',
       'NUM_OCC', 'No Injury', 'Possible Injury', 'Non-incapacitating Injury',
       'Incapacitating Injury', 'Fatality', 'Unknown Injury',
       'First_Harmful_Event_Type', 'First_Harmful_Event',
       'Collision with Fixed Object', 'Collision with Animal', 'Non-Collision',
       'Other Accident Class', 'Collision with Other Motor Vehicle',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Other Object', 'Collision with Unknown'],
      dtype='object')

In [262]:
imars_slim_agg = imars_slim_agg[['INCID_NO', 'NUM_OCC','Park', 'Non-Collision',
       'Collision with Other Motor Vehicle', 'Collision with Fixed Object',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Animal', 'Collision with Other Object',
       'Collision with Unknown', 'Other Accident Class', 'No Injury',
       'Possible Injury', 'Non-incapacitating Injury', 'Incapacitating Injury',
       'Fatality', 'Unknown Injury', 'Latitude', 'Longitude', 'Crash_Date_Time']]

In [263]:
imars_slim_agg.head()

Unnamed: 0,INCID_NO,NUM_OCC,Park,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Latitude,Longitude,Crash_Date_Time
0,NP13031262,1,BUFF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20130615 14:00:00:000
1,NP13054509,1,MORU,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20130803 10:56:00:000
2,NP13071209,1,SHEN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20130905 12:03:00:000
3,NP13082489,1,SHEN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20131006 09:10:00:000
4,NP13082534,1,SHEN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20131006 11:42:00:000


In [264]:
imars_slim_agg.Fatality.sum()

71

In [265]:
imars_slim_agg.to_csv("./crash_data_IMARS_clean.csv",index=False)