## Crash Data Wrangling Jupyter Notebook

**Author:** Sophie Kaye

**Date:** 6/28/22

**Purpose:** This notebook will look for existing data pertaining to crashes related to speed and wildlife

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
from shapely.geometry import Point, LineString, Polygon

In [2]:
myworkingdirectory = r"C:\Users\Sophie.Kaye\Desktop\NPS Safety\Data Cleaning"
files = os.listdir(myworkingdirectory)
for f in files:
    print(f)

crash_data_IMARS_clean.csv
IMARS_crash.csv
IMARS_crash_details.csv
IMARS_noparks_nocoords_someinfo.csv
IMARS_passenger.csv
IMARS_vehicle.csv
test.csv


In [5]:
os.chdir(myworkingdirectory)

In [6]:
imars_crash = pd.read_csv("IMARS_crash.csv")
imars_crash_details = pd.read_csv("IMARS_crash_details.csv")
imars_passenger = pd.read_csv("IMARS_passenger.csv")
imars_vehicle = pd.read_csv("IMARS_vehicle.csv")

In [6]:
imars_passenger.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Driver_Action',
       'Driver_Condition', 'Driver_Distraction', 'Suspect_Alcohol',
       'Alcohol_Test', 'Alcohol_Test_Result_1', 'Alcohol_Test_Result_2',
       'Suspect_Drugs', 'Drug_Test', 'Violations_Issued', 'Seat_Position',
       'Injury_Severity', 'Air_Bag_Deployed', 'Ejection',
       'Injury_Transported_By', 'Safety_Equipment_Used', 'Vehicle_number',
       'Injured_transported_by', 'Non_motorist_safety_equipment',
       'Non_motorist_action_circumstance_prior_to_crash',
       'Non_motorist_action_circumstance_at_time_of_crash',
       'Non_motorist_condition_at_time_of_crash', 'Non_motorist_distraction',
       'Non_motorist_location_at_time_of_crash', 'Pedestrian_Type',
       'Pedestrian_Type_Detail', 'Involvement', 'NUM_OCC', 'INCID_NO',
       'Latitude', 'Longitude', 'Park', 'CRASH_DATE', 'CRASH_TIME',
       'CRASH_YEAR'],
      dtype='object')

In [7]:
imars_crash.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Linked_Address_Classification',
       'City_Town_Park_Location', 'State', 'County', 'Direction',
       'Linked_Street_Number', 'Linked_Common_Name', 'Street_Type',
       'Direction.1', 'NEAR_Distance_to_MI', 'NEAR_Direction_To',
       'NEAR_Direction', 'NEAR_route_street_road_name', 'NEAR_Road_Type',
       'NEAR_Direction.1', 'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_Road_Type',
       'AT_Direction', 'Mile_Marker', 'Latitude', 'Longitude', 'Region',
       'State_Zone', 'Park', 'Site', 'Place', 'Point',
       'Road_Type_Classification', 'Linked_Address', 'index_right', 'OBJECTID',
       'UNIT_CODE', 'GIS_Notes', 'UNIT_NAME', 'DATE_EDIT', 'STATE', 'REGION',
       'GNIS_ID', 'UNIT_TYPE', 'CREATED_BY', 'METADATA', 'PARKNAME',
       'CreationDate', 'Creator', 'EditDate', 'Editor', 'GlobalID',
       'Shape__Area', 'Shape__Length', 'CRASH_DATE', 'CRASH_TIME',
       'CRASH_YEAR'

In [8]:
imars_crash_details.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Number_of_Vehicles_Involved',
       'Injury_or_Fatal_Crash', 'Investigated_at_Scene', 'Hit_and_Run',
       'Non_Motor_Vehicl_Property_Damage', 'Amount_of_Property_Damage',
       'First_Harmful_Event_Type', 'First_Harmful_Event',
       'Location_of_First_Harmful_Event', 'Weather', 'Roadway_Condition',
       'Lighting', 'School_Bus_related', 'AS_Road_Circumstance',
       'Environmental_Contributing_Circumstances', 'Work_Zone_Related',
       'Work_Zone_Workers_Present', 'Work_Zone_Location',
       'Law_Enforcement_Present_at_Work_Zone', 'Relation_to_Junction',
       'Type_of_Intersection', 'Manner_of_Collision', 'INCID_NO', 'Latitude',
       'Longitude', 'Park', 'CRASH_DATE', 'CRASH_TIME', 'CRASH_YEAR'],
      dtype='object')

In [9]:
imars_vehicle.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Vehicle_Number',
       'Number_of_Occupants', 'Vehicle_Towed', 'Insurance_verified',
       'Initial_Impact_Point', 'Most_Damaged_Area', 'Extent_of_Damage',
       'Direction_of_Travel_Prior_to_Crash', 'Posted_Speed',
       'First_Event_Type', 'First_Event', 'Second_Event_Type', 'Second_Event',
       'Third_Event_Type', 'Third_Event', 'Fourth_Event_Type', 'Fourth_Event',
       'Motor_Vehicle_Unit_Type', 'Vehicle_Owner', 'Vehicle_Type',
       'Non_Commercial_Trailer_Style', 'Emergency_Vehicle_Use',
       'Emergency_Equipment_Activated', 'Special_Function_of_MV_in_Transport',
       'Motor_Vehicle_Contributing_Circumstance',
       'Vehicle_Maneuver_Action_Prior_to_Crash', 'Road_Surface', 'Grade',
       'Roadway_Alignment', 'Total_Number_of_Lanes', 'Traffic_Control',
       'Traffic_Control_Working_Properly', 'Roadway_Description',
       'Commercial_Non_Commercial', 'Number_of_Axles', 'Gross_Vehicle_Weight',
       'Combination_GVW', 

# Speed Data Investigation

In [11]:
imars_vehicle.Posted_Speed.value_counts().sort_index()

01. 5 mph          281
02. 10 mph         440
03. 15 mph        1022
04. 20 mph         226
05. 25 mph        2413
06. 30 mph         470
07. 35 mph        3390
08. 40 mph         614
09. 45 mph        3821
10. 50 mph        1568
11. 55 mph        1827
12. 60 mph          31
13. 65 mph          40
14. 70 mph           7
15. 75 mph           2
98. Not posted    1528
99. Unknown       1246
Name: Posted_Speed, dtype: int64

In [12]:
imars_vehicle.shape

(23576, 57)

In [14]:
imars_vehicle.loc[imars_vehicle['Posted_Speed'].isnull()==True].shape

(4650, 57)

In [20]:
#the percentage of vehicle records with missing posted speeds is:
round(4650/23576,3)*100

19.7

In [24]:
pd.options.display.max_rows = 10000000
imars_passenger.Driver_Action.value_counts().sort_index()

01. No contributing action                                                                                                                                                                  4857
01. No contributing action; 02. Ran off roadway                                                                                                                                                5
01. No contributing action; 03. Failed to yield to right-of-way                                                                                                                                2
01. No contributing action; 08. Exceeded posted speed limit; 09. Drove too fast for conditions                                                                                                 1
01. No contributing action; 09. Drove too fast for conditions                                                                                                                                  2
01. No contributing action; 09. Dro

In [31]:
speeding_crashes = imars_passenger.loc[imars_passenger['Driver_Action'].str.contains("08. Exceeded posted speed limit")==True]
speeding_crashes['Driver_Action']

488      02. Ran off roadway; 08. Exceeded posted speed...
698      08. Exceeded posted speed limit; 09. Drove too...
723      02. Ran off roadway; 08. Exceeded posted speed...
800      02. Ran off roadway; 08. Exceeded posted speed...
1004     08. Exceeded posted speed limit; 19. Erratic/r...
1556     02. Ran off roadway; 08. Exceeded posted speed...
1558     02. Ran off roadway; 08. Exceeded posted speed...
1594     02. Ran off roadway; 08. Exceeded posted speed...
1659     08. Exceeded posted speed limit; 19. Erratic/r...
1797                       08. Exceeded posted speed limit
1911     02. Ran off roadway; 08. Exceeded posted speed...
2251                       08. Exceeded posted speed limit
2289     08. Exceeded posted speed limit; 09. Drove too...
2585     08. Exceeded posted speed limit; 15. Driver di...
2663     02. Ran off roadway; 08. Exceeded posted speed...
2841     02. Ran off roadway; 08. Exceeded posted speed...
2956     02. Ran off roadway; 08. Exceeded posted speed.

In [32]:
speeding_crashes.shape

(305, 37)

In [35]:
# 305 crashes included "Exceeded posted speed limit" as (at least one of the) contributing factors

In [33]:
reckless_crashes = imars_passenger.loc[imars_passenger['Driver_Action'].str.contains("09. Drove too fast for conditions")==True]
reckless_crashes['Driver_Action']

630      09. Drove too fast for conditions; 18. Failed ...
678                      09. Drove too fast for conditions
681                      09. Drove too fast for conditions
695      02. Ran off roadway; 09. Drove too fast for co...
698      08. Exceeded posted speed limit; 09. Drove too...
709      09. Drove too fast for conditions; 17. Followe...
715      02. Ran off roadway; 09. Drove too fast for co...
752      02. Ran off roadway; 07. Disregarded other roa...
800      02. Ran off roadway; 08. Exceeded posted speed...
849      09. Drove too fast for conditions; 17. Followe...
904      09. Drove too fast for conditions; 24. Over-co...
911      09. Drove too fast for conditions; 18. Failed ...
1094     02. Ran off roadway; 09. Drove too fast for co...
1096                     09. Drove too fast for conditions
1097                     09. Drove too fast for conditions
1098                     09. Drove too fast for conditions
1099                     09. Drove too fast for conditio

In [34]:
reckless_crashes.shape

(955, 37)

In [36]:
# 955 crashes included "Drove too fast for conditions" as (at least one of the) contributing factors

In [38]:
speeding_and_reckless_crashes = speeding_crashes.loc[speeding_crashes['Driver_Action'].str.contains("09. Drove too fast for conditions")==True]
speeding_and_reckless_crashes['Driver_Action']

698      08. Exceeded posted speed limit; 09. Drove too...
800      02. Ran off roadway; 08. Exceeded posted speed...
1556     02. Ran off roadway; 08. Exceeded posted speed...
1558     02. Ran off roadway; 08. Exceeded posted speed...
2289     08. Exceeded posted speed limit; 09. Drove too...
2663     02. Ran off roadway; 08. Exceeded posted speed...
2841     02. Ran off roadway; 08. Exceeded posted speed...
3529     08. Exceeded posted speed limit; 09. Drove too...
3533     08. Exceeded posted speed limit; 09. Drove too...
3570     08. Exceeded posted speed limit; 09. Drove too...
3827     08. Exceeded posted speed limit; 09. Drove too...
3838     02. Ran off roadway; 08. Exceeded posted speed...
3839     02. Ran off roadway; 08. Exceeded posted speed...
3981     02. Ran off roadway; 08. Exceeded posted speed...
3997     06. Disregarded other traffic sign; 08. Exceed...
4000     06. Disregarded other traffic sign; 08. Exceed...
4003     06. Disregarded other traffic sign; 08. Exceed.

In [39]:
speeding_and_reckless_crashes.shape

(153, 37)

In [56]:
# 153 crashes included BOTH "Exceeded posted speed" and "Drove too fast for conditions" as (at least two of the) contributing factors
# --double counts between the prior two queries

In [44]:
imars_passenger.shape

(27931, 37)

In [43]:
imars_passenger.loc[imars_passenger['Driver_Action'].isnull()==True].shape

(15854, 37)

In [45]:
#the percentage of passenger records with missing "Driver Action"s entries is:
round(15854/27931,3)*100

56.8

In [46]:
imars_passenger.Violations_Issued.value_counts().sort_index()

*NOA*                                                                                                                                                              15
*NOA*; 01. None                                                                                                                                                     1
*NOA*; 09. Improper lane use                                                                                                                                        1
01. None                                                                                                                                                         7921
01. None; 02. DWI/DUI; 20. Reckless driving; 27. Failure to use seat belt                                                                                           1
01. None; 09. Improper lane use                                                                                                                                     1
01. 

In [47]:
exceed_speed_citation = imars_passenger.loc[imars_passenger['Violations_Issued'].str.contains("04. Exceeding speed limit")==True]
exceed_speed_citation['Violations_Issued']

2986     04. Exceeding speed limit; 07. Wrong side of r...
3533     04. Exceeding speed limit; 33. Full time and a...
3981         04. Exceeding speed limit; 05. Speed too fast
4585     02. DWI/DUI; 03. Drinking (i.e. open container...
4766     04. Exceeding speed limit; 22. Driver's licens...
4768     04. Exceeding speed limit; 22. Driver's licens...
8616     04. Exceeding speed limit; 05. Speed too fast;...
10720    04. Exceeding speed limit; 34. Other (explain ...
11673               02. DWI/DUI; 04. Exceeding speed limit
12735                            04. Exceeding speed limit
20161    04. Exceeding speed limit; 34. Other (explain ...
21289                            04. Exceeding speed limit
21304    04. Exceeding speed limit; 20. Reckless drivin...
21833    04. Exceeding speed limit; 22. Driver's licens...
21866        04. Exceeding speed limit; 05. Speed too fast
22191    04. Exceeding speed limit; 22. Driver's licens...
22335    04. Exceeding speed limit; 15. Disregard offic.

In [48]:
exceed_speed_citation.shape

(26, 37)

In [58]:
# 26 crashes included "Exceededing speed limit" as (at least one of the) citations issued

In [50]:
too_fast_citation = imars_passenger.loc[imars_passenger['Violations_Issued'].str.contains("05. Speed too fast")==True]
too_fast_citation['Violations_Issued']

3729     02. DWI/DUI; 05. Speed too fast; 25. Hit and r...
3838                                    05. Speed too fast
3839                                    05. Speed too fast
3946                                    05. Speed too fast
3981         04. Exceeding speed limit; 05. Speed too fast
4546                                    05. Speed too fast
4585     02. DWI/DUI; 03. Drinking (i.e. open container...
4838     02. DWI/DUI; 05. Speed too fast; 22. Driver's ...
5109                  05. Speed too fast; 24. No insurance
5151     05. Speed too fast; 34. Other (explain in narr...
5214     05. Speed too fast; 26. Registration violation...
6364                                    05. Speed too fast
6597           05. Speed too fast; 06. Following too close
7457     05. Speed too fast; 22. Driver's license viola...
8282                      05. Speed too fast; 32. Careless
8550                                    05. Speed too fast
8565     05. Speed too fast; 09. Improper lane use; 20..

In [51]:
too_fast_citation.shape

(77, 37)

In [None]:
# 77 crashes included "Speed too fast" as (at least one of the) citations issued

In [54]:
ExceedSpeed_and_TooFast_citation = exceed_speed_citation.loc[exceed_speed_citation['Violations_Issued'].str.contains("05. Speed too fast")==True]
ExceedSpeed_and_TooFast_citation['Violations_Issued']

3981         04. Exceeding speed limit; 05. Speed too fast
4585     02. DWI/DUI; 03. Drinking (i.e. open container...
8616     04. Exceeding speed limit; 05. Speed too fast;...
21866        04. Exceeding speed limit; 05. Speed too fast
23072    04. Exceeding speed limit; 05. Speed too fast;...
23483        04. Exceeding speed limit; 05. Speed too fast
Name: Violations_Issued, dtype: object

In [55]:
ExceedSpeed_and_TooFast_citation.shape

(6, 37)

In [57]:
# 6 crashes included BOTH "Exceededing speed limit" and "Speed too fast" as (at least two of the) citations issued
# --double counts between the prior two queries

In [59]:
imars_passenger.shape

(27931, 37)

In [60]:
imars_passenger.loc[imars_passenger['Violations_Issued'].isnull()==True].shape

(17435, 37)

In [61]:
#the percentage of passenger records with missing "Violations Issued" entries is:
round(17435/27931,3)*100

62.4

# Wildlife Crash Data Investigation

In [62]:
imars_passenger.Driver_Action.value_counts().sort_index()

01. No contributing action                                                                                                                                                                  4857
01. No contributing action; 02. Ran off roadway                                                                                                                                                5
01. No contributing action; 03. Failed to yield to right-of-way                                                                                                                                2
01. No contributing action; 08. Exceeded posted speed limit; 09. Drove too fast for conditions                                                                                                 1
01. No contributing action; 09. Drove too fast for conditions                                                                                                                                  2
01. No contributing action; 09. Dro

In [65]:
animal_crashes = imars_passenger.loc[imars_passenger['Driver_Action'].str.contains("21. Avoiding animal")==True]
animal_crashes['Driver_Action']

670                                    21. Avoiding animal
700                                    21. Avoiding animal
722               02. Ran off roadway; 21. Avoiding animal
730      21. Avoiding animal; 24. Over-correcting/over-...
784                                    21. Avoiding animal
820                                    21. Avoiding animal
835                                    21. Avoiding animal
844      11. Improper backing; 15. Driver distraction; ...
850                                    21. Avoiding animal
878                                    21. Avoiding animal
882                                    21. Avoiding animal
886                                    21. Avoiding animal
914                                    21. Avoiding animal
963                         21. Avoiding animal; 98. Other
969                                    21. Avoiding animal
972                                    21. Avoiding animal
1192     09. Drove too fast for conditions; 21. Avoidin.

In [66]:
animal_crashes.shape

(229, 37)

In [98]:
# 229 crashes included "Avoiding animal" as (at least one of the) contributing factors
# already know the percent null from above speed data investigation

In [88]:
imars_vehicle.First_Event_Type.value_counts().sort_index()

01. Non-collision                                     2909
02. Collision with person, MV or non-fixed object    12896
03. Collision with animal                             1337
04. Collision with fixed object                       3194
05. Not applicable                                     314
06. Unknown                                            665
Name: First_Event_Type, dtype: int64

In [99]:
# 1337 vehicle records include collision with animal as a contributing factor

In [77]:
imars_vehicle.shape

(23576, 57)

In [78]:
imars_vehicle.loc[imars_vehicle['First_Event_Type'].isnull()==True].shape

(2261, 57)

In [79]:
#the percentage of vehicle records with missing "First Event Type" entries is:
round(2261/23576,3)*100

9.6

In [7]:
imars_vehicle.Second_Event_Type.value_counts().sort_index()

01. Non-collision                                     1266
02. Collision with person, MV or non-fixed object     1282
03. Collision with animal                               26
04. Collision with fixed object                       2101
05. Not applicable                                   10415
06. Unknown                                           4075
Name: Second_Event_Type, dtype: int64

In [8]:
imars_vehicle.loc[imars_vehicle['Second_Event_Type'].isnull()==True].shape

(4411, 57)

In [9]:
#the percentage of vehicle records with missing "Second Event Type" entries is:
round(4411/23576,3)*100

18.7

In [10]:
imars_vehicle.Third_Event_Type.value_counts().sort_index()

01. Non-collision                                      654
02. Collision with person, MV or non-fixed object      309
03. Collision with animal                                8
04. Collision with fixed object                        837
05. Not applicable                                   12419
06. Unknown                                           4718
Name: Third_Event_Type, dtype: int64

In [11]:
imars_vehicle.loc[imars_vehicle['Third_Event_Type'].isnull()==True].shape

(4631, 57)

In [12]:
#the percentage of vehicle records with missing "Third Event Type" entries is:
round(4631/23576,3)*100

19.6

In [13]:
imars_vehicle.Fourth_Event_Type.value_counts().sort_index()

01. Non-collision                                      394
02. Collision with person, MV or non-fixed object      158
03. Collision with animal                                8
04. Collision with fixed object                        288
05. Not applicable                                   13082
06. Unknown                                           4942
Name: Fourth_Event_Type, dtype: int64

In [14]:
imars_vehicle.loc[imars_vehicle['Fourth_Event_Type'].isnull()==True].shape

(4704, 57)

In [15]:
#the percentage of vehicle records with missing "Fourth Event Type" entries is:
round(4704/23576,3)*100

20.0

In [32]:
#total vehicle records in which "collision with animal" was listed as at least one of four event types:
1337+26+8+8

1379

In [34]:
no_EventType1 = imars_vehicle.loc[imars_vehicle['First_Event_Type'].isnull()==True]
no_EventType1_2 = no_EventType1.loc[no_EventType1['Second_Event_Type'].isnull()==True]
no_EventType1_3 = no_EventType1_2.loc[no_EventType1_2['Third_Event_Type'].isnull()==True]
no_EventType_any = no_EventType1_3.loc[no_EventType1_3['Fourth_Event_Type'].isnull()==True]
no_EventType_any.shape

(2259, 57)

In [35]:
#the percentage of vehicle records with no "Event Types" whatsoever
round(2559/23576,3)*100

10.9

In [18]:
pd.options.display.max_rows = 10000000
imars_vehicle.First_Event.value_counts().sort_index()

00. Not applicable                                                         313
01. Overturn/rollover                                                      201
02. Fire/explosion                                                          13
03. Immersion                                                               11
04. Jackknife                                                               10
05. Cargo/equipment loss or shift                                           22
06. Separation of units                                                     13
07. Ran off the road right                                                1084
08. Ran Off the road left                                                  355
09. Cross median or centerline                                             229
10. Downhill runaway                                                        20
11. Fell/jumped from motor vehicle                                          26
12. Avoiding an object on road                      

In [19]:
imars_vehicle.loc[imars_vehicle['First_Event'].isnull()==True].shape

(2313, 57)

In [20]:
#the percentage of vehicle records with missing "First Event" entries is:
round(2313/23576,3)*100

9.8

In [21]:
imars_vehicle.Second_Event.value_counts().sort_index()

00. Not applicable                                                       10369
01. Overturn/rollover                                                      276
02. Fire/explosion                                                           9
03. Immersion                                                                1
04. Jackknife                                                                4
05. Cargo/equipment loss or shift                                            5
06. Separation of units                                                     82
07. Ran off the road right                                                 203
08. Ran Off the road left                                                  152
09. Cross median or centerline                                             106
10. Downhill runaway                                                        21
11. Fell/jumped from motor vehicle                                          56
12. Avoiding an object on road                      

In [22]:
imars_vehicle.loc[imars_vehicle['Second_Event'].isnull()==True].shape

(4496, 57)

In [23]:
#the percentage of vehicle records with missing "Second Event" entries is:
round(4496/23576,3)*100

19.1

In [28]:
imars_vehicle.Third_Event.value_counts().sort_index()

00. Not applicable                                                       12366
01. Overturn/rollover                                                      179
02. Fire/explosion                                                           7
03. Immersion                                                                2
04. Jackknife                                                                1
05. Cargo/equipment loss or shift                                            7
06. Separation of units                                                     10
07. Ran off the road right                                                  43
08. Ran Off the road left                                                   72
09. Cross median or centerline                                              17
10. Downhill runaway                                                         7
11. Fell/jumped from motor vehicle                                          18
12. Avoiding an object on road                      

In [25]:
imars_vehicle.loc[imars_vehicle['Third_Event'].isnull()==True].shape

(4711, 57)

In [26]:
#the percentage of vehicle records with missing "Third Event" entries is:
round(4711/23576,3)*100

20.0

In [29]:
imars_vehicle.Fourth_Event.value_counts().sort_index()

00. Not applicable                                                       13026
01. Overturn/rollover                                                       80
02. Fire/explosion                                                           6
05. Cargo/equipment loss or shift                                            4
06. Separation of units                                                     10
07. Ran off the road right                                                   9
08. Ran Off the road left                                                    5
09. Cross median or centerline                                               4
10. Downhill runaway                                                         3
11. Fell/jumped from motor vehicle                                           9
13. Avoiding an animal on road                                              13
16. Other non-collision                                                    242
17. Pedestrian                                      

In [30]:
imars_vehicle.loc[imars_vehicle['Fourth_Event'].isnull()==True].shape

(4787, 57)

In [31]:
#the percentage of vehicle records with missing "Fourth Event" entries is:
round(4787/23576,3)*100

20.3

In [36]:
#total vehicle records in which "13. Avoiding an animal on road" was listed as at least one of four events:
135+4+1+13

153

In [37]:
#total vehicle records in which "26. Horse/llama" was listed as at least one of four events:
15+1+1+1

18

In [38]:
#total vehicle records in which "27. Cow" was listed as at least one of four events:
4+1

5

In [40]:
#total vehicle records in which "28. Deer" was listed as at least one of four events:
883+10+3+3

899

In [41]:
#total vehicle records in which "29. Elk" was listed as at least one of four events:
109+2+1+1

113

In [42]:
#total vehicle records in which "30. Moose" was listed as at least one of four events:
14+0+0+0

14

In [43]:
#total vehicle records in which "31. Bison" was listed as at least one of four events:
151+11+2+2

166

In [44]:
#total vehicle records in which "32. Bear" was listed as at least one of four events:
100+1+1+1

103

In [45]:
#total vehicle records in which "33. Antelope" was listed as at least one of four events:
10+0+0+0

10

In [46]:
#total vehicle records in which "34. Sheep/goats" was listed as at least one of four events:
13+0+0+0

13

In [47]:
#total vehicle records in which "36. Other wild animal" was listed as at least one of four events:
33+0+0+0

33

In [48]:
#total vehicle records in which "37. other domestic" was listed as at least one of four events:
4+0+0+0

4

In [49]:
no_Event1 = imars_vehicle.loc[imars_vehicle['First_Event'].isnull()==True]
no_Event1_2 = no_Event1.loc[no_Event1['Second_Event'].isnull()==True]
no_Event1_3 = no_Event1_2.loc[no_Event1_2['Third_Event'].isnull()==True]
no_Event_any = no_Event1_3.loc[no_Event1_3['Fourth_Event'].isnull()==True]
no_Event_any.shape

(2305, 57)

In [50]:
#the percentage of vehicle records with no "Event Types" whatsoever
round(2305/23576,3)*100

9.8

In [100]:
# 1376 crash detail records include collision with animal as a contributing factor 
# note that this value is inconsistent with the number (1337) indicated by "First_Event_Type" in the vehicle table, which may indicate poor data quality
# (discrepancy is not intuitive because expect that vehicle table would have more entries than crash details when multiple vehicles are involved in a single crash)

In [70]:
imars_crash_details.shape

(15302, 31)

In [71]:
imars_crash_details.loc[imars_crash_details['First_Harmful_Event_Type'].isnull()==True].shape

(757, 31)

In [72]:
#the percentage of crashes with missing "First Harmful Event Type" records is:
round(757/15302,3)*100

4.9

In [80]:
imars_crash_details.Environmental_Contributing_Circumstances.value_counts()

01. None                                                                                    10032
02. Weather                                                                                  1270
06. Animal(s) in roadway                                                                      974
99. Unknown                                                                                   595
03. Physical obstruction(s) (trees, bushes, etc.)                                             239
07. Other                                                                                     229
05. Glare                                                                                      88
02. Weather; 03. Physical obstruction(s) (trees, bushes, etc.)                                 86
02. Weather; 07. Other                                                                         56
02. Weather; 06. Animal(s) in roadway                                                          50
04. Rockfall        

In [92]:
animal_in_roadway = imars_crash_details.loc[imars_crash_details['Environmental_Contributing_Circumstances'].str.contains('Animal')==True]
animal_in_roadway['Environmental_Contributing_Circumstances']

516                               06. Animal(s) in roadway
549                               06. Animal(s) in roadway
560                               06. Animal(s) in roadway
561                               06. Animal(s) in roadway
566                               06. Animal(s) in roadway
574                               06. Animal(s) in roadway
585                               06. Animal(s) in roadway
591                               06. Animal(s) in roadway
592                               06. Animal(s) in roadway
593      03. Physical obstruction(s) (trees, bushes, et...
613                               06. Animal(s) in roadway
614                               06. Animal(s) in roadway
633                               06. Animal(s) in roadway
639                               06. Animal(s) in roadway
644                               06. Animal(s) in roadway
646                               06. Animal(s) in roadway
658                               06. Animal(s) in roadw

In [93]:
animal_in_roadway.shape

(1081, 31)

In [94]:
# 1081 crashes included "Animal(s) in roadway" as (at least one of the) environmental contributing factors

In [95]:
imars_crash_details.shape

(15302, 31)

In [96]:
imars_crash_details.loc[imars_crash_details['Environmental_Contributing_Circumstances'].isnull()==True].shape

(1500, 31)

In [97]:
#the percentage of crashes with missing "Environmental Contributing Circumstance" records is:
round(1500/15302,3)*100

9.8