## Crash Data VRU and Work Zone Exploration Jupyter Notebook

**Author:** Sophie Kaye

**Date:** 7/12/22

**Purpose:** This notebook assesses the quality of existing data on crashes involving Vulnerable Road Users (pedestrians and cyclists) and Work Zones.

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Folder holding the IMARS CSV extracts; print each entry to confirm the expected files exist.
myworkingdirectory = r"C:\Users\Sophie.Kaye\Desktop\NPS Safety\Data Cleaning"
files = os.listdir(myworkingdirectory)
for file_name in files:
    print(file_name)

crash_data_IMARS_clean.csv
IMARS_crash.csv
IMARS_crash_details.csv
IMARS_noparks_nocoords_someinfo.csv
IMARS_passenger.csv
IMARS_vehicle.csv
test.csv


In [3]:
# Make the data folder the current directory so the CSV files can be opened by bare file name.
os.chdir(myworkingdirectory)

In [4]:
# Load the four IMARS extracts (crash, crash details, passenger, vehicle) by file name
# from the current working directory.
imars_crash, imars_crash_details, imars_passenger, imars_vehicle = (
    pd.read_csv(csv_name)
    for csv_name in (
        "IMARS_crash.csv",
        "IMARS_crash_details.csv",
        "IMARS_passenger.csv",
        "IMARS_vehicle.csv",
    )
)

In [6]:
imars_passenger.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Driver_Action',
       'Driver_Condition', 'Driver_Distraction', 'Suspect_Alcohol',
       'Alcohol_Test', 'Alcohol_Test_Result_1', 'Alcohol_Test_Result_2',
       'Suspect_Drugs', 'Drug_Test', 'Violations_Issued', 'Seat_Position',
       'Injury_Severity', 'Air_Bag_Deployed', 'Ejection',
       'Injury_Transported_By', 'Safety_Equipment_Used', 'Vehicle_number',
       'Injured_transported_by', 'Non_motorist_safety_equipment',
       'Non_motorist_action_circumstance_prior_to_crash',
       'Non_motorist_action_circumstance_at_time_of_crash',
       'Non_motorist_condition_at_time_of_crash', 'Non_motorist_distraction',
       'Non_motorist_location_at_time_of_crash', 'Pedestrian_Type',
       'Pedestrian_Type_Detail', 'Involvement', 'NUM_OCC', 'INCID_NO',
       'Latitude', 'Longitude', 'Park', 'CRASH_DATE', 'CRASH_TIME',
       'CRASH_YEAR'],
      dtype='object')

In [7]:
imars_crash.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Linked_Address_Classification',
       'City_Town_Park_Location', 'State', 'County', 'Direction',
       'Linked_Street_Number', 'Linked_Common_Name', 'Street_Type',
       'Direction.1', 'NEAR_Distance_to_MI', 'NEAR_Direction_To',
       'NEAR_Direction', 'NEAR_route_street_road_name', 'NEAR_Road_Type',
       'NEAR_Direction.1', 'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_Road_Type',
       'AT_Direction', 'Mile_Marker', 'Latitude', 'Longitude', 'Region',
       'State_Zone', 'Park', 'Site', 'Place', 'Point',
       'Road_Type_Classification', 'Linked_Address', 'index_right', 'OBJECTID',
       'UNIT_CODE', 'GIS_Notes', 'UNIT_NAME', 'DATE_EDIT', 'STATE', 'REGION',
       'GNIS_ID', 'UNIT_TYPE', 'CREATED_BY', 'METADATA', 'PARKNAME',
       'CreationDate', 'Creator', 'EditDate', 'Editor', 'GlobalID',
       'Shape__Area', 'Shape__Length', 'CRASH_DATE', 'CRASH_TIME',
       'CRASH_YEAR'

In [8]:
imars_crash_details.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Number_of_Vehicles_Involved',
       'Injury_or_Fatal_Crash', 'Investigated_at_Scene', 'Hit_and_Run',
       'Non_Motor_Vehicl_Property_Damage', 'Amount_of_Property_Damage',
       'First_Harmful_Event_Type', 'First_Harmful_Event',
       'Location_of_First_Harmful_Event', 'Weather', 'Roadway_Condition',
       'Lighting', 'School_Bus_related', 'AS_Road_Circumstance',
       'Environmental_Contributing_Circumstances', 'Work_Zone_Related',
       'Work_Zone_Workers_Present', 'Work_Zone_Location',
       'Law_Enforcement_Present_at_Work_Zone', 'Relation_to_Junction',
       'Type_of_Intersection', 'Manner_of_Collision', 'INCID_NO', 'Latitude',
       'Longitude', 'Park', 'CRASH_DATE', 'CRASH_TIME', 'CRASH_YEAR'],
      dtype='object')

In [9]:
imars_vehicle.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Vehicle_Number',
       'Number_of_Occupants', 'Vehicle_Towed', 'Insurance_verified',
       'Initial_Impact_Point', 'Most_Damaged_Area', 'Extent_of_Damage',
       'Direction_of_Travel_Prior_to_Crash', 'Posted_Speed',
       'First_Event_Type', 'First_Event', 'Second_Event_Type', 'Second_Event',
       'Third_Event_Type', 'Third_Event', 'Fourth_Event_Type', 'Fourth_Event',
       'Motor_Vehicle_Unit_Type', 'Vehicle_Owner', 'Vehicle_Type',
       'Non_Commercial_Trailer_Style', 'Emergency_Vehicle_Use',
       'Emergency_Equipment_Activated', 'Special_Function_of_MV_in_Transport',
       'Motor_Vehicle_Contributing_Circumstance',
       'Vehicle_Maneuver_Action_Prior_to_Crash', 'Road_Surface', 'Grade',
       'Roadway_Alignment', 'Total_Number_of_Lanes', 'Traffic_Control',
       'Traffic_Control_Working_Properly', 'Roadway_Description',
       'Commercial_Non_Commercial', 'Number_of_Axles', 'Gross_Vehicle_Weight',
       'Combination_GVW', 

In [10]:
# Raise the display row cap so the full frequency table prints without truncation,
# then tabulate First_Event values ordered by their code label.
pd.set_option("display.max_rows", 10000000)
imars_vehicle["First_Event"].value_counts().sort_index()

00. Not applicable                                                         313
01. Overturn/rollover                                                      201
02. Fire/explosion                                                          13
03. Immersion                                                               11
04. Jackknife                                                               10
05. Cargo/equipment loss or shift                                           22
06. Separation of units                                                     13
07. Ran off the road right                                                1084
08. Ran Off the road left                                                  355
09. Cross median or centerline                                             229
10. Downhill runaway                                                        20
11. Fell/jumped from motor vehicle                                          26
12. Avoiding an object on road                      

In [14]:
imars_vehicle.Second_Event.value_counts().sort_index()

00. Not applicable                                                       10369
01. Overturn/rollover                                                      276
02. Fire/explosion                                                           9
03. Immersion                                                                1
04. Jackknife                                                                4
05. Cargo/equipment loss or shift                                            5
06. Separation of units                                                     82
07. Ran off the road right                                                 203
08. Ran Off the road left                                                  152
09. Cross median or centerline                                             106
10. Downhill runaway                                                        21
11. Fell/jumped from motor vehicle                                          56
12. Avoiding an object on road                      

In [12]:
imars_vehicle.Third_Event.value_counts().sort_index()

00. Not applicable                                                       12366
01. Overturn/rollover                                                      179
02. Fire/explosion                                                           7
03. Immersion                                                                2
04. Jackknife                                                                1
05. Cargo/equipment loss or shift                                            7
06. Separation of units                                                     10
07. Ran off the road right                                                  43
08. Ran Off the road left                                                   72
09. Cross median or centerline                                              17
10. Downhill runaway                                                         7
11. Fell/jumped from motor vehicle                                          18
12. Avoiding an object on road                      

In [13]:
imars_vehicle.Fourth_Event.value_counts().sort_index()

00. Not applicable                                                       13026
01. Overturn/rollover                                                       80
02. Fire/explosion                                                           6
05. Cargo/equipment loss or shift                                            4
06. Separation of units                                                     10
07. Ran off the road right                                                   9
08. Ran Off the road left                                                    5
09. Cross median or centerline                                               4
10. Downhill runaway                                                         3
11. Fell/jumped from motor vehicle                                           9
13. Avoiding an animal on road                                              13
16. Other non-collision                                                    242
17. Pedestrian                                      

In [15]:
# total vehicle records in which "17. Pedestrian" was listed as at least one of four events.
# Computed from the data with a row-wise "any": adding the four per-column counts by hand
# counts a record once per slot, so it overstates the total if a code repeats across slots.
event_cols = ["First_Event", "Second_Event", "Third_Event", "Fourth_Event"]
# astype(str) turns missing values into the text "nan", which never matches the code prefix.
imars_vehicle[event_cols].apply(lambda col: col.astype(str).str.startswith("17.")).any(axis=1).sum()

100

In [16]:
# total vehicle records in which "18. Bicycle" was listed as at least one of four events.
# Computed from the data with a row-wise "any": adding the four per-column counts by hand
# counts a record once per slot, so it overstates the total if a code repeats across slots.
event_cols = ["First_Event", "Second_Event", "Third_Event", "Fourth_Event"]
# astype(str) turns missing values into the text "nan", which never matches the code prefix.
imars_vehicle[event_cols].apply(lambda col: col.astype(str).str.startswith("18.")).any(axis=1).sum()

125

In [17]:
# total vehicle records in which event code 19 (other pedalcycle) was listed as at least one
# of four events. Matching on the "19." code prefix avoids depending on the exact label text.
# Computed from the data with a row-wise "any": adding the four per-column counts by hand
# counts a record once per slot, so it overstates the total if a code repeats across slots.
event_cols = ["First_Event", "Second_Event", "Third_Event", "Fourth_Event"]
# astype(str) turns missing values into the text "nan", which never matches the code prefix.
imars_vehicle[event_cols].apply(lambda col: col.astype(str).str.startswith("19.")).any(axis=1).sum()

10

In [18]:
# Narrow the vehicle table one event column at a time, keeping only rows where that
# column is null; the final frame holds records with nothing recorded in any of the four slots.
no_Event1 = imars_vehicle[imars_vehicle['First_Event'].isna()]
no_Event1_2 = no_Event1[no_Event1['Second_Event'].isna()]
no_Event1_3 = no_Event1_2[no_Event1_2['Third_Event'].isna()]
no_Event_any = no_Event1_3[no_Event1_3['Fourth_Event'].isna()]
no_Event_any.shape

(2305, 57)

In [27]:
imars_vehicle.shape

(23576, 57)

In [19]:
#the percentage of vehicle records with no "Events" whatsoever
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(2305/23576*100, 1)

9.8

In [20]:
imars_vehicle.Traffic_Control.value_counts().sort_index()

01. None                                      12723
02. Stop sign                                   926
03. Yield sign                                  232
04. Flashing traffic signal                      28
05. Traffic signal                              790
06. Pedestrian crossing                         155
07. Person (ofc/flagger, xing guard, etc.)       91
08. No passing zone                            1246
10. Temporary traffic control                   135
12. RR crossing device/signal                     5
13. Other                                       527
99. Unknown                                    1198
Name: Traffic_Control, dtype: int64

In [21]:
# 155 vehicle records that took place at pedestrian crossings

In [22]:
# Shape of the subset of vehicle records whose Traffic_Control entry is null.
imars_vehicle[imars_vehicle['Traffic_Control'].isna()].shape

(5267, 57)

In [23]:
#the percentage of vehicle records with no "Traffic_Control" entry
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(5267/23576*100, 1)

22.3

In [24]:
imars_passenger.Pedestrian_Type_Detail.value_counts().sort_index()

01. Pedestrian                             40
02. Bicyclist                              65
03. Other pedalcyclist                      1
04. Equestrian/other rider                  2
05. Other pedestrian (wheelchair, etc.)     7
99. Unknown                                45
Name: Pedestrian_Type_Detail, dtype: int64

In [26]:
# Shape of the subset of passenger records whose Pedestrian_Type_Detail entry is null.
imars_passenger[imars_passenger['Pedestrian_Type_Detail'].isna()].shape

(27771, 37)

In [28]:
imars_passenger.shape

(27931, 37)

In [29]:
#the percentage of passenger records with no "Pedestrian Type Detail" entry
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(27771/27931*100, 1)

99.4

In [30]:
imars_passenger.Driver_Action.value_counts().sort_index()

01. No contributing action                                                                                                                                                                  4857
01. No contributing action; 02. Ran off roadway                                                                                                                                                5
01. No contributing action; 03. Failed to yield to right-of-way                                                                                                                                2
01. No contributing action; 08. Exceeded posted speed limit; 09. Drove too fast for conditions                                                                                                 1
01. No contributing action; 09. Drove too fast for conditions                                                                                                                                  2
01. No contributing action; 09. Dro

In [31]:
# Passenger records whose Driver_Action includes code 22. The label is matched as a literal
# substring (regex=False) so "." is not interpreted as a regex wildcard, and na=False makes
# missing entries evaluate to False directly instead of relying on an "== True" comparison.
VRU_crashes = imars_passenger.loc[
    imars_passenger['Driver_Action'].str.contains(
        "22. Avoiding non-motorist/pedestrian", regex=False, na=False
    )
]
VRU_crashes['Driver_Action']

2000                  22. Avoiding non-motorist/pedestrian
2002                  22. Avoiding non-motorist/pedestrian
3612                  22. Avoiding non-motorist/pedestrian
3655                  22. Avoiding non-motorist/pedestrian
3657                  22. Avoiding non-motorist/pedestrian
3659                  22. Avoiding non-motorist/pedestrian
3661                  22. Avoiding non-motorist/pedestrian
3663                  22. Avoiding non-motorist/pedestrian
5628     15. Driver distraction; 22. Avoiding non-motor...
6380     02. Ran off roadway; 22. Avoiding non-motorist...
6415                  22. Avoiding non-motorist/pedestrian
6799                  22. Avoiding non-motorist/pedestrian
7843     22. Avoiding non-motorist/pedestrian; 23. Avoi...
7846     22. Avoiding non-motorist/pedestrian; 23. Avoi...
8064                  22. Avoiding non-motorist/pedestrian
8496     15. Driver distraction; 19. Erratic/reckless/c...
8609     09. Drove too fast for conditions; 22. Avoidin.

In [32]:
VRU_crashes.shape

(26, 37)

In [33]:
# 26 passenger records (rows of imars_passenger, not necessarily 26 distinct crashes) included "Avoiding non-motorist/pedestrian" as (at least one of the) driver actions

In [34]:
# Shape of the subset of passenger records whose Driver_Action entry is null.
imars_passenger[imars_passenger['Driver_Action'].isna()].shape

(15854, 37)

In [35]:
#the percentage of passenger records with missing "Driver Action" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(15854/27931*100, 1)

56.8

In [36]:
imars_passenger.Non_motorist_action_circumstance_at_time_of_crash.value_counts().sort_index()

01. No improper action                                                                       160
01. No improper action; 02. Dart/dash                                                          1
01. No improper action; 09. Inattentive                                                        2
01. No improper action; 10. Not visible                                                        1
02. Dart/dash                                                                                 11
02. Dart/dash; 03. Failure to yield right-of-way                                               1
02. Dart/dash; 04. Failure to obey traffic signs, signals, or officer                          1
02. Dart/dash; 05. In roadway improperly (standing, lying, working, playing)                   1
02. Dart/dash; 09. Inattentive                                                                 1
02. Dart/dash; 10. Not visible                                                                 1
03. Failure to yield right-of-

In [40]:
# Passenger records whose non-motorist action at time of crash includes code 06.
# regex=False matches "06." literally; as a regex the "." would match any character, so
# strings such as "06x" would also be selected. na=False treats missing entries as
# non-matches directly instead of relying on an "== True" comparison.
TwoCyclist_crashes = imars_passenger.loc[
    imars_passenger['Non_motorist_action_circumstance_at_time_of_crash'].str.contains(
        "06.", regex=False, na=False
    )
]
TwoCyclist_crashes['Non_motorist_action_circumstance_at_time_of_crash']

6550    06. Bicyclists riding 2+ abreast
Name: Non_motorist_action_circumstance_at_time_of_crash, dtype: object

In [41]:
# Shape of the subset of passenger records with no non-motorist action at time of crash.
imars_passenger[imars_passenger['Non_motorist_action_circumstance_at_time_of_crash'].isna()].shape

(27620, 37)

In [42]:
#the percentage of passenger records with missing "Non-motorist actions at time of crash" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(27620/27931*100, 1)

98.9

In [44]:
imars_passenger.Non_motorist_location_at_time_of_crash.value_counts().sort_index()

01. Intersection-marked crosswalk       34
02. Intersection-unmarked crosswalk      3
03. Intersection-other                  21
04. Midblock-marked crosswalk            2
05. Travel lane-other location          66
06. Bicycle lane                        14
07. Shoulder/roadside                   61
08. Sidewalk                             3
09. Median/crossing island               1
10. Driveway access                      3
11. Shared-use path or trail             7
12. Non-roadway area                     6
13. Other                               55
99. Unknown                            145
Name: Non_motorist_location_at_time_of_crash, dtype: int64

In [45]:
# Shape of the subset of passenger records with no non-motorist location at time of crash.
imars_passenger[imars_passenger['Non_motorist_location_at_time_of_crash'].isna()].shape

(27510, 37)

In [46]:
#the percentage of passenger records with missing "Non-motorist locations at time of crash" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(27510/27931*100, 1)

98.5

In [5]:
imars_crash.Region.value_counts().sort_index()

AKR      66
IMR    1390
MWR     176
NCR     465
NER     428
PWR    1775
SER    1815
Name: Region, dtype: int64

In [6]:
# Shape of the subset of crash records whose Region entry is null.
imars_crash[imars_crash['Region'].isna()].shape

(6595, 55)

In [7]:
imars_crash.REGION.value_counts().sort_index()

AK      3
IM     60
MW      3
NC    255
NE     23
PW     52
SE    109
Name: REGION, dtype: int64

In [8]:
# Shape of the subset of crash records whose upper-case REGION entry is null
# (REGION is a separate column from Region in this table).
imars_crash[imars_crash['REGION'].isna()].shape

(12205, 55)

In [6]:
# Raise the display row cap so the full frequency table prints without truncation,
# then tabulate AS_Road_Circumstance values ordered by their code label.
pd.set_option("display.max_rows", 10000000)
imars_crash_details["AS_Road_Circumstance"].value_counts().sort_index()

01. None                                                                                                                                                10101
01. None; 02. Road surface condition (wet, icy, snow, slush, etc.)                                                                                         10
01. None; 03. Debris                                                                                                                                        1
01. None; 03. Debris; 04. Rut, holes, bumps                                                                                                                 1
01. None; 04. Rut, holes, bumps                                                                                                                             2
01. None; 04. Rut, holes, bumps; 06. Worn, travel-polished surface                                                                                          1
01. None; 04. Rut, holes, bumps; 07. Obstruction in 

In [7]:
# Crash-detail records whose AS_Road_Circumstance includes "05. Work zone". The label is
# matched as a literal substring (regex=False) so "." is not interpreted as a regex wildcard,
# and na=False treats missing entries as non-matches instead of relying on "== True".
WorkZone_crashes = imars_crash_details.loc[
    imars_crash_details['AS_Road_Circumstance'].str.contains(
        "05. Work zone", regex=False, na=False
    )
]
WorkZone_crashes['AS_Road_Circumstance']

596                                          05. Work zone
965                                01. None; 05. Work zone
1060                                         05. Work zone
1067     05. Work zone; 09. Shoulders (none, low, soft,...
1136                  04. Rut, holes, bumps; 05. Work zone
1187     05. Work zone; 08. Traffic control device obsc...
1237                                         05. Work zone
1290                                         05. Work zone
1379                               01. None; 05. Work zone
1544                                         05. Work zone
1573                                         05. Work zone
1643     05. Work zone; 09. Shoulders (none, low, soft,...
1717     05. Work zone; 10. Off roadway work lane marki...
1815                                         05. Work zone
1943                                         05. Work zone
2223     02. Road surface condition (wet, icy, snow, sl...
2308     02. Road surface condition (wet, icy, snow, sl.

In [8]:
WorkZone_crashes.shape

(187, 31)

In [10]:
# Shape of the subset of crash-detail records whose AS_Road_Circumstance entry is null.
imars_crash_details[imars_crash_details['AS_Road_Circumstance'].isna()].shape

(1460, 31)

In [9]:
imars_crash_details.shape

(15302, 31)

In [11]:
#the percentage of crash records with missing "AS_Road_Circumstance" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(1460/15302*100, 1)

9.5

In [12]:
imars_crash_details.Work_Zone_Related.value_counts().sort_index()

01. Yes                 271
02. No                11336
03. Not applicable     1004
99. Unknown             403
Name: Work_Zone_Related, dtype: int64

In [13]:
# Shape of the subset of crash-detail records whose Work_Zone_Related entry is null.
imars_crash_details[imars_crash_details['Work_Zone_Related'].isna()].shape

(2288, 31)

In [14]:
#the percentage of crash records with missing "Work Zone Related" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(2288/15302*100, 1)

15.0

In [15]:
imars_crash_details.Work_Zone_Workers_Present.value_counts().sort_index()

01. Yes                140
02. No                7947
03. Not applicable    2610
99. Unknown            516
Name: Work_Zone_Workers_Present, dtype: int64

In [16]:
# Shape of the subset of crash-detail records whose Work_Zone_Workers_Present entry is null.
imars_crash_details[imars_crash_details['Work_Zone_Workers_Present'].isna()].shape

(4089, 31)

In [17]:
#the percentage of crash records with missing "Work Zone Workers Present" entries is:
# (4089 null rows out of 15302; scaled to percent before rounding, because rounding first
# and then multiplying by 100 displayed 26.700000000000003 instead of 26.7)
round(4089/15302*100, 1)

26.700000000000003

In [18]:
imars_crash_details.Work_Zone_Location.value_counts().sort_index()

01. Not applicable                 10397
02. Lane closure                      80
03. Lane shift or crossover           25
04. Work on shoulder/median           35
05. Intermittent or moving work       45
06. Other                             56
99. Unknown                          558
Name: Work_Zone_Location, dtype: int64

In [19]:
# Shape of the subset of crash-detail records whose Work_Zone_Location entry is null.
imars_crash_details[imars_crash_details['Work_Zone_Location'].isna()].shape

(4106, 31)

In [20]:
#the percentage of crash records with missing "Work Zone Location" entries is:
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(4106/15302*100, 1)

26.8

In [21]:
imars_crash_details.Law_Enforcement_Present_at_Work_Zone.value_counts().sort_index()

01. No                                      7660
02. Officer present                           39
03. Law enforcement vehicle only present       6
04. Not applicable                          3056
99. Unknown                                  454
Name: Law_Enforcement_Present_at_Work_Zone, dtype: int64

In [22]:
# Shape of the subset of crash-detail records whose Law_Enforcement_Present_at_Work_Zone entry is null.
imars_crash_details[imars_crash_details['Law_Enforcement_Present_at_Work_Zone'].isna()].shape

(4087, 31)

In [23]:
#the percentage of crash records with missing Law Enforcement Presence at Work Zone entries is:
# (4087 null rows out of 15302; scaled to percent before rounding, because rounding first
# and then multiplying by 100 displayed 26.700000000000003 instead of 26.7)
round(4087/15302*100, 1)

26.700000000000003

In [24]:
imars_vehicle.First_Event.value_counts().sort_index()

00. Not applicable                                                         313
01. Overturn/rollover                                                      201
02. Fire/explosion                                                          13
03. Immersion                                                               11
04. Jackknife                                                               10
05. Cargo/equipment loss or shift                                           22
06. Separation of units                                                     13
07. Ran off the road right                                                1084
08. Ran Off the road left                                                  355
09. Cross median or centerline                                             229
10. Downhill runaway                                                        20
11. Fell/jumped from motor vehicle                                          26
12. Avoiding an object on road                      

In [25]:
imars_vehicle.Second_Event.value_counts().sort_index()

00. Not applicable                                                       10369
01. Overturn/rollover                                                      276
02. Fire/explosion                                                           9
03. Immersion                                                                1
04. Jackknife                                                                4
05. Cargo/equipment loss or shift                                            5
06. Separation of units                                                     82
07. Ran off the road right                                                 203
08. Ran Off the road left                                                  152
09. Cross median or centerline                                             106
10. Downhill runaway                                                        21
11. Fell/jumped from motor vehicle                                          56
12. Avoiding an object on road                      

In [26]:
imars_vehicle.Third_Event.value_counts().sort_index()

00. Not applicable                                                       12366
01. Overturn/rollover                                                      179
02. Fire/explosion                                                           7
03. Immersion                                                                2
04. Jackknife                                                                1
05. Cargo/equipment loss or shift                                            7
06. Separation of units                                                     10
07. Ran off the road right                                                  43
08. Ran Off the road left                                                   72
09. Cross median or centerline                                              17
10. Downhill runaway                                                         7
11. Fell/jumped from motor vehicle                                          18
12. Avoiding an object on road                      

In [27]:
imars_vehicle.Fourth_Event.value_counts().sort_index()

00. Not applicable                                                       13026
01. Overturn/rollover                                                       80
02. Fire/explosion                                                           6
05. Cargo/equipment loss or shift                                            4
06. Separation of units                                                     10
07. Ran off the road right                                                   9
08. Ran Off the road left                                                    5
09. Cross median or centerline                                               4
10. Downhill runaway                                                         3
11. Fell/jumped from motor vehicle                                           9
13. Avoiding an animal on road                                              13
16. Other non-collision                                                    242
17. Pedestrian                                      

In [28]:
# total vehicle records in which "24. Work zone/maintenance equipment" was listed as at least
# one of four events. Computed from the data with a row-wise "any": adding the four
# per-column counts by hand counts a record once per slot, so it overstates the total if a
# code repeats across slots.
event_cols = ["First_Event", "Second_Event", "Third_Event", "Fourth_Event"]
# astype(str) turns missing values into the text "nan", which never matches the code prefix.
imars_vehicle[event_cols].apply(lambda col: col.astype(str).str.startswith("24.")).any(axis=1).sum()

40

In [30]:
# Narrow the vehicle table one event column at a time, keeping only rows where that
# column is null; the final frame holds records with nothing recorded in any of the four slots.
no_Event1 = imars_vehicle[imars_vehicle['First_Event'].isna()]
no_Event1_2 = no_Event1[no_Event1['Second_Event'].isna()]
no_Event1_3 = no_Event1_2[no_Event1_2['Third_Event'].isna()]
no_Event_any = no_Event1_3[no_Event1_3['Fourth_Event'].isna()]
no_Event_any.shape

(2305, 57)

In [31]:
imars_vehicle.shape

(23576, 57)

In [32]:
#the percentage of vehicle records with no "Events" whatsoever
# (scale to percent first, then round to one decimal; rounding before multiplying by 100
# can leave floating-point noise in the displayed value)
round(2305/23576*100, 1)

9.8