## Crash Data Wrangling Jupyter Notebook

**Author:** Eric Englin and Meredith Raymer

**Date:** 11/12/21

**Purpose:** This notebook will combine IMARS datasets over three time periods and change data to merge with CDS and STARS datasets

In [1]:
import pandas as pd
#import geopandas as gpd
import numpy as np
import os

In [2]:
myworkingdirectory = r"C:\Users\eric.englin\Desktop\TSP"
os.chdir(myworkingdirectory)

## Step 1: Concatenate Similar Datasets 

**Note:** Datasets have been divided across three time periods, so these must be combined to start joining the tables together. 

In [3]:
import os
path = './data/IMARS'

files = os.listdir(path)

for f in files:
    print(f)

NPS GOccIvPA Command Address 2011-2015.xlsx
NPS GOccIvPA Command Address 2016 to 4-2018.xlsx
NPS GOccIvPA Command Address 4-2018 to 6-2021.xlsx
NPS MVCOccGPersonReport Command 2011 to 2015.xlsx
NPS MVCOccGPersonReport Command 2016 to 4-2018.xlsx
NPS MVCOccGPersonReport Command 4-2018 to 6-2021.xlsx
NPS MVCOccReportCommand_Classification 2011-2015.xlsx
NPS MVCOccReportCommand_Classification 2016 to 4-2018.xlsx
NPS MVCOccReportCommand_Classification 4-2018 to 6-2021.xlsx
NPS MVCOccVehicleReport Command 2011 to 2015.xlsx
NPS MVCOccVehicleReport Command 2016 to 4-2018.xlsx
NPS MVCOccVehicleReport Command 4-2018 to 6-2021.xlsx


In [4]:
imars_crash_1 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2011-2015.xlsx", sheet_name = "Batch1-Result1")


In [5]:
imars_crash_1 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2011-2015.xlsx", sheet_name = "Batch1-Result1")
imars_crash_2 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_crash_3 = pd.read_excel("./data/IMARS/NPS GOccIvPA Command Address 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_passenger_1 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 2011 to 2015.xlsx", sheet_name = "Batch1-Result1")
imars_passenger_2 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_passenger_3 = pd.read_excel("./data/IMARS/NPS MVCOccGPersonReport Command 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_vehicle_1 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 2011 to 2015.xlsx", sheet_name = "Batch1-Result1")
imars_vehicle_2 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_vehicle_3 = pd.read_excel("./data/IMARS/NPS MVCOccVehicleReport Command 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")

imars_crash_details_1 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 2011-2015.xlsx", sheet_name = "Batch1-Result1")
imars_crash_details_2 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 2016 to 4-2018.xlsx", sheet_name = "Batch1-Result1")
imars_crash_details_3 = pd.read_excel("./data/IMARS/NPS MVCOccReportCommand_Classification 4-2018 to 6-2021.xlsx", sheet_name = "Batch1-Result1")



In [6]:
imars_crash = pd.concat([imars_crash_1, imars_crash_2, imars_crash_3])
imars_passenger = pd.concat([imars_passenger_1, imars_passenger_2, imars_passenger_3])
imars_vehicle = pd.concat([imars_vehicle_1, imars_vehicle_2, imars_vehicle_3])
imars_crash_details = pd.concat([imars_crash_details_1, imars_crash_details_2, imars_crash_details_3])


In [7]:
imars_crash.shape, imars_crash_1.shape, imars_crash_2.shape, imars_crash_3.shape

((12883, 32), (442, 32), (4454, 32), (7987, 32))

In [8]:
imars_passenger.shape, imars_passenger_1.shape, imars_passenger_2.shape, imars_passenger_3.shape

((19047, 28), (499, 28), (6782, 28), (11766, 28))

In [9]:
imars_vehicle.shape, imars_vehicle_1.shape, imars_vehicle_2.shape,imars_vehicle_3.shape

((16109, 51), (493, 51), (5529, 51), (10087, 51))

In [10]:
imars_crash_details.shape, imars_crash_details_1.shape, imars_crash_details_2.shape, imars_crash_details_3.shape

((11901, 24), (491, 24), (3907, 24), (7503, 24))

In [11]:
imars_crash.head()

Unnamed: 0,IMARS_Record_No,Crash_Date_Time,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,...,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address
0,NP12000378,20120121 00:00:00:000,,,,,,,,,...,,,,,,,,,Highway/road/alley (includes street),
1,NP12000078,20120106 14:30:00:000,,,,,,,,,...,,,,,,,,,,
2,NP12000935,20120215 10:15:00:000,,,,,,,,,...,,,,,,,,,Highway/road/alley (includes street),
3,NP12001003,20120218 10:50:00:000,,,,,,,,,...,,,,,,,,,,
4,NP12000911,20120208 00:00:00:000,,,,,,,,,...,,,,,,,,,Parking lot/garage,


In [12]:
imars_crash.columns

Index(['IMARS_Record_No', 'Crash_Date_Time', 'Linked_Address_Classification',
       'City_Town_Park_Location', 'State', 'County', 'Direction',
       'Linked_Street_Number', 'Linked_Common_Name', 'Street_Type',
       'Direction.1', 'NEAR_Distance_to_MI', 'NEAR_Direction_To',
       'NEAR_Direction', 'NEAR_route_street_road_name', 'NEAR_Road_Type',
       'NEAR_Direction.1', 'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_Road_Type',
       'AT_Direction', 'Mile_Marker', 'Latitude', 'Longitude', 'Region',
       'State_Zone', 'Park', 'Site', 'Place', 'Point',
       'Road_Type_Classification', 'Linked_Address'],
      dtype='object')

## Add Parks to Crash Details Dataset

**Note:** IMARS does not have park units with each crash, so these will have to be added using the Latitude and Longitude fields in the imars_crash dataset. 

In [13]:
#imars_crash_spatial = gpd.GeoDataFrame(imars_crash,   
#                                geometry=gpd.points_from_xy(imars_crash.Longitude, imars_crash.Latitude))# 3 - Neighbourhoods
#nps_polygons_shapefile = "./data/shapefiles/NPS_-_Land_Resources_Division_Boundary_and_Tract_Data_Service.shp"
#nps_polygons = gpd.read_file(nps_polygons_shapefile)

In [14]:
#sjoined_crashes = gpd.sjoin(imars_crash_spatial, nps_polygons, op="within")

In [15]:
#sjoined_crashes.head()

## Step 2: Merge Datasets

**Notes:** 

- We have 4 datasets and want to join together to find our fields of interest. 
- The vehicle dataset does not have any needed fields so excluding from this section.

In [16]:
# change the name of crash date and time to a unique value for each df
#later we'll use this to check the accuracy of the merge 
imars_crash.rename(columns={'Crash_Date_Time':'Crash_Date_Time_address'}, inplace=True)
imars_passenger.rename(columns={'Crash_Date_Time':'Crash_Date_Time_person'}, inplace=True)
imars_crash_details.rename(columns={'Crash_Date_Time':'Crash_Date_Time_report'}, inplace=True)
imars_vehicle.rename(columns={'Crash_Date_Time':'Crash_Date_Time_vehicle'}, inplace=True)

In [17]:
# here we use merge on the record number to create a new df of both the person file and address file
imars_crash_passenger=imars_crash.merge(imars_passenger, how='left', on='IMARS_Record_No')


In [18]:
imars_crash.shape, imars_passenger.shape, imars_crash_passenger.shape

((12883, 32), (19047, 28), (27065, 59))

In [19]:
# now I want to check that the merge was correct. 
#I'm using the crash date column since this is another shared attribute between the files
imars_crash_passenger['check1']=np.where((imars_crash_passenger['Crash_Date_Time_address']==imars_crash_passenger['Crash_Date_Time_person']), 0, 1)
imars_crash_passenger['check1'].sum()

15

In [20]:
# this means that 15 rows had different crash times for the same record
#lets look into these more with a df that is only these rows
imars_crash_passengerWrongDate=imars_crash_passenger[imars_crash_passenger['check1']==1]
pd.set_option('display.max_columns', None)
imars_crash_passengerWrongDate.insert(2, 'Crash_Date_Time_person', imars_crash_passengerWrongDate['Crash_Date_Time_person'], allow_duplicates = True)
imars_crash_passengerWrongDate

Unnamed: 0,IMARS_Record_No,Crash_Date_Time_address,Crash_Date_Time_person,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,Direction.1,NEAR_Distance_to_MI,NEAR_Direction_To,NEAR_Direction,NEAR_route_street_road_name,NEAR_Road_Type,NEAR_Direction.1,AT_Intersection_route_street_road_DIRECTION,At_Intersecting_route_street_road_name,AT_Road_Type,AT_Direction,Mile_Marker,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address,Crash_Date_Time_person.1,Driver_Action,Driver_Condition,Driver_Distraction,Suspect_Alcohol,Alcohol_Test,Alcohol_Test_Result_1,Alcohol_Test_Result_2,Suspect_Drugs,Drug_Test,Violations_Issued,Seat_Position,Injury_Severity,Air_Bag_Deployed,Ejection,Injury_Transported_By,Safety_Equipment_Used,Vehicle_number_striking_non_motorist,Injured_transported_by,Non_motorist_safety_equipment,Non_motorist_action_circumstance_prior_to_crash,Non_motorist_action_circumstance_at_time_of_crash,Non_motorist_condition_at_time_of_crash,Non_motorist_distraction,Non_motorist_location_at_time_of_crash,Pedestrian_Type,Pedestrian_Type_Detail,check1
14374,NP18033157,20180325 10:23:00:000,20180325 10:25:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20180325 10:25:00:000,,,,,,,,,,,01. Driver,,,,,,,,,,,,,,,,1
14440,NP18033157,20180325 10:25:00:000,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,33.0,19.389,155.34,,,,,,,,,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1
14481,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1
14851,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 14:21:00:000,,,,,,,,,,,01. Driver,,,,,,1.0,,,,,,,,,,1
14852,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 14:21:00:000,,,,,,,,,,,01. Driver,,,,,,2.0,,,,,,,,,,1
14853,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 14:21:00:000,,,,,,,,,,,03. Front row right,03. Non-incapacitating injury,01. Not deployed,,01. Not transported,98. Not applicable,2.0,01. Not transported,,,,,,,,,1
14854,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 14:21:00:000,,,,,,,,,,,01. Driver,,,,,,3.0,,,,,,,,,,1
14855,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 14:21:00:000,,,,,,,,,,,,,,,,,3.0,,,,,,,,,,1
14868,NP18073139,20180604 14:21:00:000,20180604 15:42:00:000,,,,,,,,,,,,,,,,,,,,,25.760635,-80.625879,,,,,,,,,20180604 15:42:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1
16787,NP19015066,20190214 19:03:37:000,20190214 19:00:00:000,,,,,,,,,,,,,,,,,,,,,44.955403,-110.59452,,,,,,,Highway/road/alley (includes street),,20190214 19:00:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1


In [21]:
# since the columns are mostly the same, the differences are very small, we'll keep merging the other files 
imars_crash_passenger_details=imars_crash_passenger.merge(imars_crash_details, how='left', on='IMARS_Record_No')

In [22]:
imars_crash_passenger.shape, imars_crash_details.shape, imars_crash_passenger_details.shape

((27065, 60), (11901, 24), (52762, 83))

In [23]:
#checking the date time again
imars_crash_passenger_details['check2']=np.where((imars_crash_passenger_details['Crash_Date_Time_address']==imars_crash_passenger_details['Crash_Date_Time_report']), 0, 1)
imars_crash_passenger_details['check2'].sum()

33

In [24]:
#looking at the 33 differences
imars_crash_passenger_detailsWrongDate=imars_crash_passenger_details[imars_crash_passenger_details['check2']==1]
pd.set_option('display.max_columns', None)
imars_crash_passenger_detailsWrongDate.insert(2, 'Crash_Date_Time_report', imars_crash_passenger_detailsWrongDate['Crash_Date_Time_report'], allow_duplicates = True)
imars_crash_passenger_detailsWrongDate

Unnamed: 0,IMARS_Record_No,Crash_Date_Time_address,Crash_Date_Time_report,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,Direction.1,NEAR_Distance_to_MI,NEAR_Direction_To,NEAR_Direction,NEAR_route_street_road_name,NEAR_Road_Type,NEAR_Direction.1,AT_Intersection_route_street_road_DIRECTION,At_Intersecting_route_street_road_name,AT_Road_Type,AT_Direction,Mile_Marker,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address,Crash_Date_Time_person,Driver_Action,Driver_Condition,Driver_Distraction,Suspect_Alcohol,Alcohol_Test,Alcohol_Test_Result_1,Alcohol_Test_Result_2,Suspect_Drugs,Drug_Test,Violations_Issued,Seat_Position,Injury_Severity,Air_Bag_Deployed,Ejection,Injury_Transported_By,Safety_Equipment_Used,Vehicle_number_striking_non_motorist,Injured_transported_by,Non_motorist_safety_equipment,Non_motorist_action_circumstance_prior_to_crash,Non_motorist_action_circumstance_at_time_of_crash,Non_motorist_condition_at_time_of_crash,Non_motorist_distraction,Non_motorist_location_at_time_of_crash,Pedestrian_Type,Pedestrian_Type_Detail,check1,Crash_Date_Time_report.1,Number_of_Vehicles_Involved,Injury_or_Fatal_Crash,Investigated_at_Scene,Hit_and_Run,Non_Motor_Vehicl_Property_Damage,Amount_of_Property_Damage,First_Harmful_Event_Type,First_Harmful_Event,Location_of_First_Harmful_Event,Weather,Roadway_Condition,Lighting,School_Bus_related,AS_Road_Circumstance,Environmental_Contributing_Circumstances,Work_Zone_Related,Work_Zone_Workers_Present,Work_Zone_Location,Law_Enforcement_Present_at_Work_Zone,Relation_to_Junction,Type_of_Intersection,Manner_of_Collision,check2
39720,NP18033157,20180325 10:23:00:000,20180325 10:25:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,0,20180325 10:25:00:000,1.0,02. No,Yes,No,No,Over $1000,Collision with fixed object,"60. Rock, boulder, rock slide",02. Shoulder,02. Raining; 04. Fog,02. Wet,01. Daylight,01. No,"02. Road surface condition (wet, icy, snow, sl...",02. Weather,02. No,02. No,01. Not applicable,01. No,,,08. Other,1
39722,NP18033157,20180325 10:23:00:000,20180325 10:25:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20180325 10:25:00:000,,,,,,,,,,,01. Driver,,,,,,,,,,,,,,,,1,20180325 10:25:00:000,1.0,02. No,Yes,No,No,Over $1000,Collision with fixed object,"60. Rock, boulder, rock slide",02. Shoulder,02. Raining; 04. Fog,02. Wet,01. Daylight,01. No,"02. Road surface condition (wet, icy, snow, sl...",02. Weather,02. No,02. No,01. Not applicable,01. No,,,08. Other,1
39856,NP18033157,20180325 10:25:00:000,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,33.0,19.389,155.34,,,,,,,,,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1,20180325 10:23:00:000,1.0,02. No,Yes,No,No,Over $1000,Collision with fixed object,"60. Rock, boulder, rock slide",02. Shoulder,02. Raining,02. Wet,07. Other,01. No,"02. Road surface condition (wet, icy, snow, sl...",02. Weather,02. No,,,,,,08. Other,1
39858,NP18033157,20180325 10:25:00:000,20180325 10:23:00:000,,,,,,,,,,,,,,,,,,,,33.0,19.389,155.34,,,,,,,,,20180325 10:25:00:000,,,,,,,,,,,01. Driver,,,,,,,,,,,,,,,,0,20180325 10:23:00:000,1.0,02. No,Yes,No,No,Over $1000,Collision with fixed object,"60. Rock, boulder, rock slide",02. Shoulder,02. Raining,02. Wet,07. Other,01. No,"02. Road surface condition (wet, icy, snow, sl...",02. Weather,02. No,,,,,,08. Other,1
39927,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,,,,,,,,1,20180409 16:44:25:000,2.0,01. Yes,Yes,No,No,Over $1000,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,01. On roadway,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,,,,02. Intersection,02. Four (4)-way intersection,08. Other,1
39929,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:45:00:000,03. Failed to yield to right-of-way; 05. Ran s...,01. Apparently normal,07. Unknown if distracted,01. No,1. No test performed,,,01. No,1. No test performed,17. Disregard stop sign; 24. No insurance,01. Driver,04. Incapacitating injury,05. Deployed: combination,01. Apparently normal,06. Other (private MV),99. Unknown,1.0,06. Other (private MV),,,,,,,,,0,20180409 16:44:25:000,2.0,01. Yes,Yes,No,No,Over $1000,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,01. On roadway,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,,,,02. Intersection,02. Four (4)-way intersection,08. Other,1
39931,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:45:00:000,01. No contributing action,01. Apparently normal,01. Not distracted,01. No,1. No test performed,,,01. No,1. No test performed,01. None,01. Driver,01. No injury,05. Deployed: combination,01. Apparently normal,01. Not transported,99. Unknown,2.0,01. Not transported,,,,,,,,,0,20180409 16:44:25:000,2.0,01. Yes,Yes,No,No,Over $1000,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,01. On roadway,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,,,,02. Intersection,02. Four (4)-way intersection,08. Other,1
39933,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:45:00:000,,,,,,,,,,,07. Second row right,01. No injury,05. Deployed: combination,,01. Not transported,99. Unknown,2.0,01. Not transported,,,,,,,,,0,20180409 16:44:25:000,2.0,01. Yes,Yes,No,No,Over $1000,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,01. On roadway,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,,,,02. Intersection,02. Four (4)-way intersection,08. Other,1
39935,NP18040002,20180409 16:45:00:000,20180409 16:44:25:000,,,,,,,,,,,,,,,,,,,,338.0,34.990712,-87.821509,,,,,,,,,20180409 16:45:00:000,,,,,,,,,,,03. Front row right,04. Incapacitating injury,02. Deployed: front,,02. EMS (ground),02. Shoulder and lap belt used,2.0,02. EMS (ground),,,,,,,,,0,20180409 16:44:25:000,2.0,01. Yes,Yes,No,No,Over $1000,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,01. On roadway,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,,,,02. Intersection,02. Four (4)-way intersection,08. Other,1
40314,NP18073139,20180604 15:42:00:000,20180604 14:21:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,,20180604 15:42:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,0,20180604 14:21:00:000,3.0,01. Yes,Yes,No,Yes,Unknown,"Collision with person, MV or non-fixed object",21. Motor vehicle in transport,07. Parking lot,01. Clear,01. Dry,01. Daylight,01. No,01. None,01. None,02. No,02. No,01. Not applicable,01. No,01. Non-junction,01. Not an intersection,"04. Sideswipe, same direction",1


## Step 3: Filter for Necessary Fields, Group by IMARS_RECORD_NO


In [25]:
imars_crash_passenger_details.columns

Index(['IMARS_Record_No', 'Crash_Date_Time_address',
       'Linked_Address_Classification', 'City_Town_Park_Location', 'State',
       'County', 'Direction', 'Linked_Street_Number', 'Linked_Common_Name',
       'Street_Type', 'Direction.1', 'NEAR_Distance_to_MI',
       'NEAR_Direction_To', 'NEAR_Direction', 'NEAR_route_street_road_name',
       'NEAR_Road_Type', 'NEAR_Direction.1',
       'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_Road_Type',
       'AT_Direction', 'Mile_Marker', 'Latitude', 'Longitude', 'Region',
       'State_Zone', 'Park', 'Site', 'Place', 'Point',
       'Road_Type_Classification', 'Linked_Address', 'Crash_Date_Time_person',
       'Driver_Action', 'Driver_Condition', 'Driver_Distraction',
       'Suspect_Alcohol', 'Alcohol_Test', 'Alcohol_Test_Result_1',
       'Alcohol_Test_Result_2', 'Suspect_Drugs', 'Drug_Test',
       'Violations_Issued', 'Seat_Position', 'Injury_Severity',
       'Air_Bag_Deployed', '

In [26]:
imars_crash_passenger_details['NUM_OCC'] = 1
imars_crash_passenger_details['INCID_NO'] = imars_crash_passenger_details['IMARS_Record_No']
imars_slim = imars_crash_passenger_details[[
    'INCID_NO', 'NUM_OCC', 'State', 'Crash_Date_Time_report', 'Latitude', 'Longitude','First_Harmful_Event_Type',
    'First_Harmful_Event','Injury_Severity'
]]

In [57]:
imars_slim.head()

Unnamed: 0,INCID_NO,NUM_OCC,State,Crash_Date_Time_report,Latitude,Longitude,First_Harmful_Event_Type,First_Harmful_Event,Injury_Severity,Collision with Fixed Object,Collision with Animal,Non-Collision,Other Accident Class,Collision with Other Motor Vehicle,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Other Object,Collision with Unknown,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury
0,NP12000378,1,,20120121 00:00:00:000,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,NP12000078,1,,20120106 14:30:00:000,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,NP12000935,1,,20120215 10:15:00:000,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,NP12001003,1,,20120218 10:50:00:000,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,NP12000911,1,,20120208 00:00:00:000,,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Creating New Columns for Accident Class

In [28]:
imars_slim.First_Harmful_Event_Type.value_counts()

Collision with person, MV or non-fixed object    26239
Collision with fixed object                      10265
Collision with animals                            8473
Non-collision                                     4035
Unknown                                           2987
Name: First_Harmful_Event_Type, dtype: int64

In [29]:
imars_slim.loc[imars_slim['First_Harmful_Event_Type']=="Collision with person, MV or non-fixed object"].First_Harmful_Event.value_counts()

21. Motor vehicle in transport                                           16421
22. Parked motor vehicle                                                  7424
25. Other non-fixed object                                                1079
18. Bicycle                                                                553
17. Pedestrian                                                             140
24. Work zone/maintenance equipment                                         53
23. Struck by falling, shifting cargo or anything set in motion by MV       30
20. Railway vehicle                                                          8
99. Unknown                                                                  1
Name: First_Harmful_Event, dtype: int64

In [30]:
pd.options.mode.chained_assignment = None  # default='warn'

imars_slim['Collision with Fixed Object']= np.where(imars_slim['First_Harmful_Event_Type']=="Collision with fixed object", 1,0)
imars_slim['Collision with Animal']= np.where(imars_slim['First_Harmful_Event_Type']=="Collision with animals", 1,0)
imars_slim['Non-Collision']= np.where(imars_slim['First_Harmful_Event_Type']=="Non-collision", 1,0)
imars_slim['Other Accident Class']= np.where(imars_slim['First_Harmful_Event_Type']=="Unknown",1,0)


imars_slim['Collision with Other Motor Vehicle']= np.where(imars_slim['First_Harmful_Event']=='21. Motor vehicle in transport', 1,0)
imars_slim['Collision with Pedestrian']= np.where(imars_slim['First_Harmful_Event']=="17. Pedestrian", 1,0)
imars_slim['Collision with Bicycle']= np.where(imars_slim['First_Harmful_Event']=="18. Bicycle", 1,0)
imars_slim['Collision with Parked Motor Vehicle']= np.where(imars_slim['First_Harmful_Event']=="22. Parked motor vehicle", 1,0)
imars_slim['Collision with Railway Train']= np.where(imars_slim['First_Harmful_Event']=="20. Railway vehicle", 1,0)
imars_slim['Collision with Other Object']= np.where(imars_slim['First_Harmful_Event']=="25. Other non-fixed object", 1,0)
imars_slim['Collision with Unknown']= np.where(imars_slim['First_Harmful_Event'].isin(['23. Struck by falling, shifting cargo or anything set in motion by MV',
                                                                                              '24. Work zone/maintenance equipment']), 1,0)


In [31]:
imars_slim['Non-Collision'].value_counts()

0    48727
1     4035
Name: Non-Collision, dtype: int64

#### Creating New Columns for Injury Severity

In [32]:
imars_slim.Injury_Severity.value_counts()

01. No injury                    29044
99. Unknown                       5398
03. Non-incapacitating injury     2095
02. Possible injury               2021
04. Incapacitating injury         1272
05. Fatal                          966
Name: Injury_Severity, dtype: int64

In [33]:
imars_slim['No Injury']= np.where(imars_slim['Injury_Severity']=='01. No injury', 1,0)
imars_slim['Possible Injury']= np.where(imars_slim['Injury_Severity']=='02. Possible injury', 1,0)
imars_slim['Non-incapacitating Injury']= np.where(imars_slim['Injury_Severity']=='03. Non-incapacitating injury', 1,0)
imars_slim['Incapacitating Injury']= np.where(imars_slim['Injury_Severity']=='04. Incapacitating injury', 1,0)
imars_slim['Fatality']= np.where(imars_slim['Injury_Severity']=='05. Fatal', 1,0)
imars_slim['Unknown Injury']= np.where(imars_slim['Injury_Severity']=='99. Unknown', 1,0)


In [108]:
imars_slim_agg = imars_slim.groupby(by=['INCID_NO']).sum()
#imars_slim_agg = imars_slim_agg.drop(columns = ['ACCLASS','DINJ', 'PINJ'])
imars_slim_agg = imars_slim_agg.reset_index()


In [109]:
imars_slim_agg.head()

Unnamed: 0,INCID_NO,NUM_OCC,Latitude,Longitude,Collision with Fixed Object,Collision with Animal,Non-Collision,Other Accident Class,Collision with Other Motor Vehicle,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Other Object,Collision with Unknown,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury
0,NP12000078,1,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,NP12000378,1,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,NP12000550,1,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,NP12000911,1,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,NP12000935,1,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [110]:
imars_slim.shape, imars_slim_agg.shape, imars_crash_details.shape

((52762, 26), (10934, 21), (11901, 24))

In [111]:
imars_slim_agg = imars_slim_agg[['INCID_NO', 'NUM_OCC',  'Non-Collision',
       'Collision with Other Motor Vehicle', 'Collision with Fixed Object',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Animal', 'Collision with Other Object',
       'Collision with Unknown', 'Other Accident Class', 'No Injury',
       'Possible Injury', 'Non-incapacitating Injury', 'Incapacitating Injury',
       'Fatality', 'Unknown Injury']]

In [112]:
imars_crash.head()

Unnamed: 0,IMARS_Record_No,Crash_Date_Time_address,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,Direction.1,NEAR_Distance_to_MI,NEAR_Direction_To,NEAR_Direction,NEAR_route_street_road_name,NEAR_Road_Type,NEAR_Direction.1,AT_Intersection_route_street_road_DIRECTION,At_Intersecting_route_street_road_name,AT_Road_Type,AT_Direction,Mile_Marker,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address
0,NP12000378,20120121 00:00:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Highway/road/alley (includes street),
1,NP12000078,20120106 14:30:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,NP12000935,20120215 10:15:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Highway/road/alley (includes street),
3,NP12001003,20120218 10:50:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,NP12000911,20120208 00:00:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,


In [114]:
imars_slim_agg=imars_slim_agg.merge(imars_crash, how='left', left_on='INCID_NO',right_on='IMARS_Record_No')


In [115]:
imars_slim_agg.head()

Unnamed: 0,INCID_NO,NUM_OCC,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,IMARS_Record_No,Crash_Date_Time_address,Linked_Address_Classification,City_Town_Park_Location,State,County,Direction,Linked_Street_Number,Linked_Common_Name,Street_Type,Direction.1,NEAR_Distance_to_MI,NEAR_Direction_To,NEAR_Direction,NEAR_route_street_road_name,NEAR_Road_Type,NEAR_Direction.1,AT_Intersection_route_street_road_DIRECTION,At_Intersecting_route_street_road_name,AT_Road_Type,AT_Direction,Mile_Marker,Latitude,Longitude,Region,State_Zone,Park,Site,Place,Point,Road_Type_Classification,Linked_Address
0,NP12000078,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NP12000078,20120106 14:30:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,NP12000378,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NP12000378,20120121 00:00:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Highway/road/alley (includes street),
2,NP12000550,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NP12000550,20120131 13:51:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Highway/road/alley (includes street),
3,NP12000911,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NP12000911,20120208 00:00:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Parking lot/garage,
4,NP12000935,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,NP12000935,20120215 10:15:00:000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Highway/road/alley (includes street),


In [116]:
imars_slim_agg.columns

Index(['INCID_NO', 'NUM_OCC', 'Non-Collision',
       'Collision with Other Motor Vehicle', 'Collision with Fixed Object',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Animal', 'Collision with Other Object',
       'Collision with Unknown', 'Other Accident Class', 'No Injury',
       'Possible Injury', 'Non-incapacitating Injury', 'Incapacitating Injury',
       'Fatality', 'Unknown Injury', 'IMARS_Record_No',
       'Crash_Date_Time_address', 'Linked_Address_Classification',
       'City_Town_Park_Location', 'State', 'County', 'Direction',
       'Linked_Street_Number', 'Linked_Common_Name', 'Street_Type',
       'Direction.1', 'NEAR_Distance_to_MI', 'NEAR_Direction_To',
       'NEAR_Direction', 'NEAR_route_street_road_name', 'NEAR_Road_Type',
       'NEAR_Direction.1', 'AT_Intersection_route_street_road_DIRECTION',
       'At_Intersecting_route_street_road_name', 'AT_

In [117]:
imars_slim_agg = imars_slim_agg[['INCID_NO', 'NUM_OCC', 'Non-Collision',
       'Collision with Other Motor Vehicle', 'Collision with Fixed Object',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Animal', 'Collision with Other Object',
       'Collision with Unknown', 'Other Accident Class', 'No Injury',
       'Possible Injury', 'Non-incapacitating Injury', 'Incapacitating Injury',
       'Fatality', 'Unknown Injury', 'Latitude', 'Longitude', 'Crash_Date_Time_address']]

In [118]:
imars_slim_agg.head()

Unnamed: 0,INCID_NO,NUM_OCC,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Latitude,Longitude,Crash_Date_Time_address
0,NP12000078,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20120106 14:30:00:000
1,NP12000378,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20120121 00:00:00:000
2,NP12000550,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20120131 13:51:00:000
3,NP12000911,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20120208 00:00:00:000
4,NP12000935,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,20120215 10:15:00:000


In [119]:
imars_slim_agg.to_csv("./data/crash_data_IMARS.csv",index=False)

### Lat/Long Cleaning
- Some longitudes are positive when they should be negative
- Some entries have switched their lat/lon values
- Some entries have both of these issues


In [43]:
imars_slim_agg_nonulls = imars_slim_agg.dropna(subset=['Latitude','Longitude'])

In [44]:
imars_slim_agg_nonulls.shape, imars_slim_agg.shape

((5718, 21), (12883, 21))

In [62]:
imars_slim_agg_nonulls['Longitude2']= -imars_slim_agg_nonulls['Longitude'].abs()
imars_slim_agg_nonulls.to_csv("./data/crash_data_IMARS_NoNulls_new.csv",index=False)
# Note: I took this output and moved into ArcGIS Pro for spatial join

In [61]:
imars_slim_agg_nonulls.head()

Unnamed: 0,INCID_NO,NUM_OCC,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Latitude,Longitude,Longitude2
410,NP14038270,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.0,0.0,-0.0
411,NP14039836,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47.966784,123.857821,-123.857821
414,NP14049772,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48.079397,123.429762,-123.429762
415,NP14060606,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48.084167,-123.431944,-123.431944
417,NP14103966,7,0,7,0,0,0,0,0,0,0,0,0,0,1,0,5,1,0,35.18803,-87.38101,-87.38101


In [63]:
imars_slim_agg_nonulls['Longitude'].min(), imars_slim_agg_nonulls['Longitude'].max()


(-157.937701, 156.284017)

In [64]:
## Under construction if we want to do more lat/lon cleanup
## This would be for entries where lat and lon values were switched (i.e. lat value is actually the lon value)
#imars_slim['Longitude2']= imars_slim['Latitude']
#imars_slim['Latitude2']= imars_slim['Longitude2']

#np.where(imars_slim['Longitude2']==np.where(imars_slim['Longitude2']>-60, imars_slim['Longitude2'],0)


In [120]:
imars_slim_agg_nonulls_withParkCodes = pd.read_csv("./data/crash_data_IMARS_NoNulls_new_withParkCodes.csv")

In [121]:
imars_slim_agg_nonulls_withParkCodes.head()

Unnamed: 0,OBJECTID *,Shape *,Join_Count,TARGET_FID,INCID_NO,NUM_OCC,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Latitude,Longitude,Longitude2,OBJECTID,UNIT_CODE,GIS_Notes,UNIT_NAME,DATE_EDIT,STATE,REGION,GNIS_ID,UNIT_TYPE,CREATED_BY,METADATA,PARKNAME,CreationDa,Creator,EditDate,Editor,Shape__Are,Shape__Len,GlobalID,BUFF_DIST,ORIG_FID
0,1,Point,0,1,NP14038270,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0.0,0.0,0.0,1,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>
1,2,Point,1,2,NP14039836,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,47.966784,123.857821,-123.857821,2,OLYM,PRELIMINARY - Data has not completed the entir...,Olympic National Park,4/22/2015,WA,PW,1530459,National Park,Lands,Preliminary data. Contact the Land Resources P...,Olympic,1/9/2020,SCarlton@nps.gov_nps,1/9/2020,SCarlton@nps.gov_nps,8203590772,1158605.378,7e8a1a75-5e1c-4bef-8439-4f6cb149a10a,1500,330
2,3,Point,1,3,NP14049772,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48.079397,123.429762,-123.429762,3,OLYM,PRELIMINARY - Data has not completed the entir...,Olympic National Park,4/22/2015,WA,PW,1530459,National Park,Lands,Preliminary data. Contact the Land Resources P...,Olympic,1/9/2020,SCarlton@nps.gov_nps,1/9/2020,SCarlton@nps.gov_nps,8203590772,1158605.378,7e8a1a75-5e1c-4bef-8439-4f6cb149a10a,1500,330
3,4,Point,1,4,NP14060606,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,48.084167,-123.431944,-123.431944,4,OLYM,PRELIMINARY - Data has not completed the entir...,Olympic National Park,4/22/2015,WA,PW,1530459,National Park,Lands,Preliminary data. Contact the Land Resources P...,Olympic,1/9/2020,SCarlton@nps.gov_nps,1/9/2020,SCarlton@nps.gov_nps,8203590772,1158605.378,7e8a1a75-5e1c-4bef-8439-4f6cb149a10a,1500,330
4,5,Point,0,5,NP14103966,7,0,7,0,0,0,0,0,0,0,0,0,0,1,0,5,1,0,35.18803,-87.38101,-87.38101,5,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>,<Null>


In [122]:
imars_slim_agg_nonulls_withParkCodes['PARK_ALPHA'] = imars_slim_agg_nonulls_withParkCodes['UNIT_CODE']
imars_slim_agg_nonulls_withParkCodes['STATE_CODE'] = imars_slim_agg_nonulls_withParkCodes['STATE'] 


In [123]:
imars_slim_agg_nonulls_withParkCodes = imars_slim_agg_nonulls_withParkCodes[['INCID_NO','PARK_ALPHA',
                                                                            'STATE_CODE']]

In [124]:
imars_slim_agg = imars_slim_agg.merge(imars_slim_agg_nonulls_withParkCodes, on='INCID_NO',
                                     how='left')

In [125]:
imars_slim_agg.shape, imars_slim_agg.loc[imars_slim_agg['PARK_ALPHA'].isnull()==False].shape

((13881, 24), (6981, 24))

In [126]:
imars_slim_agg.columns

Index(['INCID_NO', 'NUM_OCC', 'Non-Collision',
       'Collision with Other Motor Vehicle', 'Collision with Fixed Object',
       'Collision with Pedestrian', 'Collision with Bicycle',
       'Collision with Parked Motor Vehicle', 'Collision with Railway Train',
       'Collision with Animal', 'Collision with Other Object',
       'Collision with Unknown', 'Other Accident Class', 'No Injury',
       'Possible Injury', 'Non-incapacitating Injury', 'Incapacitating Injury',
       'Fatality', 'Unknown Injury', 'Latitude', 'Longitude',
       'Crash_Date_Time_address', 'PARK_ALPHA', 'STATE_CODE'],
      dtype='object')

In [127]:
imars_slim_agg.tail()

Unnamed: 0,INCID_NO,NUM_OCC,Non-Collision,Collision with Other Motor Vehicle,Collision with Fixed Object,Collision with Pedestrian,Collision with Bicycle,Collision with Parked Motor Vehicle,Collision with Railway Train,Collision with Animal,Collision with Other Object,Collision with Unknown,Other Accident Class,No Injury,Possible Injury,Non-incapacitating Injury,Incapacitating Injury,Fatality,Unknown Injury,Latitude,Longitude,Crash_Date_Time_address,PARK_ALPHA,STATE_CODE
13876,NP21169469,4,0,0,0,0,0,0,0,4,0,0,0,4,0,0,0,0,0,36.323889,-112.113056,20200809 20:00:00:000,GRCA,AZ
13877,NP21226422,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,,,20201023 14:55:00:000,,
13878,NP21226428,2,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,,,20200820 16:03:00:000,,
13879,NP21228215,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,61.383883,-143.2848,20210621 20:30:00:000,WRST,AK
13880,NP21239268,4,0,0,0,0,0,0,0,4,0,0,0,3,1,0,0,0,0,,,20201001 17:30:00:000,,


In [128]:
imars_slim_agg.to_csv("crash_data_IMARS_clean.csv",index=False)