In [33]:
import pandas as pd
import pymc3 as pm
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the crash records. low_memory=False makes pandas read the file in a
# single pass rather than in chunks, so each column gets one inferred dtype.
crash = pd.read_csv(
    '../DS_6040_Project/CrashData_Basic_With_Roads_BlockGroups.csv',
    low_memory=False,
)

In [3]:
# Text labels for the integer codes stored in the WEATHER_CONDITION column.
# There is no entry for code 2.
weather_dict = {
    1: 'No Adverse Conditions',
    3: 'Fog',
    4: 'Mist',
    5: 'Rain',
    6: 'Snow',
    7: 'Sleet/Hail',
    8: 'Smoke/Dust',
    9: 'Other',
    10: 'Blowing Sand/Soil/Dirt/Snow',
    11: 'Severe Crosswinds',
    99: 'Not Applicable',
}

In [4]:
# Text labels for the integer codes stored in the LIGHT_CONDITION column.
light_condition = {
    1: 'Dawn',
    2: 'Daylight',
    3: 'Dusk',
    4: 'Darkness-Road Lighted',
    5: 'Darkness-Road not Lighted',
    6: 'Darkness-Road Lighting Unknown',
    7: 'Unknown',
}

In [5]:
# Text labels for the integer codes stored in the ROADWAY_ALIGNMENT column.
# NOTE(review): code 3 is labelled 'Grade-Level', while every other pair
# follows a <shape>-Straight / <shape>-Curve pattern -- confirm this label
# against the crash data dictionary before relying on it.
alignment_dict = {
    1: 'Straight-Level',
    2: 'Curve-Level',
    3: 'Grade-Level',
    4: 'Grade-Curve',
    5: 'Hillcrest-Straight',
    6: 'Hillcrest-Curve',
    7: 'Dip-Straight',
    8: 'Dip-Curve',
    9: 'Other',
    10: 'On/Off Ramp',
}

In [6]:
# Text labels for the integer codes stored in the ROADWAY_SURFACE_TYPE column.
surface_type = {
    1: 'Concrete',
    2: 'Blacktop, Asphalt, Bituminous',
    3: 'Brick or Block',
    4: 'Slag, Gravel, Stone',
    5: 'Dirt',
    6: 'Other',
}

In [7]:
# Text labels for the integer codes stored in the ROADWAY_DEFECT column.
defect_dict = {
    1: 'No Defects',
    2: 'Holes, Ruts, Bumps',
    3: 'Soft or Low Shoulder',
    4: 'Under Repair',
    5: 'Loose Material',
    6: 'Restricted Width',
    7: 'Slick Pavement',
    8: 'Roadway Obstructed',
    9: 'Other',
    10: 'Edge Pavement Dropoff',
}

In [8]:
# Text labels for the integer codes stored in the ROADWAY_DESCRIPTION column.
description_dict = {
    1: 'Two-Way, Not Divided',
    2: 'Two-Way, Divided, Unprotected Median',
    3: 'Two-Way, Divided, Positive Median Barrier',
    4: 'One-Way, Not Divided',
    5: 'Other',
}

In [9]:
# Text labels for the integer codes stored in the TRFC_CTRL_STATUS_TYPE column.
traffic_status = {
    1: 'Yes - Working',
    2: 'Yes - Working and Obscured',
    3: 'Yes - Not Working',
    4: 'Yes - Not Working and Obscured',
    5: 'Yes - Missing',
    6: 'No Traffic Control Device Present',
}

In [10]:
# Text labels for the integer codes stored in the WORK_ZONE_TYPE column.
# Code 0 is the label used for crashes outside a work zone.
workzone_dict = {
    0: 'Non Workzone',
    1: 'Lane Closure',
    2: 'Lane Shift/Crossover',
    3: 'Work on Shoulder or Median',
    4: 'Intermittent or Moving Work',
    5: 'Other',
}

In [11]:
# Text labels for the 0/1 flag stored in the AREA_TYPE column.
area = {
    0: 'Rural',
    1: 'Urban',
}

In [12]:
# Text labels for the single-letter codes stored in the CRASH_SEVERITY column.
severity_dict = {
    'K': 'Fatal Injury',
    'A': 'Severe Injury',
    'B': 'Visible Injury',
    'C': 'Non Visible Injury',
    'O': 'Property Damage Only',
}

In [13]:
# Population lookup table, read from the notebook's working directory.
# A later cell joins it to the crash records on its CountyFIPS column.
population = pd.read_csv(filepath_or_buffer='population.csv')

In [14]:
# Preview the first five rows of the raw crash table.
crash.head()

Unnamed: 0,OID_,Join_Count,TARGET_FID,CRASH_DT,CRASH_SEVERITY,WEATHER_CONDITION,LIGHT_CONDITION,RD_TYPE,ROADWAY_SURFACE_COND,ROADWAY_ALIGNMENT,...,SEGMENT_EXISTS,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,GEOID,Shape_Length_1,GEOID_1,B01001e1,B01001m1
0,1,1.0,346205.0,8/31/2015 5:00:00,O,1,4,0,1,1,...,,51.0,660.0,205.0,1.0,516600000000.0,0.103457,15000US516600002051,6039.0,635.0
1,2,1.0,439769.0,1/12/2015 5:00:00,B,5,2,0,2,1,...,Y,51.0,59.0,440201.0,1.0,510594400000.0,0.075555,15000US510594402011,3352.0,304.0
2,3,2.0,372042.0,12/4/2015 5:00:00,O,1,2,0,1,1,...,Y,51.0,61.0,930402.0,3.0,510619300000.0,0.14208,15000US510619304023,3360.0,322.0
3,4,1.0,382083.0,3/14/2015 5:00:00,C,1,4,0,2,3,...,Y,51.0,59.0,491402.0,1.0,510594900000.0,0.037802,15000US510594914021,2390.0,408.0
4,5,1.0,617812.0,9/18/2015 5:00:00,O,1,5,0,1,1,...,Y,51.0,165.0,10400.0,1.0,511650100000.0,0.491116,15000US511650104001,1072.0,165.0


In [15]:
# Attach population figures to every crash record by matching the crash
# table's COUNTYFP against the population table's CountyFIPS.
# how='left' keeps crashes with no matching CountyFIPS (their Population is
# NaN). validate='many_to_one' makes pandas raise MergeError if a CountyFIPS
# value is repeated in `population`; without it such a repeat would silently
# duplicate crash rows and inflate every downstream count.
crash_withpop = pd.merge(
    crash,
    population,
    how='left',
    left_on='COUNTYFP',
    right_on='CountyFIPS',
    validate='many_to_one',
)


In [16]:
# Display the merged frame (pandas truncates the rendering of large frames);
# CountyFIPS and Population are the columns contributed by the population table.
crash_withpop

Unnamed: 0,OID_,Join_Count,TARGET_FID,CRASH_DT,CRASH_SEVERITY,WEATHER_CONDITION,LIGHT_CONDITION,RD_TYPE,ROADWAY_SURFACE_COND,ROADWAY_ALIGNMENT,...,COUNTYFP,TRACTCE,BLKGRPCE,GEOID,Shape_Length_1,GEOID_1,B01001e1,B01001m1,CountyFIPS,Population
0,1,1.0,346205.0,8/31/2015 5:00:00,O,1,4,0,1,1,...,660.0,205.0,1.0,5.166000e+11,0.103457,15000US516600002051,6039.0,635.0,660.0,54810
1,2,1.0,439769.0,1/12/2015 5:00:00,B,5,2,0,2,1,...,59.0,440201.0,1.0,5.105944e+11,0.075555,15000US510594402011,3352.0,304.0,59.0,1150309
2,3,2.0,372042.0,12/4/2015 5:00:00,O,1,2,0,1,1,...,61.0,930402.0,3.0,5.106193e+11,0.142080,15000US510619304023,3360.0,322.0,61.0,72972
3,4,1.0,382083.0,3/14/2015 5:00:00,C,1,4,0,2,3,...,59.0,491402.0,1.0,5.105949e+11,0.037802,15000US510594914021,2390.0,408.0,59.0,1150309
4,5,1.0,617812.0,9/18/2015 5:00:00,O,1,5,0,1,1,...,165.0,10400.0,1.0,5.116501e+11,0.491116,15000US511650104001,1072.0,165.0,165.0,83757
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
901832,901833,2.0,523003.0,4/17/2022 5:00:00,O,1,2,0,1,1,...,153.0,900300.0,1.0,5.115390e+11,0.061243,15000US511539003001,1426.0,322.0,153.0,482204
901833,901834,1.0,376240.0,1/11/2022 5:00:00,O,1,5,0,1,1,...,59.0,481103.0,2.0,5.105948e+11,0.019057,15000US510594811032,1817.0,293.0,59.0,1150309
901834,901835,2.0,208326.0,1/26/2022 5:00:00,O,1,4,0,1,4,...,775.0,10100.0,1.0,5.177501e+11,0.077070,15000US517750101001,1516.0,430.0,775.0,25346
901835,901836,1.0,178181.0,2/3/2022 5:00:00,O,5,5,2,2,1,...,71.0,930300.0,1.0,5.107193e+11,0.396275,15000US510719303001,1248.0,320.0,71.0,16787


In [17]:
# Narrow the merged frame to the weather/light/roadway condition columns,
# the joined Population figure, and CRASH_SEVERITY.
crash_for_ml = crash_withpop[
    [
        'WEATHER_CONDITION',
        'LIGHT_CONDITION',
        'ROADWAY_ALIGNMENT',
        'ROADWAY_SURFACE_TYPE',
        'ROADWAY_DEFECT',
        'ROADWAY_DESCRIPTION',
        'TRFC_CTRL_STATUS_TYPE',
        'WORK_ZONE_TYPE',
        'AREA_TYPE',
        'Population',
        'CRASH_SEVERITY',
    ]
]

In [18]:
# Nested mapping of {column name: {raw code: text label}}. DataFrame.replace
# applies each inner dict only to the column it is keyed by.
replace_map = {
    'WEATHER_CONDITION': weather_dict,
    'LIGHT_CONDITION': light_condition,
    'ROADWAY_ALIGNMENT': alignment_dict,
    'ROADWAY_SURFACE_TYPE': surface_type,
    'ROADWAY_DEFECT': defect_dict,
    'ROADWAY_DESCRIPTION': description_dict,
    'TRFC_CTRL_STATUS_TYPE': traffic_status,
    'WORK_ZONE_TYPE': workzone_dict,
    'AREA_TYPE': area,
    'CRASH_SEVERITY': severity_dict,
}

# Produces a new frame; crash_for_ml itself is not modified. Codes with no
# entry in the relevant dict are left as-is, and Population (not listed in the
# mapping) passes through untouched.
crash_final = crash_for_ml.replace(to_replace=replace_map)

In [19]:
# Preview the first five rows of the relabelled frame to check that the
# coded columns now show their text labels.
crash_final.head()

Unnamed: 0,WEATHER_CONDITION,LIGHT_CONDITION,ROADWAY_ALIGNMENT,ROADWAY_SURFACE_TYPE,ROADWAY_DEFECT,ROADWAY_DESCRIPTION,TRFC_CTRL_STATUS_TYPE,WORK_ZONE_TYPE,AREA_TYPE,Population,CRASH_SEVERITY
0,No Adverse Conditions,Darkness-Road Lighted,Straight-Level,"Blacktop, Asphalt, Bituminous",No Defects,"Two-Way, Divided, Unprotected Median",Yes - Working,Non Workzone,Urban,54810,Property Damage Only
1,Rain,Daylight,Straight-Level,"Blacktop, Asphalt, Bituminous",No Defects,"Two-Way, Divided, Positive Median Barrier",Yes - Working,Non Workzone,Urban,1150309,Visible Injury
2,No Adverse Conditions,Daylight,Straight-Level,"Blacktop, Asphalt, Bituminous",No Defects,"Two-Way, Divided, Unprotected Median",Yes - Working,Non Workzone,Urban,72972,Property Damage Only
3,No Adverse Conditions,Darkness-Road Lighted,Grade-Level,"Blacktop, Asphalt, Bituminous",No Defects,"Two-Way, Divided, Unprotected Median",Yes - Not Working,Non Workzone,Urban,1150309,Non Visible Injury
4,No Adverse Conditions,Darkness-Road not Lighted,Straight-Level,"Blacktop, Asphalt, Bituminous",No Defects,"Two-Way, Divided, Unprotected Median",Yes - Working,Lane Closure,Rural,83757,Property Damage Only
