Based on the spatial analysis of Chicago crashes, we add a new column to the data, labelled "AREAS", that indicates the Chicago community area in which each crash happened.

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

crashes = pd.read_csv('crashes.csv')
chicago_areas = gpd.read_file('Community.geojson')

# Value stored in 'AREAS' when a crash cannot be placed in any polygon.
# It is deliberately negative: area labels are 0-based polygon positions, so
# a default of 0 would be indistinguishable from the first community area.
NO_AREA = -1


def locate_area(longitude, latitude, polygons):
    """Return the position of the first polygon that contains the point.

    Parameters
    ----------
    longitude, latitude : float
        Crash coordinates, passed to ``Point`` as (x, y).
    polygons : sequence of shapely geometries
        Candidate areas, tested in order.

    Returns
    -------
    int
        0-based position of the first polygon for which ``point.within`` is
        true, or ``NO_AREA`` when a coordinate is missing or nothing matches.
    """
    # A crash without coordinates cannot be located; skip the polygon scan.
    if pd.isna(longitude) or pd.isna(latitude):
        return NO_AREA
    point = Point(longitude, latitude)
    for position, polygon in enumerate(polygons):
        if point.within(polygon):
            return position
    return NO_AREA


# Materialise the geometries once instead of re-iterating the GeoSeries for
# every crash, and build the whole column in one assignment rather than
# writing cell by cell.
area_polygons = list(chicago_areas.geometry)
crashes['AREAS'] = [
    locate_area(longitude, latitude, area_polygons)
    for longitude, latitude in zip(crashes['LONGITUDE'], crashes['LATITUDE'])
]

We then keep the relevant columns and extract the features and the target variable (`CRASH_TYPE`) that we are going to use in building the predictive model.

In [3]:
# Columns used as model inputs; 'AREAS' is the label added to `crashes` above.
feature_columns = [
    'POSTED_SPEED_LIMIT',
    'TRAFFIC_CONTROL_DEVICE',
    'DEVICE_CONDITION',
    'WEATHER_CONDITION',
    'LIGHTING_CONDITION',
    'FIRST_CRASH_TYPE',
    'TRAFFICWAY_TYPE',
    'ALIGNMENT',
    'ROADWAY_SURFACE_COND',
    'PRIM_CONTRIBUTORY_CAUSE',
    'CRASH_HOUR',
    'CRASH_DAY_OF_WEEK',
    'CRASH_MONTH',
    'AREAS',
]
crash_features = crashes[feature_columns]

# The target is selected with a list so it stays a one-column DataFrame
# rather than becoming a Series.
target_columns = ['CRASH_TYPE']
crash_type = crashes[target_columns]

We then save the features and target variables.

In [4]:
# Write each frame to its own CSV file. `to_csv` is called with its defaults,
# so the row index is written as the first, unnamed column of each file.
for frame, destination in (
    (crash_features, "crash_features.csv"),
    (crash_type, 'crash_type.csv'),
):
    frame.to_csv(destination)

# Preview the saved feature table.
crash_features.head()

In [5]:
# Show the first five rows of the feature table as the cell output.
crash_features.head(n=5)

Unnamed: 0,POSTED_SPEED_LIMIT,TRAFFIC_CONTROL_DEVICE,DEVICE_CONDITION,WEATHER_CONDITION,LIGHTING_CONDITION,FIRST_CRASH_TYPE,TRAFFICWAY_TYPE,ALIGNMENT,ROADWAY_SURFACE_COND,PRIM_CONTRIBUTORY_CAUSE,CRASH_HOUR,CRASH_DAY_OF_WEEK,CRASH_MONTH,AREAS
0,20,STOP SIGN/FLASHER,FUNCTIONING PROPERLY,CLEAR,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,ONE-WAY,STRAIGHT AND LEVEL,DRY,DISTRACTION - FROM OUTSIDE VEHICLE,2,3,9,25
1,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,"DARKNESS, LIGHTED ROAD",FIXED OBJECT,FOUR WAY,STRAIGHT AND LEVEL,DRY,UNABLE TO DETERMINE,1,3,9,26
2,30,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,ONE-WAY,STRAIGHT AND LEVEL,DRY,NOT APPLICABLE,1,3,9,18
3,25,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,NOT DIVIDED,STRAIGHT AND LEVEL,DRY,PHYSICAL CONDITION OF DRIVER,1,3,9,22
4,30,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",SIDESWIPE SAME DIRECTION,DIVIDED - W/MEDIAN (NOT RAISED),STRAIGHT AND LEVEL,DRY,UNABLE TO DETERMINE,0,3,9,60
