In [1]:
import shapely as shp
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
# Load the pickled street table. Later cells merge against its `fullnameab` and
# `nhood_id` columns, read `segment_id` from it, and reproject it with `to_crs`.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
streets = pd.read_pickle('../../data/interim/features/streets_by_nhood.pickle')

In [3]:
# Read the neighborhood shapefile and reproject it to EPSG:4326.
neighborhoods = gpd.read_file(
    '../../data/raw/Neighborhood_Statistical_Areas/Neighborhood_Statistical_Areas.shp'
).to_crs(epsg=4326)

# Keep only the id, label and geometry columns, under the shorter names that
# the rest of the notebook uses (`nhood_id`, `nhood`).
neighborhoods = (
    neighborhoods[['OBJECTID', 'GNOCDC_LAB', 'geometry']]
    .rename(columns={'OBJECTID': 'nhood_id', 'GNOCDC_LAB': 'nhood'})
)

In [4]:
# Load the accident records and turn their lon/lat columns into point geometries.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
accidents = pd.read_pickle('../../data/interim/features/accidents.pickle')

# Points are built as (lon, lat), i.e. x = longitude, y = latitude.
accidents['geometry'] = [
    shp.geometry.Point(lon_lat)
    for lon_lat in zip(accidents.lon, accidents.lat)
]
accidents = gpd.GeoDataFrame(accidents, geometry='geometry')

# Declare the coordinates as EPSG:4326, the same CRS the neighborhoods were
# reprojected to.
# NOTE(review): the {'init': ...} form looks like the legacy PROJ4 spelling;
# confirm it is still accepted by the installed geopandas/pyproj.
accidents.crs = {'init': 'epsg:4326'}

In [5]:
# PROJ parameters for the projected CRS used for distance work later on:
# Lambert conformal conic ('lcc') on the NAD83 datum with units of US survey
# feet ('us-ft'). Buffer radii applied in this CRS are therefore in feet.
crs_louisiana = dict(
    proj='lcc',
    lat_1=29.3,
    lat_2=30.7,
    lat_0=28.5,
    lon_0=-91.33333333333333,
    x_0=1000000,
    y_0=0,
    datum='NAD83',
    units='us-ft',
    no_defs=True,
)

In [6]:
# Attach neighborhood attributes to every accident point that intersects a
# neighborhood. `how='inner'` is sjoin's default, spelled out here to make it
# visible that accidents matching no neighborhood are dropped.
accidents_in_neighborhoods = gpd.sjoin(accidents, neighborhoods, how='inner')



In [7]:
# Expose the index labels as a regular `ItemNumber` column so they survive the
# merges below (pd.merge does not carry the left index through).
accidents_in_neighborhoods = accidents_in_neighborhoods.assign(
    ItemNumber=accidents_in_neighborhoods.index
)

In [8]:
# Trim the joined frame down to the fields the street matching needs:
# the accident id and time, both street names, the point, and the neighborhood.
columns = [
    'ItemNumber',
    'TimeCreate',
    'street_1',
    'street_2',
    'geometry',
    'nhood_id',
    'nhood',
]
accidents_in_neighborhoods = accidents_in_neighborhoods[columns]

In [9]:
# First matching pass: pair each accident with the street rows whose
# `fullnameab` equals the accident's `street_1` within the same neighborhood.
# Inner join (pd.merge's default, made explicit): accidents with no such
# street row are absent from the result.
accidents_join_street = pd.merge(
    accidents_in_neighborhoods,
    streets,
    how='inner',
    left_on=['street_1', 'nhood_id'],
    right_on=['fullnameab', 'nhood_id'],
)

In [10]:
# Accidents whose index label never shows up as an `ItemNumber` in the
# `street_1` merge result, i.e. the ones the first pass failed to match.
unmatched_accidents = accidents_in_neighborhoods[
    ~accidents_in_neighborhoods.index.isin(accidents_join_street['ItemNumber'])
]

In [11]:
# Second matching pass: retry the leftovers using `street_2` instead of
# `street_1`, again restricted to the same neighborhood (inner join).
rematched_accidents = pd.merge(
    unmatched_accidents,
    streets,
    how='inner',
    left_on=['street_2', 'nhood_id'],
    right_on=['fullnameab', 'nhood_id'],
)

In [12]:
# Accidents that neither street-name pass could match; these go on to the
# buffer-based spatial matching.
no_street_match = unmatched_accidents[
    ~unmatched_accidents.index.isin(rematched_accidents['ItemNumber'])
]

In [13]:
# Stack both name-based passes and keep only the accident id, its timestamp
# and the matched street segment.
street_match = pd.concat(
    [accidents_join_street, rematched_accidents],
    ignore_index=True,
)[['ItemNumber', 'TimeCreate', 'segment_id']]

In [14]:
# Reproject the still-unmatched accidents into the projected CRS defined by
# `crs_louisiana` so the buffers computed next use that CRS's linear units.
no_street_match = no_street_match.to_crs(crs=crs_louisiana)

In [15]:
# Index labels of the accidents that the street-name merges did not match.
# Later cells shrink this Index after each buffer tier so that an accident
# matched at a smaller radius is not matched again at a larger one.
unmatched_items = no_street_match.index

In [16]:
# Add one buffer geometry column per search radius around each accident point.
# Radii are expressed in the units of the frame's CRS (`crs_louisiana` declares
# 'us-ft'), so these are 50 / 100 / 250 feet.
for buffer_column, radius_ft in {'buffer_50': 50,
                                 'buffer_100': 100,
                                 'buffer_250': 250}.items():
    no_street_match[buffer_column] = no_street_match.geometry.buffer(radius_ft)

In [17]:
# Build three views of the unmatched accidents, each using a different buffer
# column as its active geometry so that sjoin tests the buffer, not the point.
buffer_50, buffer_100, buffer_250 = (
    no_street_match.set_geometry(buffer_column)
    for buffer_column in ('buffer_50', 'buffer_100', 'buffer_250')
)

In [18]:
# Put the streets in the same projected CRS as the buffered accidents so the
# spatial joins compare geometries in matching coordinates.
streets_la_crs = streets.to_crs(crs=crs_louisiana)

In [19]:
# Spatially join each buffer tier against the streets. The inner join (sjoin's
# default, made explicit) yields one row per buffer/street intersection, so an
# accident can appear several times or not at all.
spatial_join_50, spatial_join_100, spatial_join_250 = (
    gpd.sjoin(buffered_accidents, streets_la_crs, how='inner')
    for buffered_accidents in (buffer_50, buffer_100, buffer_250)
)

In [20]:
# Keep only 50-unit buffer hits where the street row's neighborhood equals the
# accident's neighborhood (the `_left` / `_right` suffixes come from sjoin).
nhood_match_50 = spatial_join_50[
    spatial_join_50['nhood_id_left'].eq(spatial_join_50['nhood_id_right'])
]

In [21]:
# Collapse to one row per accident: `first()` takes the first non-null value of
# every column within each ItemNumber group. Then keep the three output fields.
nhood_match_50_unique = (
    nhood_match_50
    .groupby('ItemNumber')
    .first()
    .reset_index()[['ItemNumber', 'TimeCreate', 'segment_id']]
)

In [22]:
# Drop the accidents that the 50-unit tier just matched from the pending set.
unmatched_items = unmatched_items[
    ~unmatched_items.isin(nhood_match_50_unique['ItemNumber'])
]

In [23]:
# From the 100-unit buffer join, look only at accidents still pending ...
spatial_join_100_unmatched = spatial_join_100[
    spatial_join_100.index.isin(unmatched_items)
]
# ... and keep the hits whose street lies in the accident's own neighborhood.
nhood_match_100 = spatial_join_100_unmatched[
    spatial_join_100_unmatched['nhood_id_left'].eq(
        spatial_join_100_unmatched['nhood_id_right']
    )
]

In [24]:
# One row per accident for the 100-unit tier (first non-null value per column
# within each ItemNumber group), reduced to the three output fields.
nhood_match_100_unique = (
    nhood_match_100
    .groupby('ItemNumber')
    .first()
    .reset_index()[['ItemNumber', 'TimeCreate', 'segment_id']]
)

In [25]:
# Drop the accidents that the 100-unit tier just matched from the pending set.
unmatched_items = unmatched_items[
    ~unmatched_items.isin(nhood_match_100_unique['ItemNumber'])
]

In [26]:
# From the 250-unit buffer join, look only at accidents still pending ...
spatial_join_250_unmatched = spatial_join_250[
    spatial_join_250.index.isin(unmatched_items)
]
# ... and keep the hits whose street lies in the accident's own neighborhood.
nhood_match_250 = spatial_join_250_unmatched[
    spatial_join_250_unmatched['nhood_id_left'].eq(
        spatial_join_250_unmatched['nhood_id_right']
    )
]

In [27]:
# One row per accident for the 250-unit tier (first non-null value per column
# within each ItemNumber group), reduced to the three output fields.
nhood_match_250_unique = (
    nhood_match_250
    .groupby('ItemNumber')
    .first()
    .reset_index()[['ItemNumber', 'TimeCreate', 'segment_id']]
)

In [28]:
# Gather every matching tier -- street-name matches first, then the 50 / 100 /
# 250 buffer tiers -- and stack them row-wise into a single table with a fresh
# 0..n-1 index.
dataframes = [
    street_match,
    nhood_match_50_unique,
    nhood_match_100_unique,
    nhood_match_250_unique,
]
joined_matches = pd.concat(dataframes, axis=0, ignore_index=True)

In [29]:
# Count accidents per street segment per clock hour.
#
# The timestamp is floored to the hour BEFORE grouping. Grouping on the exact
# `TimeCreate` would leave two accidents on the same segment within the same
# hour (e.g. 14:05 and 14:40) in separate groups; once the timestamp is later
# truncated to the hour they would become duplicate (hour, segment) rows, each
# with a count of 1, instead of one row with a count of 2.
#
# `TimeCreate` keeps its column name (now holding the floored value) so code
# that derives an hour label from it afterwards keeps working unchanged.
# After `count()`, the remaining `ItemNumber` column holds the number of
# accident rows in each (segment_id, hour) group.
street_by_time = (
    joined_matches
    .assign(TimeCreate=joined_matches.TimeCreate.dt.floor('h'))
    .groupby(['segment_id', 'TimeCreate'])
    .count()
    .reset_index()
)

In [30]:
# Derive `day_hour`: the timestamp truncated to the start of its hour. The
# value is assembled as the text "<date> <hour>:00:00" and parsed back into a
# datetime in one step.
street_by_time['day_hour'] = pd.to_datetime(
    street_by_time['TimeCreate'].dt.date.astype('str')
    + ' '
    + street_by_time['TimeCreate'].dt.hour.astype('str')
    + ':00:00'
)

In [31]:
# Final shape of the table: hour, segment, and the per-group count.
# NOTE(review): `accident_yn` is the renamed `ItemNumber` count column, so it
# holds a count rather than a strict yes/no flag -- confirm consumers expect that.
street_by_time = (
    street_by_time[['day_hour', 'segment_id', 'ItemNumber']]
    .rename(columns={'ItemNumber': 'accident_yn'})
)

In [32]:
# Write the (day_hour, segment_id, accident_yn) table to the interim features
# directory as a pickle file.
street_by_time.to_pickle('../../data/interim/features/street_by_time.pickle')