# Filter Hartford crashes that happened in September-October-November
## Save the files to the `data/` folder. Use those files for all analysis.

In [1]:
import pandas as pd
from geopy.distance import distance, lonlat
from tqdm import tqdm

In [34]:
# Both raw exports are parsed the same way: the first line of each file is
# skipped and every column is kept as a string.
raw_read_options = dict(engine="python", skiprows=1, dtype=str)

# Persons export
persons_path = 'raw/export_2859_2.csv'
persons_raw = pd.read_csv(persons_path, **raw_read_options)

# Crashes export
crashes_path = 'raw/export_2859_0.csv'
crashes_raw = pd.read_csv(crashes_path, **raw_read_options)

In [32]:
# We only care about September-October-November crashes in Hartford.
# The raw files are read with dtype=str, so months are matched as strings.
is_autumn = crashes_raw['Month'].isin(['9', '10', '11'])
is_hartford = crashes_raw['Town Name'] == 'Hartford'
relevant_crashes = crashes_raw[is_autumn & is_hartford]

relevant_crashes = relevant_crashes.set_index('CrashId')

# Boolean mask over persons_raw: True for people involved in a relevant crash
relevant_persons = persons_raw['CrashId'].isin(relevant_crashes.index)

# Save the files. CrashId now lives in the index of relevant_crashes, so the
# index has to be written out; with index=False the saved crashes would lose
# the only key that links them to the persons file.
relevant_crashes.to_csv('data/crashes-sept-thru-nov-hartford.csv', index=True)
persons_raw[relevant_persons].to_csv('data/persons-sept-thru-nov-hartford.csv', index=False)

# Create a Final master file with Persons, Crashes, and closest Crosswalk combined

In [96]:
# Load the crosswalk inventory, using its first column as the index and
# replacing every missing value with the string 'False'
crosswalks = pd.read_csv('data/crosswalk_locations.csv', index_col=0)
crosswalks = crosswalks.fillna('False')

# Load the filtered crash and person extracts
persons = pd.read_csv('data/persons-sept-thru-nov-hartford.csv')
crashes = pd.read_csv('data/crashes-sept-thru-nov-hartford.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [97]:
# First, for each crash, calculate the closest crosswalk location and record
# crosswalk ID, distance and sign status in separate columns of Crashes dataset
def get_nearest_crosswalk(crash):
    """Find the crosswalk closest to a single crash.

    Scans every row of the module-level ``crosswalks`` frame and keeps the one
    with the smallest geopy distance to the crash.

    Parameters
    ----------
    crash : row-like
        Must provide 'Longitude' and 'Latitude'.

    Returns
    -------
    pd.Series
        'Nearest Xwalk ID' (index label of the winning crosswalk row),
        'Nearest Xwalk Distance' (in feet) and 'Nearest Xwalk Sign' (that
        row's 'SignInstalled' value). All three are 99999 when no crosswalk
        is closer than 99999 feet, including when ``crosswalks`` is empty.
    """
    # Sentinel reported in every field when nothing qualifies; it doubles as
    # the initial distance to beat (feet).
    not_found = 99999

    # The crash location does not change between crosswalks: build it once
    # instead of once per crosswalk row.
    crash_point = lonlat(crash['Longitude'], crash['Latitude'])

    xwk_id = xwk_dist = xwk_sign = not_found
    for i, row in crosswalks.iterrows():
        dist = distance(crash_point, lonlat(row['Long'], row['Lat'])).feet
        # Strict comparison: on a tie the earlier crosswalk is kept
        if dist < xwk_dist:
            xwk_id, xwk_dist, xwk_sign = i, dist, row['SignInstalled']

    return pd.Series({
        'Nearest Xwalk ID': xwk_id,
        'Nearest Xwalk Distance': xwk_dist,
        'Nearest Xwalk Sign': xwk_sign,
    })

# Compute the nearest-crosswalk columns row by row, then attach them to the
# crash table side by side
nearest_crosswalks = crashes.apply(get_nearest_crosswalk, axis=1)
crashes_with_crosswalk = pd.concat([crashes, nearest_crosswalks], axis=1)


In [1]:
#crashes_with_crosswalk[(crashes_with_crosswalk['Nearest Xwalk Distance'] < 30)
#                      & (crashes_with_crosswalk['Year'] == 2015)]

In [98]:
# Persist the crashes together with their nearest-crosswalk columns; the row
# index is written as the first column of the file
crashes_with_crosswalk.to_csv('data/crashes_with_crosswalk.csv', index=True)

# Speed bumps

In [2]:
# Load the speed-bump locations and the filtered crash and person extracts
bumps = pd.read_csv('data/speed-bumps.csv')
persons = pd.read_csv('data/persons-sept-thru-nov-hartford.csv')
crashes = pd.read_csv('data/crashes-sept-thru-nov-hartford.csv')

# Register tqdm with pandas so that progress_apply is available
tqdm.pandas()

# First, for each crash, calculate the closest speed bump location and record
# bump ID, distance and type in separate columns of Crashes dataset
def get_nearest_bump(crash):
    """Find the speed bump closest to a single crash.

    Scans every row of the module-level ``bumps`` frame and keeps the one with
    the smallest geopy distance to the crash.

    Parameters
    ----------
    crash : row-like
        Must provide 'Longitude' and 'Latitude'.

    Returns
    -------
    pd.Series
        'Nearest Bump ID' (index label of the winning bump row),
        'Nearest Bump Distance' (in feet) and 'Nearest Bump Type' (that row's
        'O / P' value). All three are 99999 when no bump is closer than
        99999 feet, including when ``bumps`` is empty.
    """
    # Sentinel reported in every field when nothing qualifies; it doubles as
    # the initial distance to beat (feet).
    not_found = 99999

    # The crash location does not change between bumps: build it once
    # instead of once per bump row.
    crash_point = lonlat(crash['Longitude'], crash['Latitude'])

    bump_id = bump_dist = bump_type = not_found
    for i, row in bumps.iterrows():
        dist = distance(crash_point, lonlat(row['Long'], row['Lat'])).feet
        # Strict comparison: on a tie the earlier bump is kept
        if dist < bump_dist:
            bump_id, bump_dist, bump_type = i, dist, row['O / P']

    return pd.Series({
        'Nearest Bump ID': bump_id,
        'Nearest Bump Distance': bump_dist,
        'Nearest Bump Type': bump_type,
    })

# Compute the nearest-bump columns row by row (with a progress bar), then
# attach them to the crash table side by side
nearest_bumps = crashes.progress_apply(get_nearest_bump, axis=1)
crashes_with_bumps = pd.concat([crashes, nearest_bumps], axis=1)

# Persist the result; the row index is written as the first column
crashes_with_bumps.to_csv('data/crashes_with_bumps.csv', index=True)

  interactivity=interactivity, compiler=compiler, result=result)
100%|██████████| 7241/7241 [06:18<00:00, 19.14it/s]
