In [148]:
import os
import json
import folium
import pandas as pd
import numpy
import math
os.chdir('../src/data')
import util
os.chdir('../../notebooks')

In [10]:
PROCESSED_DATA_FP = '../data/processed/'

inters = {}
with open(PROCESSED_DATA_FP + 'inters_data.json') as f:
    data = json.load(f)
    for key, value in data.iteritems():
        inters[str(key)] = value[0]

with open(PROCESSED_DATA_FP + 'tmc_summary.json') as f:
    addresses = json.load(f)

    print addresses[0]

{u'Right': 563, u'crash_count': 1, u'Longitude': u'-71.0631259', u'Filename': u'7436_2407_BUNKER-HILL-ST,-MYSTIC-ST,-SCHOOL-ST_NA_NA_CHARLESTOWN_11-HOURS_NA_09-18-2014.XLS', u'Hours': u'11', u'Date': u'2014-09-18', u'near_intersection_id': u'8032', u'Normalized': 6046, u'Address': u'Mystic St & Bunker Hill St, Boston, MA 02129, USA', u'Latitude': u'42.379195', u'near_id': 8032, u'Total': 4430, u'Conflict': 529, u'Left': 529}


In [228]:
def compare_crashes():

    count = 0

    inters = {}
    with open(PROCESSED_DATA_FP + 'inters_data.json') as f:
        data = json.load(f)
        for key, value in data.iteritems():
            inters[str(key)] = value[0]

    
    crashes_by_seg = {}
    with open(PROCESSED_DATA_FP + 'crash_joined.json') as f:
        data = json.load(f)
        for row in data:
            if str(row['near_id']) == '':
                next
            if str(row['near_id']) not in crashes_by_seg.keys():
                crashes_by_seg[str(row['near_id'])] = {
                    'total': 0, 'type': [], 'values': []}
            crashes_by_seg[str(row['near_id'])]['total'] += 1
            crashes_by_seg[str(row['near_id'])]['type'].append(
                row['FIRST_EVENT_SUBTYPE'])
            crashes_by_seg[str(row['near_id'])]['values'].append(row)
            
    crash_items, crash_locations = util.group_json_by_location(PROCESSED_DATA_FP + 'crash_joined.json')

    ccount = 0
    tcount = 0
   
    counts = {
        'low_volume': 0,
        'high_volume': 0,
        'low_volume_crash': 0,
        'high_volume_crash': 0,
        'low_speed': 0,
        'low_speed_crash': 0,
        'high_speed': 0,
        'high_speed_crash': 0,
        'low_conflict': 0,
        'low_conflict_crash': 0,
        'high_conflict': 0,
        'high_conflict_crash': 0,
        'high_left': 0,
        'high_left_crash': 0,
        'high_right': 0,
        'high_right_crash': 0,
    }
    with open(PROCESSED_DATA_FP + 'tmc_summary.json') as f:
        data = json.load(f)
        
        for row in data:
            if row['near_intersection_id']:
                inter_info = inters[row['near_intersection_id']]
                speed = inter_info['SPEEDLIMIT']
                volume = row['Normalized']
                
                # speed binning
                if speed <= 25:
                    counts['low_speed'] += 1
                else:
                    counts['high_speed'] += 1
                
                # volume binning
                if volume < 20000:
                    counts['low_volume'] += 1
                else:
                    counts['high_volume'] += 1

                # conflict binning
                if row['Conflict'] < 1000:
                    counts['low_conflict'] += 1
                else:
                    counts['high_conflict'] += 1

                if row['Left'] > 1000:
                    counts['high_left'] += 1
                if row['Right'] > 1000:
                    counts['high_right'] += 1

                if row['near_intersection_id'] in crash_locations.keys():
                    crash_count = crash_locations[row['near_intersection_id']]['count']
                    if crash_count:
                        ccount += 1

                    if speed <= 25:
                        counts['low_speed_crash'] += 1
                    else:
                        counts['high_speed_crash'] += 1
                
                    # volume binning
                    if volume < 20000:
                        counts['low_volume_crash'] += 1
                    else:
                        counts['high_volume_crash'] += 1

                    if row['Conflict'] < 1000:
                        counts['low_conflict_crash'] += 1
                    else:
                        counts['high_conflict_crash'] += 1

                    if row['Left'] > 1000:
                        counts['high_left_crash'] += 1
                    if row['Right'] > 1000:
                        counts['high_right_crash'] += 1

                tcount += 1

    print 'Percentage of locations with crash:' + str(round(100 * ccount/tcount)) + ' out of ' + str(tcount)
    print 'Percentage of high volume with crash:' + str(round(100 * counts['high_volume_crash']/counts['high_volume'])) + ' out of ' + str(counts['high_volume'])
    print 'Percentage of high speed limit with crash:' + str(round(100*counts['high_speed_crash']/counts['high_speed'])) + ' out of ' + str(counts['high_speed'])
    print 'Percentage of high conflict with crash:' + str(round(100*counts['high_conflict_crash']/counts['high_conflict'])) + ' out of ' + str(counts['high_conflict'])
    print 'Percentage of high left with crash:' + str(round(100*counts['high_left_crash']/counts['high_left'])) + ' out of ' + str(counts['high_left'])
    print 'Percentage of high right with crash:' + str(round(100*counts['high_right_crash']/counts['high_right'])) + ' out of ' + str(counts['high_right'])

In [27]:
def plot_tmcs(addresses):

    # First create basemap                                                                                                                                              
    points = folium.Map(
        [42.3601, -71.0589],
        tiles='Cartodb Positron',
        zoom_start=12
    )

    # plot tmcs                                                                                                                                                         
    for address in addresses:
        if not pd.isnull(address['Latitude']):
            folium.CircleMarker(
                location=[float(address['Latitude']), float(address['Longitude'])],
                fill_color='yellow', fill=True, fill_opacity=.7, color='yellow',
                radius=6.0).add_to(points)

    # Plot atrs                                                                                                                                                         
    atrs = util.csv_to_projected_records(
        PROCESSED_DATA_FP + 'geocoded_atrs.csv', x='lng', y='lat')
    for atr in atrs:
        properties = atr['properties']
        if properties['lat']:
            folium.CircleMarker(
                location=[float(properties['lat']), float(properties['lng'])],
                fill_color='green', fill=True, fill_opacity=.7, color='grey',
                radius=6.0).add_to(points)

    display(points)


In [229]:
compare_crashes()
#plot_tmcs(addresses)



Percentage of locations with crash:62.0 out of 335
Percentage of high volume with crash:83.0 out of 100
Percentage of high speed limit with crash:70.0 out of 86
Percentage of high conflict with crash:75.0 out of 168
Percentage of high left with crash:75.0 out of 176
Percentage of high right with crash:71.0 out of 173
