In [39]:
PROCESSED_DATA_FP = '../osm-data/processed/'
MAP_FP = PROCESSED_DATA_FP + 'maps/'

In [106]:
import json
import os
import fiona
BASE_DIR = os.path.dirname(os.getcwd())
os.chdir(BASE_DIR + '/src/data/')
import util
os.chdir(BASE_DIR + '/notebooks/')

In [68]:
crash_list, crashes_by_id = util.group_json_by_location(PROCESSED_DATA_FP + 'crash_joined.json', otherfields=['TIME'])
print str(len(crash_list)) + " crashes found"


# Crashes by id is a dict where the key is the segment id.  It contains the count of crashes at that intersection
# Can also contain other information about this location, if called with arg otherfields.  In this case, we're
# Looking at time
sorted = [value['count'] for key,value in crashes_by_id.items() if key != '']
sorted.sort(reverse=True)
print str(len(sorted)) + " unique crash locations found\n"

print "Highest number of crashes at any one location: " + str(sorted[0:10])
worst_id = [key for key, value in crashes_by_id.items() if value['count'] == sorted[0]][0]

times = []
for crash_loc in crashes_by_id.values():
    for time in crash_loc['TIME']:
        if time:
            times.append(time)
print str(len(times)) + " out of " + str(len(crash_list)) + " crashes have a time associated"


7220 crashes found
3378 unique crash locations found

Highest number of crashes at any one location: [38, 34, 31, 29, 29, 27, 27, 27, 22, 21]
3202 out of 7220 crashes have a time associated


In [145]:
def get_int_feat(featname, items, buckets=[]):
    results = {}
    for id, values in items.iteritems():
        value = max([x[featname] for x in values])
        
        if buckets:
            found = False
            for bucket in buckets:
                if int(value) <= bucket and not found:
                    value = bucket
                    found = True
            if not found:
                value = buckets[len(buckets)-1]
        if value not in results.keys():
            results[value] = {'crash': 0, 'no_crash': 0}
        if id in crashes_by_id.keys():
            results[value]['crash'] += 1
        else:
            results[value]['no_crash'] += 1
 
    print "\n" + featname + ".............."
    for key, value in results.iteritems():
        print str(key) + ':' + str(float(value['crash'])/(float(value['no_crash'] + value['crash']))) + " out of " + str(value['no_crash'] + value['crash'])


In [158]:
items = json.load(open(PROCESSED_DATA_FP + '/inters_data.json'))

#feats = ['hwy_type', 'width', 'lanes', 'osm_speed', 'signal', 'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class']

print len(items)
print "************************************"
get_int_feat('signal', items)
get_int_feat('lanes', items)
get_int_feat('width', items, [9, 14, 19, 29])

# Highway types are kind of problematic to use max for, but it looks like if they're all residential roads, that really reduces crash risk
# Also, too many highway types with combo types (e.g. motorway and motorway link...)
get_int_feat('hwy_type', items)

get_int_feat('SPEEDLIMIT', items)

get_int_feat('Struct_Cnd', items)

get_int_feat('Surface_Tp', items)

get_int_feat('F_F_Class', items)

7850
************************************

signal..............
1:0.433740831296 out of 2045
0:0.151765719208 out of 5805

lanes..............
1:0.0254777070064 out of 157
0:0.0129032258065 out of 155
3:0.467075038285 out of 653
2:0.183288409704 out of 6307
5:0.618421052632 out of 76
4:0.506048387097 out of 496
6:0.5 out of 6

width..............
9:0.102850754612 out of 1789
19:0.290178571429 out of 1792
29:0.447665056361 out of 1863
14:0.0955943474647 out of 2406

hwy_type..............
0:0.40490797546 out of 163
1:0.131403581841 out of 4802
2:0.509803921569 out of 204
3:0.386340468909 out of 1962
4:0.0526315789474 out of 19
5:0.238938053097 out of 113
6:0.354430379747 out of 79
7:0.205882352941 out of 102
8:0.286821705426 out of 129
9:0.292857142857 out of 140
10:0.4 out of 40
11:0.377358490566 out of 53
12:0.555555555556 out of 9
13:0.0 out of 6
14:0.5 out of 6
15:0.6 out of 5
16:0.2 out of 5
17:1.0 out of 2
18:0.5 out of 2
19:0.0 out of 2
20:0.0 out of 2
22:0.0 out of 1
23:0.75 out

In [154]:
def get_non_int_feat(featname, items):
    results = {}
    for x in non_inters:
        value = x['properties'][featname]
        if value not in results.keys():
            results[value] = {'crash': 0, 'no_crash': 0}
        if x['id'] in crashes_by_id.keys():
            results[value]['crash'] += 1
        else:
            results[value]['no_crash'] += 1
            
    print "\n" + featname + ".............."
    for key, value in results.iteritems():
        print str(key) + ':' + str(float(value['crash'])/(float(value['no_crash'] + value['crash']))) + " out of " + str(value['no_crash'] + value['crash'])


In [156]:
non_inters = fiona.open(MAP_FP + 'non_inters_segments.shp')
print len(non_inters)
print non_inters[0]
count = 0

# Feats = ['hwy_type', 'width', 'lanes', 'osm_speed', 'signal', 'SPEEDLIMIT', 'Struct_Cnd', 'Surface_Tp', 'F_F_Class']

lanes = {}
for x in non_inters:
        
    num_lanes = x['properties']['lanes']
    
    if num_lanes not in lanes.keys():
        lanes[num_lanes] = {'crash': 0, 'no_crash': 0}
    if x['id'] in crashes_by_id.keys():
        count += 1
        lanes[num_lanes]['crash'] += 1
    else:
        lanes[num_lanes]['no_crash'] += 1
    

print "Crash rate for non-intersections:" + str(float(count)/float(count + len(non_inters)))
print lanes

get_non_int_feat('lanes', non_inters)

get_non_int_feat('signal', non_inters)
get_non_int_feat('hwy_type', non_inters)
get_non_int_feat('SPEEDLIMIT', non_inters)
get_non_int_feat('Struct_Cnd', non_inters)
get_non_int_feat('Surface_Tp', non_inters)
get_non_int_feat('F_F_Class', non_inters)

15449
{'geometry': {'type': 'LineString', 'coordinates': [(-7911318.680638181, 5213801.374800896), (-7911339.011233901, 5213796.339062805)]}, 'type': 'Feature', 'id': '0', 'properties': OrderedDict([(u'bridge', None), (u'maxspeed', None), (u'AADT', u'22222'), (u'hwy_type', u'0'), (u'id', u'000'), (u'from', u'61341696'), (u'osmid', u'8652528'), (u'area', None), (u'Struct_Cnd', u'2'), (u'orig_id', u'991'), (u'F_F_Class', u'3'), (u'access', None), (u'to', u'61341267'), (u'osm_speed', u'0'), (u'ref', None), (u'highway', u'secondary'), (u'lanes', u'2'), (u'Surface_Tp', u'6'), (u'key', u'0'), (u'name', u'Park Plaza'), (u'tunnel', None), (u'signal', u'0'), (u'length', u'44.9540071988'), (u'oneway', u'True'), (u'SPEEDLIMIT', u'20'), (u'inter', u'0'), (u'width', u'30')])}
Crash rate for non-intersections:0.102689202532
{u'1': {'no_crash': 1224, 'crash': 129}, u'0': {'no_crash': 927, 'crash': 118}, u'3': {'no_crash': 701, 'crash': 85}, u'2': {'no_crash': 10383, 'crash': 1373}, u'5': {'no_crash':