In [2]:
import folium
import os
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shapefile
import pickle
import random

In [17]:
# Open the Dane County roadway-coordinate shapefile.
shape_path = os.path.join(
    "..", "..", "transportation_data", "dane-county",
    "Dane_County_Shape_file", "Dane_Rdwy_Coord.shp",
)
# Alternative input (roadway link lines), kept for reference:
#shape_path = os.path.join("..","..","transportation_data","dane-county","Dane_County_Shape_file","Dane_County_Rdwy_link_line.shp")
shape = shapefile.Reader(shape_path)

In [18]:
# Collect the distinct road IDs found in the shapefile.
# Field 1 of every shape record is taken as the road ID; building a set
# first removes duplicates before the result is turned back into a list.
linkIDs = list({shape_rec.record[1] for shape_rec in shape.shapeRecords()})

In [105]:
# Open the reference-site shapefile and map each site ID to its position.
ref_path = os.path.join(
    "..", "..", "transportation_data", "dane-county",
    "Dane_County_Shape_file", "Dane_Ref_Coord.shp",
)
ref = shapefile.Reader(ref_path)

# Key: field 1 of the record. Value: the first point of the shape with its two
# components swapped, i.e. [points[0][1], points[0][0]]
# (presumably [latitude, longitude] -- confirm against the shapefile's CRS).
ref_dict = {}
for ref_rec in ref.shapeRecords():
    first_point = ref_rec.shape.points[0]
    ref_dict[ref_rec.record[1]] = np.array([first_point[1], first_point[0]])

In [106]:
# load functional class file
fnct_cls_path = os.path.join("..","..","transportation_data","dane-county","dane_wislr_rdwy_link_fnct_cls.csv")
# newline='' is what the csv module requires of files handed to csv.reader;
# without it, newlines embedded in quoted fields are not parsed correctly.
with open(fnct_cls_path, 'r', newline='') as f:
    fnct_rows = list(csv.reader(f))

# The first row holds the column names; every following row is one road link.
fnct_header = fnct_rows[0]
fnct_cls = fnct_rows[1:]

# Resolve the column positions once instead of searching the header per row.
fnct_link_col = fnct_header.index('RDWY_LINK_ID')
fnct_from_col = fnct_header.index('REF_SITE_FROM_ID')
fnct_to_col = fnct_header.index('REF_SITE_TO_ID')

# Distinct road link IDs that appear in the functional class file.
fnct_cls_list = list({int(row[fnct_link_col]) for row in fnct_cls})

# Road link ID -> array([from reference-site ID, to reference-site ID]).
# If a link ID occurs on several rows, the last row wins.
road_to_ref = {
    int(row[fnct_link_col]): np.array([int(row[fnct_from_col]), int(row[fnct_to_col])])
    for row in fnct_cls
}

In [91]:
# Display the column names read from the functional class file.
fnct_header

['RDWY_LINK_ID',
 'REF_SITE_FROM_ID',
 'REF_SITE_TO_ID',
 'LCM_FROM_TO_DIS',
 'LOC_FLKOS_FT',
 'LOC_TLKOS_FT',
 'ST_PRMY_SYMB_TY',
 'FNCT_CLS_CTGY_TYCD',
 'FNCT_CLS_CTGY_TYDC']

## The effective roads are the intersection of the roads in the shape file and the roads in the road information file

In [50]:
# Road IDs that occur both in the shapefile and in the functional class file.
total_roads = set(linkIDs) & set(fnct_cls_list)

In [56]:
# Display the functional class column names (used for the column lookups that follow).
fnct_header

['RDWY_LINK_ID',
 'REF_SITE_FROM_ID',
 'REF_SITE_TO_ID',
 'LCM_FROM_TO_DIS',
 'LOC_FLKOS_FT',
 'LOC_TLKOS_FT',
 'ST_PRMY_SYMB_TY',
 'FNCT_CLS_CTGY_TYCD',
 'FNCT_CLS_CTGY_TYDC']

# Get the distance of each road

In [57]:
# Map each road link ID to its LCM_FROM_TO_DIS value; both are parsed as int.
# If a link ID occurs on several rows, the last row wins.
dist_link_col = fnct_header.index('RDWY_LINK_ID')
dist_value_col = fnct_header.index('LCM_FROM_TO_DIS')
distance_dict = {int(row[dist_link_col]): int(row[dist_value_col]) for row in fnct_cls}

# Use crash information to infer speed

In [60]:


# load crash file
crash_path = os.path.join("..","..","transportation_data","dane-county","dane_crash_link_postspd_2017_2020.csv")
# newline='' is what the csv module requires of files handed to csv.reader;
# without it, newlines embedded in quoted fields are not parsed correctly.
with open(crash_path, 'r', newline='') as f:
    crash_rows = list(csv.reader(f))

# The first row holds the column names; every following row is one crash record.
crash_header = crash_rows[0]
crash_file = crash_rows[1:]


In [87]:
# Column positions in the crash file, looked up once.
crash_link_col = crash_header.index('WISLR_LINKID')
crash_speed_col = crash_header.index('POSTSPD1')

# Every known road starts with an empty list of observed speeds.
speed_dict = {road: [] for road in distance_dict}

# Attach each crash's POSTSPD1 value to its road. Crashes on roads that are
# not in distance_dict, or whose speed field is empty, are ignored.
has_crash_speed = []
count_crash = 0
for crash_row in crash_file:
    roadID = int(crash_row[crash_link_col])
    speed = crash_row[crash_speed_col]
    if speed == '' or roadID not in speed_dict:
        continue
    speed_dict[roadID].append(int(speed))
    has_crash_speed.append(roadID)
    count_crash += 1

# Roads that received no usable crash speed at all.
not_has_crash_speed = list(set(distance_dict.keys()) - set(has_crash_speed))

# Collapse each road's list of crash speeds into its mean. Roads without any
# crash speed keep their empty list.
for road in set(has_crash_speed):
    speed_dict[road] = np.mean(speed_dict[road])

In [131]:
# use nearest neighbor to assign a speed to the roads that do not have any crash

def calculate_middle(ref1,ref2):
    """Return the midpoint of two reference sites.

    ``ref1`` and ``ref2`` are keys of the module-level ``ref_dict``; the result
    is the average of the two positions stored there. A key that is missing
    from ``ref_dict`` raises ``KeyError``.
    """
    start_pos = ref_dict[ref1]
    end_pos = ref_dict[ref2]
    return (start_pos + end_pos) / 2

def calculate_distance(road1,road2):
    """Return the L1 distance between two roads.

    ``road1`` and ``road2`` are keys of the module-level ``road_coord``; the
    result is the sum of the absolute differences of their coordinates.
    A key that is missing from ``road_coord`` raises ``KeyError``.
    """
    offset = road_coord[road1] - road_coord[road2]
    return sum(abs(offset))

# 1. get the coordinate of all the roads
# A road's coordinate is the midpoint of its two reference sites. Roads with
# a reference site that is missing from ref_dict get no coordinate.
road_coord = {}
for road in distance_dict:
    site_from, site_to = road_to_ref[road]
    if site_from not in ref_dict or site_to not in ref_dict:
        continue
    road_coord[road] = calculate_middle(site_from, site_to)

# Keep only the roads that have a coordinate; the crash list is also
# de-duplicated here.
has_crash_speed = list({road_id for road_id in has_crash_speed if road_id in road_coord})
not_has_crash_speed = [road_id for road_id in not_has_crash_speed if road_id in road_coord]

In [None]:
# For every road without crash data, copy the speed of the nearest road that
# has crash data. "Nearest" means the smallest L1 distance between the road
# coordinates in road_coord (the same metric as calculate_distance); on a tie
# the road that comes first in has_crash_speed wins.
idx = 0
speed_dict_no_crash = {}
if has_crash_speed:
    # Stack the donor coordinates once so that each lookup is a single
    # vectorized NumPy pass rather than a Python loop over every donor road.
    crash_coords = np.array([road_coord[road2] for road2 in has_crash_speed])
    crash_speeds = [speed_dict[road2] for road2 in has_crash_speed]
for road1 in not_has_crash_speed:
    if idx % 500 == 0:
        print(idx)  # progress indicator
    if has_crash_speed:
        l1_dist = np.abs(crash_coords - road_coord[road1]).sum(axis=1)
        # np.argmin returns the first occurrence of the minimum.
        speed_dict_no_crash[road1] = crash_speeds[int(np.argmin(l1_dist))]
    else:
        # No road has crash data, so there is no speed to copy.
        speed_dict_no_crash[road1] = None
    idx += 1
        
        

In [136]:
# Merge both sources into a single road -> speed mapping: the speeds copied
# from the nearest neighbor go in first, then the speeds of the roads listed
# in has_crash_speed.
final_speed_dict = dict(speed_dict_no_crash)
final_speed_dict.update((road, speed_dict[road]) for road in has_crash_speed)

In [142]:
# Write the speed and distance dictionaries to pickle files in the current
# working directory.
for pickle_name, payload in (
    ('Dane_speed_dict.pk', final_speed_dict),
    ('Dane_distance_dict.pk', distance_dict),
):
    with open(pickle_name, 'wb') as f:
        pickle.dump(payload, f)