In [19]:
import pandas as pd
import pickle
import os

In [20]:
def EuclideanDistance(a,b):
    x = (a[0]-b[0])**2
    y = (a[1]-b[1])**2
    return (x+y)**(1/2)

In [21]:
# load original speed data
original_speed = pd.read_csv("link_speed.csv",header=None)
original_speed = original_speed[2]

In [22]:
# load road length file and join with id
link_file = pd.read_csv("link_file.csv",header=None)
road_file = pd.read_csv("road_file.csv",header=None)
road_file[3] = link_file[2]
road_file['speed'] = 0
road_file = road_file.rename(columns={0: "WISLR_LINKID", 1: "FROM", 2:'TO', 3:"LENGTH", "speed" :"SPEED"})

In [23]:
# read functional class data
class_file = pd.read_csv(os.path.join("transportation_data","wislr-data","wislr_rdwy_link_fnct_cls.csv"))

In [24]:
#join road file with class data
funClass = []

for i in list(road_file["WISLR_LINKID"]):
    df = class_file[class_file["RDWY_LINK_ID"] == i]
    fClass = df.iloc[0,-1]
    if type(fClass) != float:
        funClass.append(fClass)
    else:
        funClass.append("None")
        
road_file["functional_class"] = funClass

In [25]:
# read crash speed data
crash_speed = pd.read_csv(os.path.join("transportation_data","crash-data","crash-data-link-2017-2020-postspd.csv"))
crash_speed = crash_speed[["WISLR_LINKID","POSTSPD1"]]
crash_speed = crash_speed.rename(columns = {"POSTSPD1" : "SPEED"})
crash_speed = crash_speed.dropna()

In [26]:
# read coordinate data
with open("reference_coordinate", "rb") as f:
    coord = pickle.load(f)

In [27]:
road_coordinate = []
for index, row in road_file.iterrows():
    road_coordinate.append(coord[row["TO"]][:2])
road_file['coordinate'] = road_coordinate

In [28]:
speed = []
for i in road_file['WISLR_LINKID']:
    df = crash_speed[crash_speed['WISLR_LINKID'] == i]
    
    # check if there is a crash data on that road
    if df.shape[0] == 0:
        speed.append(0)
        continue
    
    # calculate average speed  
    speed.append(df['SPEED'].mean())

In [29]:
road_file['SPEED'] = speed

In [30]:
# fill in the missing functional class by using nearest neighbor
hasClassID = list(road_file[road_file['functional_class'] != "None"]['WISLR_LINKID'])
hasClassCoord = list(road_file[road_file['functional_class'] != "None"]['coordinate'])
hasClassName = list(road_file[road_file['functional_class'] != "None"]['functional_class'])

funClass = []
for i in road_file['WISLR_LINKID']:
    row = road_file[road_file['WISLR_LINKID'] == i]
    fClass = row['functional_class'].iloc[0]
    if fClass != "None":
        funClass.append(fClass)
    else:
        currentCoord = row['coordinate'].iloc[0]
        distTemp = []
        for j in hasClassCoord:
            distTemp.append(EuclideanDistance(j,currentCoord))
        funClass.append(hasClassName[distTemp.index(max(distTemp))])
        
road_file["functional_class"] = funClass

In [31]:
# calculate the average speed for each functional class
df = road_file[road_file["SPEED"] > 0]
estimated_speed = {}
for i in list(set(df["functional_class"])):
    estimated_speed[i] = df[df["functional_class"]==i]["SPEED"].mean()
    
estimated_speed['Rural Minor Collector'] = df[df["functional_class"].str.contains("Rural")]["SPEED"].mean()

In [32]:
estimated_speed

{'Urban Local': 27.405851229380644,
 'Urban Other Connecting Link Principal Arterial': 33.2473611278323,
 'Rural Minor Arterial': 50.10185185185185,
 'Urban Minor Arterial': 32.46903780958517,
 'Rural Local': 45.83094939480809,
 'Urban Collector': 28.926700871145318,
 'Rural Major Collector': 49.30952380952381,
 'Rural Minor Collector': 46.32954386882959}

In [33]:
speed = []
for i in road_file['WISLR_LINKID']:
    row = road_file[road_file['WISLR_LINKID'] == i]
    spd = float(row['SPEED'].iloc[0])
    fClass = row['functional_class'].iloc[0]
    if spd > 0:
        speed.append(spd)
    else:
        speed.append(estimated_speed[fClass])
        
road_file["SPEED"] = speed

In [34]:
road_file["TIME"] = road_file["LENGTH"]/road_file["SPEED"]

In [35]:
#link_time = road_file[["FROM","TO","TIME"]]
#link_time.to_csv("link_time_updated.csv",index=True, header=True)

In [36]:
road_file

Unnamed: 0,WISLR_LINKID,FROM,TO,LENGTH,SPEED,functional_class,coordinate,TIME
0,4818580,1661200,1660820,4066,33.247361,Urban Other Connecting Link Principal Arterial,"[562949.7867, 297024.33029999956]",122.295420
1,4819008,1661678,1661705,158,27.405851,Urban Local,"[550324.932, 294479.6420000009]",5.765192
2,4819009,1661704,1661705,264,27.405851,Urban Local,"[550324.932, 294479.6420000009]",9.632979
3,4819014,1661723,1661697,581,27.405851,Urban Local,"[550579.0520000001, 294491.98699999973]",21.199852
4,4819015,1661705,1661723,264,27.405851,Urban Local,"[550411.9919999996, 294452.6740000006]",9.632979
...,...,...,...,...,...,...,...,...
3127,5571520,1663210,1663276,1003,28.926701,Urban Collector,"[557856.7019999996, 291739.8699999992]",34.673847
3128,5571521,1661558,1661551,317,27.405851,Urban Local,"[562454.9359999998, 294905.79399999976]",11.566873
3129,5571522,1661551,1661558,317,27.405851,Urban Local,"[562352.3899999997, 294899.0700000003]",11.566873
3130,5571523,2016715,2005025,1108,28.926701,Urban Collector,"[561378.46, 293921.2990000006]",38.303711


In [39]:
road_file = road_file[["WISLR_LINKID","functional_class"]]

In [40]:
road_file.to_csv("functional_class.csv",index=False)

# Save rural or urban

In [19]:
df = road_file[["WISLR_LINKID","functional_class"]]

In [22]:
ruralUrban = []
for i in list(df["functional_class"]):
    if "Urban" in i:
        ruralUrban.append("Urban")
    else:
        ruralUrban.append("Rural")

In [25]:
road_file['RuralOrUrban'] = ruralUrban

In [27]:
link_id = list(road_file["WISLR_LINKID"])
RorU = list(road_file["RuralOrUrban"])

In [28]:
RorUDict = {}
for i in range(len(link_id)):
    RorUDict[link_id[i]] = RorU[i]

In [32]:
with open("ruralOrUrban.pk",'wb') as f:
    pickle.dump(RorUDict,f) 