In [None]:
import numpy as np
import pandas as pd
import matplotlib as plt

### Go through the crash estimation steps for one project and see where the numbers start to get off

In [None]:
## First, pick a project with a large/unreasonable number of modelled crashes (more than 100 or so).
crashes = pd.read_csv('output_2023_09_05/reports/safety-4-combined-b-crashes-all.csv')
# Boolean mask of the rows whose modelled crash count exceeds the threshold.
has_excess_crashes = crashes["ECmoj model"] > 100
# Distinct project IDs that have at least one such row.
crashes.loc[has_excess_crashes, "Project ID"].unique()

Project ID (picked the first one from table above): 64d2a1c2597e1e819a7b4309

Open the 'debug' folder for this project and pull out data to calculate the model crashes
Actually if I can get as much stuff as possible from overall reports, that might make this easier (to be able to iterate/generalize to all projects later)

Data needed:
- Length, functional class, volume class for all segments/intersections in the project
    - overall-5-ways.csv, overall-6-intersections.csv
- Project length/count totals
    - overall-2-reach-type.csv (or just calculate from overall-5-ways.csv and overall-6-intersections.csv)
- Ljvf totals
    - overall-3-reach-Ljvf.csv
- Alpha constants
    - safety-4-combined-a-crashes-model.csv
- Volume/demand for all segments/intersections in the project
    - overall-5-ways.csv, overall-6-intersections.csv
- Volume/demand totals
    - safety-5-volume-d-combined.csv
- ECCmojvf (to compare against my manual results)
    - safety-4-combined-a-crashes-model.csv
- ECmoj (to compare against my manual results)
    - safety-4-combined-b-crashes-all.csv

In [None]:
## Load every input table needed for the manual calculation in one pass.
## The paths are listed in the same order as the names they are unpacked into.
segments, intersections, alpha, Ljvf, volume, crash_model = (
    pd.read_csv(csv_path)
    for csv_path in (
        'output_2023_09_05/reports/overall-5-ways.csv',
        'output_2023_09_05/reports/overall-6-intersections.csv',
        'output_2023_09_05/lookups/alpha.csv',
        'output_2023_09_05/reports/overall-3-reach-Ljvf.csv',
        'output_2023_09_05/reports/safety-5-volume-d-combined.csv',
        'output_2023_09_05/reports/safety-4-combined-a-crashes-model.csv',
    )
)

In [None]:
## Look for the first candidate project in the ways (segments) report.
segments.loc[segments["Project ID"].eq("64d2a1c2597e1e819a7b4309")]
## Oops, this project isn't in this table... I wonder why? Anyway, let's choose a different project!

In [None]:
## Number of distinct project IDs in the crash report (a missing ID, if any, counts as one value).
crashes["Project ID"].nunique(dropna=False)

In [None]:
## Number of distinct project IDs in the ways report (a missing ID, if any, counts as one value).
segments["Project ID"].nunique(dropna=False)
## I wonder why this has 10 fewer projects than the other table... anyway...

New project ID: 64962a7f1930d10600997fdf

In [None]:
## Get all segments and intersections that belong to the project under investigation.
PROJECT_ID = "64962a7f1930d10600997fdf"
project_segments = segments.loc[segments["Project ID"] == PROJECT_ID]
project_intersections = intersections.loc[intersections["Project ID"] == PROJECT_ID]

### 1. Ljvf

In [None]:
## Ljvf for segments: total length per (type, volume class, functional class).
## Open question: what is the 'V Volume class' in Ljvf — bicycle or pedestrian volume class?
## Based on the technical documentation and emails from Matt, I think it should be the bicycle
## volume class for roadways and the pedestrian volume class for intersections.
segment_class_keys = ["Type", "Bicycle volume class", "Functional class"]
L_segment_vf = project_segments.groupby(segment_class_keys)["Length"].sum()

In [None]:
## Total Ljvf for segments: summed length, split only by type.
project_segments.groupby("Type")["Length"].agg("sum")

In [None]:
## Ljvf for intersections: number of nodes per (type, pedestrian volume class, functional class).
intersection_class_keys = ["Type", "Pedestrian volume class", "Functional class"]
L_intersection_vf = project_intersections.groupby(intersection_class_keys)["Node ID"].count()

In [None]:
## Total Ljvf for intersections: node count, split only by type.
project_intersections.groupby("Type")["Node ID"].agg("count")

### 2. Alpha constant

In [None]:
## Add e^alpha as its own column, computed from the alpha constant.
alpha = alpha.assign(e_alpha=np.exp(alpha["alpha"]))

### 3. Volume

$j=roadway, EV_{mj}=\sum_{w}E_{wm}$

$j=intersection, EV_{mj}=\sum_{i}E_{im}$

In [None]:
# Replace the "Not applicable" placeholder with NaN throughout both project tables so that
# the exposure columns can be converted to a float type afterwards.
# np.nan is used because the capitalised alias np.NaN was removed in NumPy 2.0.
project_segments_n = project_segments.replace("Not applicable", np.nan)
project_intersections_n = project_intersections.replace("Not applicable", np.nan)

In [None]:
## Convert the exposure columns of both cleaned tables to numeric dtype.
exposure_columns = ["Bicycle exposure", "Pedestrian exposure"]
for frame in (project_segments_n, project_intersections_n):
    for column in exposure_columns:
        frame[column] = pd.to_numeric(frame[column])

In [None]:
## Volume = sum of exposure across all ways/intersections, kept separate per "Type".
V_bicycle_segment = project_segments_n.groupby(["Type"])["Bicycle exposure"].sum()
V_pedestrian_segment = project_segments_n.groupby(["Type"])["Pedestrian exposure"].sum()
V_bicycle_intersection = project_intersections_n.groupby(["Type"])["Bicycle exposure"].sum()
V_pedestrian_intersection = project_intersections_n.groupby(["Type"])["Pedestrian exposure"].sum()
## Show all four totals. The names must match the variables defined above: the earlier
## V_bicycling_* / V_walking_* spellings were never defined and raised a NameError.
print(V_bicycle_segment, V_pedestrian_segment, V_bicycle_intersection, V_pedestrian_intersection)

### 4. Crashes by functional/volume class

$ECC_{cmojvf} = e^{\alpha_{mojvf}} \cdot L_{jvf} \cdot (EV_{cmj})^{p}$

$EC_{cmoj} = \sum_{f}\sum_{v}ECC_{cmojvf}$

1. Starting with segments

In [None]:
## Inspect the index of L_segment_vf (level names and values) before aligning it with the alpha table.
L_segment_vf.index

In [None]:
## Align the Ljvf index with the alpha table so the two can be merged on
## "volume" and "functional class".
## Index.rename and MultiIndex.set_levels both return a NEW index rather than modifying the
## existing one, so each result has to be assigned back to L_segment_vf.index to take effect.
## Both assignments are idempotent, so re-running this cell is safe.
L_segment_vf.index = L_segment_vf.index.rename(["Type", "volume", "functional class"])
## Lower-case the volume-class labels (index level 1).
L_segment_vf.index = L_segment_vf.index.set_levels(
    L_segment_vf.index.levels[1].str.lower(), level=1
)
## Display the lengths for the 'network' type only.
L_segment_vf.loc['network']

In [None]:
## Show the alpha rows that apply to roadways.
alpha.loc[alpha["location type"].eq("roadway")]

In [None]:
## Attach the segment lengths (Ljvf) to the roadway alpha rows, matching on volume class and
## functional class. The outer join keeps rows that appear on only one side.
roadway_alpha = alpha[alpha["location type"] == "roadway"]
alpha_L_segment_vf = roadway_alpha.merge(
    L_segment_vf,
    on=["volume", "functional class"],
    how='outer',
)
## e^alpha * Ljvf
alpha_L_segment_vf["e_alpha_Length"] = alpha_L_segment_vf["e_alpha"].mul(alpha_L_segment_vf["Length"])

In [None]:
## Build the per-mode segment volume table (Vmj) and attach it to every alpha/length row.
## .iloc[0] takes the first "Type" group explicitly by position. Plain [0] on these
## string-indexed Series relied on a deprecated positional fallback in pandas.
## NOTE(review): groupby sorts its keys, so this is the first Type in sorted order — confirm
## that this is the type the volume should come from.
bicycle_volume = V_bicycle_segment.iloc[0]
pedestrian_volume = V_pedestrian_segment.iloc[0]
Vm_segment = pd.DataFrame(
    data={
        "mode": ["bicycling", "walking", "combined"],
        "Vmj": [bicycle_volume, pedestrian_volume, bicycle_volume + pedestrian_volume],
    }
)
alpha_L_segment_vf_V_m = pd.merge(alpha_L_segment_vf, Vm_segment, on="mode")

In [None]:
## Derive the remaining terms of the crash formula, with exponent p = 0.5:
##   Vmj_p         = Vmj^0.5
##   e_alpha_Vmj_p = e^alpha * Vmj^0.5
##   ECCmojvf      = e^alpha * Ljvf * Vmj^0.5
alpha_L_segment_vf_V_m = alpha_L_segment_vf_V_m.assign(
    Vmj_p=lambda df: df["Vmj"] ** 0.5,
    e_alpha_Vmj_p=lambda df: df["e_alpha"] * df["Vmj_p"],
    ECCmojvf=lambda df: df["e_alpha_Length"] * df["Vmj_p"],
)
alpha_L_segment_vf_V_m

In [None]:
## This is e^alpha * Volume^p, which should be crashes/mile; one line per (mode, outcome) group.
rate_by_mode_outcome = alpha_L_segment_vf_V_m.groupby(["mode", "outcome"])["e_alpha_Vmj_p"]
rate_by_mode_outcome.plot(legend=True)

In [None]:
## Crashes (ECCmojvf); one line per (mode, outcome) group.
crashes_by_mode_outcome = alpha_L_segment_vf_V_m.groupby(["mode", "outcome"])["ECCmojvf"]
crashes_by_mode_outcome.plot(legend=True)

In [None]:
## ECmoj (summed across volume and functional classes).
## numeric_only=True limits the sum to numeric columns. From pandas 2.0 a plain .sum() no
## longer drops the text columns (location type, volume, functional class) and would
## concatenate them or raise instead.
## The "ECCmojvf" column of the result is ECmoj; sums of the other numeric columns
## (e.g. alpha, Vmj) are by-products and not meaningful on their own.
alpha_L_segment_vf_V_m.groupby(["mode", "outcome"]).sum(numeric_only=True)

So when calculated manually, segments also have hundreds/thousands of crashes!? Clearly it seems like there is some problem or difference in the way the tool is calculating crashes from the given data (regardless of whether there are also issues with the underlying data/constants). This might have something to do with the Ljvf calculation...

2. Next intersections

In [None]:
## TODO: the intersection calculation is not implemented yet; this cell is entirely commented out.
## The three lines below are the candidate Vm_intersection definitions for the bicycling,
## walking and combined modes respectively.
#Vm_intersection = V_bicycle_intersection[0]
#Vm_intersection = V_pedestrian_intersection[0]
#Vm_intersection = V_bicycle_intersection[0] + V_pedestrian_intersection[0]

3. Compare with the Ljvf, Vmj, ECCmojvf, ECmoj used in the tool

In [None]:
## NOTE: everything in this cell is commented out, so nothing here runs.
## It is a loop-based draft of the segment calculation: for each mode and outcome it prints
## e^alpha * Ljvf and ECCmojvf = e^alpha * Ljvf * Vmj^0.5 row by row.
## very messy but it is supposed to be slow and manual so it's ok
## start with segments:
## for mode in ["bicycling","walking","combined"]:
##    if mode == "bicycling":
##        Vm_segment = V_bicycle_segment[0]
##    if mode == "walking":
##        Vm_segment = V_pedestrian_segment[0]
##    if mode == "combined":
##        Vm_segment = V_bicycle_segment[0] + V_pedestrian_segment[0]
##    print("Volume for %s"% mode, Vm_segment)
##    Vm_segment_p = pow(Vm_segment,0.5)
##    print("Volume^0.5 for %s"% mode, Vm_segment_p)
##    for outcome in ["crash","injury","death"]:
##        alpha_segment_mojvf = alpha[alpha["location type"] == "roadway"][alpha["mode"] == mode][alpha["outcome"] == outcome]
##        for index,row in alpha_L_segment_vf.iterrows():
##            print([mode,outcome,row['volume'],row['functional class']],row["e_alpha"],row["Length"])
##            print("e^alpha * Ljvf = %s"% row["e_alpha_Length"])
##            ECCmojvf = row["e_alpha_Length"] * Vm_segment_p
##            print("ECCmojvf = e^alpha * Ljvf * Vmj^0.5 = %s"% ECCmojvf)
        ## for index,row in alpha_segment_mojvf.iterrows():
            ## (volume_class,functional_class) = (row["volume"],row["functional class"])
            ## print([mode,outcome,volume_class,functional_class])
            ## length = L_segment_vf.loc[('network',volume_class,functional_class)]
        ## print(alpha_segment_mjvf)
        ## print(L_segment_vf)
        ## print(mode,outcome)
        ## L_segment_vf
        ## L_intersection_vf