In [None]:
import numpy as np
import pandas as pd

In [None]:
## Load the three report CSVs produced by a single output run.
## The run directory is named once so that pointing the notebook at a different
## run is a one-line change and the three reads cannot drift apart.
REPORTS_DIR = 'output_2023_09_05/reports'

safety = pd.read_csv(f'{REPORTS_DIR}/safety-4-combined-c-crashes-volume.csv')
travel = pd.read_csv(f'{REPORTS_DIR}/travel-4-combined.csv')
travel_elements = pd.read_csv(f'{REPORTS_DIR}/travel-2-change.csv')

## Finding how many projects have zero or negative travel benefits, or a zero or positive change in crashes
- Filter projects where the total increase in travel is <= 0
- Filter projects where the change in crashes is >= 0 (no reduction, or an increase, in crashes)
- Find total # of projects and see what proportion of projects have "unreasonable" travel or safety benefits
- Graphs of travel/safety benefits by mode
- Graphs of travel/safety benefits from min -> max to visually see how many of the projects have "unreasonable" benefits

TODO:
- Find projects where ALL travel or safety benefits are "unreasonable" (not just at least one of the benefits)

### Initial counts/proportions

In [None]:
## Distinct project IDs, taken from the travel report only.
## NOTE(review): a more principled source for the project list is still wanted.
project_id = pd.unique(travel["Project ID"])
project_id.shape[0]
## Observed on this run: 186 projects in total

In [None]:
## Rows with a "bad" travel result: a zero or negative total increase in travel.
## A zero here is not necessarily a problem - one mode can show no increase
## while the other modes of the same project show a positive one.
## (Idea: sum the increase across modes and check whether THAT total is ever 0.)
travel.loc[travel["Total Increase in Travel"].le(0)]

In [None]:
## Project IDs having at least one row in each travel category.
## "bad" = zero or negative increase; the other two split that into its parts.
bad_travel_id = travel.loc[travel["Total Increase in Travel"] <= 0, "Project ID"].unique()
negative_travel_id = travel.loc[travel["Total Increase in Travel"] < 0, "Project ID"].unique()
zero_travel_id = travel.loc[travel["Total Increase in Travel"] == 0, "Project ID"].unique()

In [None]:
## Share of all projects that have at least one "bad" (<= 0) travel result
bad_travel_id.shape[0] / project_id.shape[0]
## Observed on this run: 63% of projects have a zero or negative increase in
## travel for at least one mode or estimate

In [None]:
## Share of all projects that have at least one negative travel result
negative_travel_id.shape[0] / project_id.shape[0]
## Suspicion: these are just the lower estimates with conventional-bike-lane

In [None]:
## Share of all projects that have at least one exactly-zero travel result
zero_travel_id.shape[0] / project_id.shape[0]

In [None]:
## Rows with a "bad" safety result: the change in crashes is zero or positive
safety.loc[safety["Change in crashes"].ge(0)]

In [None]:
## Project IDs having at least one row in each safety category.
## "bad" = change in crashes >= 0, "good" = change in crashes < 0.
## NOTE(review): unique() is acknowledged as not the best way to do this.
bad_safety_id = safety.loc[safety["Change in crashes"] >= 0, "Project ID"].unique()
good_safety_id = safety.loc[safety["Change in crashes"] < 0, "Project ID"].unique()

## The "bad" category split into its strictly positive and exactly-zero parts
positive_safety_id = safety.loc[safety["Change in crashes"] > 0, "Project ID"].unique()
zero_safety_id = safety.loc[safety["Change in crashes"] == 0, "Project ID"].unique()

In [None]:
## Share of all projects that have at least one "bad" (>= 0) safety result.
## The denominator is the project list derived from the travel report.
bad_safety_id.shape[0] / project_id.shape[0]
## Observed on this run: EVERY SINGLE PROJECT has at least one such result?!
## Follow-up: break this down by mode, location type, outcome, estimate etc.

In [None]:
## Share of all projects that have at least one strictly positive change in crashes
positive_safety_id.shape[0] / project_id.shape[0]

In [None]:
## Share of all projects that have at least one exactly-zero change in crashes
zero_safety_id.shape[0] / project_id.shape[0]

In [None]:
## Share of all projects that have at least one "good" (negative) change in crashes
good_safety_id.shape[0] / project_id.shape[0]
## Observed on this run: about half of the projects have at least one "good" result.
## The good and bad sets presumably overlap - part of a project is positive
## and another part is negative.
## Follow-up: separate by mode, location type, outcome.

In [None]:
## Element-level rows whose increase in travel is negative
travel_elements.loc[travel_elements["Increase in travel"].lt(0)]
## Finding: conventional bike lane is what produces the decreased travel.
## Checked against the lookup table: the lowest estimate for
## conventional-bike-lane is a 21% decrease in travel (-21%), so this is
## expected behavior.

In [None]:
## Project-level rows whose total increase in travel is negative
travel.loc[travel["Total Increase in Travel"].lt(0)]
## For all of these projects the negative change in travel comes only from
## conventional bike lane

### Graphs

#### Graph 1: Proportion of projects with negative or zero travel results
#### Graph 2: Proportion of projects with positive or zero safety results

#### Graph 3: overall travel results across all projects
1. Separate by mode
2. Use only mean for now for simplicity and because mean is what is primarily displayed in the tool (I think)

In [None]:
## One line per mode of existing travel, drawn in row order.
## legend takes a boolean; the string "true" only worked because any
## non-empty string is truthy.
travel.groupby("M Mode")["Existing Travel"].plot(legend=True)

In [None]:
## One line per mode of weighted existing travel, drawn in row order.
## legend takes a boolean; the string "true" only worked because any
## non-empty string is truthy.
travel.groupby("M Mode")["Weighted Existing Travel"].plot(legend=True)

In [None]:
## One line per mode of the total increase in travel (all estimates), in row order.
## legend takes a boolean; the string "true" only worked because any
## non-empty string is truthy.
travel.groupby("M Mode")["Total Increase in Travel"].plot(legend=True)

In [None]:
## Keep only the rows whose estimate type is "mean"
travel_mean = travel.loc[travel["K Estimate"].eq("mean")]

In [None]:
## One line per mode of the total increase in travel, mean estimate only.
## Group by travel_mean's own mode column: the original took the key from the
## unfiltered `travel` frame and relied on implicit index alignment.
## legend takes a boolean, not the string "true".
travel_mean.groupby("M Mode")["Total Increase in Travel"].plot(legend=True)
## Interesting how the two modes don't really line up, as they do for safety results!

In [None]:
## Bicycling, mean estimate: total increase in travel sorted from min to max
## and plotted against rank (index reset to 0..n-1 after sorting).
travel_mean.loc[
    travel_mean["M Mode"] == "bicycling", "Total Increase in Travel"
].sort_values().reset_index(drop=True).plot()
## Without the mean-only filter a very small number of negatives show up here,
## because the lower estimate for conventional bike lane is negative.

In [None]:
## Walking, mean estimate: total increase in travel sorted from min to max
## and plotted against rank (index reset to 0..n-1 after sorting).
travel_mean.loc[
    travel_mean["M Mode"] == "walking", "Total Increase in Travel"
].sort_values().reset_index(drop=True).plot()

#### Graph 4: overall safety results across all projects
For now...
1. Start with just crashes (not injury/death) for simplicity
2. Separate by mode (summed across location type)
3. Maybe should also separate by estimate type? (maybe some kind of range of values) but anyway I think this gives a pretty clear answer

In [None]:
## Keep only the rows whose outcome is "crash" (injury/death rows are left out for now)
safety_crash = safety.loc[safety["O Outcome"].eq("crash")]

In [None]:
## Change in crashes for every crash-outcome row, in row order
safety_crash["Change in crashes"].plot.line()
## Possible refinement (not enabled): restrict to the mean estimate first, i.e.
## safety_crash[safety_crash["K Estimate"] == "mean"]["Change in crashes"].plot()

In [None]:
## Change in crashes for the bicycling rows only, in row order
safety_crash.loc[safety_crash["M Mode"] == "bicycling", "Change in crashes"].plot()

In [None]:
## Change in crashes for the walking rows only, in row order
safety_crash.loc[safety_crash["M Mode"] == "walking", "Change in crashes"].plot()

In [None]:
## Change in crashes for the combined-mode rows only, in row order
safety_crash.loc[safety_crash["M Mode"] == "combined", "Change in crashes"].plot()

In [None]:
## Bicycling: change in crashes sorted from min to max and plotted against rank
safety_crash.loc[
    safety_crash["M Mode"] == "bicycling", "Change in crashes"
].sort_values().reset_index(drop=True).plot()

In [None]:
## Walking: change in crashes sorted from min to max and plotted against rank
safety_crash.loc[
    safety_crash["M Mode"] == "walking", "Change in crashes"
].sort_values().reset_index(drop=True).plot()

In [None]:
## Combined mode: change in crashes sorted from min to max and plotted against rank
## TODO: layer the sorted curves for all modes into a single graph
safety_crash.loc[
    safety_crash["M Mode"] == "combined", "Change in crashes"
].sort_values().reset_index(drop=True).plot()

In [None]:
## One line per mode of the change in crashes, drawn in row order.
## legend takes a boolean; the string "true" only worked because any
## non-empty string is truthy.
safety_crash.groupby("M Mode")["Change in crashes"].plot(legend=True)

In [None]:
## Finally, compare the change in crashes over the project time frame with the
## base change in crashes. The time-frame figure is what the tool reports, and
## it should be basically proportional to the base figure.
## Both calls sit in the same cell so the two sets of per-mode lines can be
## viewed together. legend takes a boolean, not the string "true".
safety_crash.groupby("M Mode")["Change in crashes over project time frame"].plot(legend=True)
safety_crash.groupby("M Mode")["Change in crashes"].plot(legend=True)
## Not super helpful because the numbers are so huge - but it gives a good
## picture of how quite a few projects supposedly increase crashes by many thousands

So the result is basically that almost every single project shows a positive change in crashes (i.e. an increase in crashes, the opposite of a safety benefit)

- Most projects are generally around 0 crash change
- a few have very large (several thousand) crash change
- a few have (the expected) negative change

These all go up even further to a change of thousands of crashes once they are scaled to the project time frame

So yes, this is in fact a very widespread issue with the tool