In [5]:
# header info
import pandas as pd
import geopandas as gpd
from siuba import *
import numpy as np
from shared_utils import utils
pd.options.display.max_columns = 100
import gcsfs
from calitp_data_analysis.sql import to_snakecase

GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"

# CSIS Data Entry Scoring - Safety

## Part 1: Evaluate Crash Reduction Factors

In [10]:
safety_data = pd.read_parquet(f'{GCS_FILE_PATH}data_entry_raw_safety.parquet')

In [4]:
safety_data.head()

Unnamed: 0,submission_log_number,program,project_name,data_enterer_name,done_y_n,notes,countermeasure_1,crf_1,reference_1,countermeasure_2,crf_2,reference_2,countermeasure_3,crf_3,reference_3,countermeasure_4,crf_4,reference_4,countermeasure_5,crf_5,reference_5,countermeasure_6,crf_6,reference_6
0,1,SCCP,U.S. 101 Connected Communities Corridor Rail a...,Llisel Ayon,Y,The majority of these countermeasures will be ...,"Bicycle Lanes, Mode: Bike/Ped",0.3,https://highways.dot.gov/safety/proven-safety-...,"Rectangular Rapid Flashing Beacons, Mode: Bike...",0.47,https://highways.dot.gov/safety/proven-safety-...,"Pedestrain Hybrid Beacons (HAWK signals), Mode...",0.29,https://highways.dot.gov/safety/proven-safety-...,"Walkways, Mode: Ped",0.65,https://highways.dot.gov/safety/proven-safety-...,,,,,,
1,2,SCCP,Watsonville-Santa Cruz Multimodal Corridor Pro...,Llisel Ayon,Y,,"Bicycle Lanes, Mode: Bike",0.49,https://highways.dot.gov/safety/proven-safety-...,"Walkways (sidewalk gap closures), Mode: Ped",0.65,https://highways.dot.gov/safety/proven-safety-...,"Rectangular Rapid Flashing Beacons, Mode: Ped",0.47,https://highways.dot.gov/safety/proven-safety-...,,,,,,,,,
2,3,SCCP,SMART Windsor Rail System Extension Project,Llisel Ayon,Y,,"Bicycle Lanes, Mode: Bike",0.3,https://highways.dot.gov/safety/proven-safety-...,,,,,,,,,,,,,,,
3,4,SCCP,Metrolink Lilac to Sycamore Avenue Double Trac...,Llisel Ayon,Y,Double Track Rail. With new platform. No PSCMS...,,,,,,,,,,,,,,,,,,
4,5,SCCP,Inglewood Transit Connector (ITC) Project,Nick Compin,Y,Existing lane width is critical to impact 11' ...,Bike lanes,0.27,https://www.cmfclearinghouse.org/study_detail....,,,,,,,,,,,,,,,


In [11]:
safety_data >> count(_.done_y_n)

Unnamed: 0,done_y_n,n
0,Y,53
1,,27


In [13]:
# count scored projects
safety_data >> count(_.done_y_n,_.crf_1)

Unnamed: 0,done_y_n,crf_1,n
0,Y,0.008,1
1,Y,0.13,1
2,Y,0.14,1
3,Y,0.2,4
4,Y,0.25,1
5,Y,0.27,1
6,Y,0.28,1
7,Y,0.29,1
8,Y,0.3,4
9,Y,0.32,1


In [15]:
# replace missing CRFs with 0 for math
safety_data[["crf_1","crf_2","crf_3","crf_4","crf_5","crf_6"]] = safety_data[["crf_1","crf_2","crf_3","crf_4","crf_5","crf_6"]].fillna(0)

Reference: https://www.cmfclearinghouse.org/collateral/Combining_Multiple_CMFs_Final.pdf 
CCRFi = 1 – [(1-CRF1i)*(1-CRF2i)*(1-CRF3i)] 

In [19]:
# combine crfs
safety_data = (safety_data
              >> mutate(crf_combined = 1-((1-_.crf_1)*(1-_.crf_2)*(1-_.crf_3)*(1-_.crf_4)*(1-_.crf_5)*(1-_.crf_6)),
                        crf_cat = case_when({_.crf_combined>=0.1 : "High",
                                             _.crf_combined==0 : "None",
                                             _.crf_combined<0.1 : "Low"
                                            })
                       )
              )

In [21]:
safety_data >> count(_.done_y_n,_.crf_cat, _.crf_combined)

Unnamed: 0,done_y_n,crf_cat,crf_combined,n
0,Y,High,0.13,1
1,Y,High,0.14,1
2,Y,High,0.2,3
3,Y,High,0.27,1
4,Y,High,0.3,2
5,Y,High,0.43,2
6,Y,High,0.4384,1
7,Y,High,0.45,1
8,Y,High,0.4808,1
9,Y,High,0.48608,1


In [22]:
safety_data >> filter(_.crf_combined>0.9)

Unnamed: 0,submission_log_number,program,project_name,data_enterer_name,done_y_n,notes,countermeasure_1,crf_1,reference_1,countermeasure_2,crf_2,reference_2,countermeasure_3,crf_3,reference_3,countermeasure_4,crf_4,reference_4,countermeasure_5,crf_5,reference_5,countermeasure_6,crf_6,reference_6,crf_combined,crf_cat
0,1,SCCP,U.S. 101 Connected Communities Corridor Rail a...,Llisel Ayon,Y,The majority of these countermeasures will be ...,"Bicycle Lanes, Mode: Bike/Ped",0.3,https://highways.dot.gov/safety/proven-safety-...,"Rectangular Rapid Flashing Beacons, Mode: Bike...",0.47,https://highways.dot.gov/safety/proven-safety-...,"Pedestrain Hybrid Beacons (HAWK signals), Mode...",0.29,https://highways.dot.gov/safety/proven-safety-...,"Walkways, Mode: Ped",0.65,https://highways.dot.gov/safety/proven-safety-...,,0.0,,,0.0,,0.907806,High
1,2,SCCP,Watsonville-Santa Cruz Multimodal Corridor Pro...,Llisel Ayon,Y,,"Bicycle Lanes, Mode: Bike",0.49,https://highways.dot.gov/safety/proven-safety-...,"Walkways (sidewalk gap closures), Mode: Ped",0.65,https://highways.dot.gov/safety/proven-safety-...,"Rectangular Rapid Flashing Beacons, Mode: Ped",0.47,https://highways.dot.gov/safety/proven-safety-...,,0.0,,,0.0,,,0.0,,0.905395,High
8,10,SCCP,East Bay Greenway Multimodal Project Phase 1,Llisel Ayon,Y,Additional countermeasures include walkways to...,"Crosswalk Visibility Enhancements, Mode: Ped",0.4,https://highways.dot.gov/safety/proven-safety-...,"Bicycle Lanes, Mode: Bike",0.49,https://highways.dot.gov/safety/proven-safety-...,"Rectangular Rapid Flashing Beacons, Mode: Ped",0.47,https://highways.dot.gov/safety/proven-safety-...,"Leading Pedestrian Interval, Mode: Ped",0.13,https://highways.dot.gov/safety/proven-safety-...,"Median and Pedestrian Crossing Islands, Mode: Ped",0.56,https://highways.dot.gov/safety/proven-safety-...,"Pedestrain Hybrid Beacons, Mode: Ped",0.29,https://highways.dot.gov/safety/proven-safety-...,0.955921,High
30,18,TCEP,Konocti Corridor - Segment 2B,Nick Compin,Y,,Convert 2 - 4 lane,0.29,https://www.cmfclearinghouse.org/study_detail....,shoulders,0.87,https://www.cmfclearinghouse.org/study_detail....,remove fixed objects,0.13,https://www.cmfclearinghouse.org/study_detail....,,0.0,,,0.0,,,0.0,,0.919699,High
37,20,SCCP,Silicon Valley Express Lanes Program – Phase 5,Llisel Ayon,Y,Reduction is for cross-median crashes.,"Median Barriers, Mode: Auto",0.97,https://highways.dot.gov/safety/proven-safety-...,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.97,High


In [23]:
safety_data >> filter(_.crf_combined<0.2, _.crf_combined>0)

Unnamed: 0,submission_log_number,program,project_name,data_enterer_name,done_y_n,notes,countermeasure_1,crf_1,reference_1,countermeasure_2,crf_2,reference_2,countermeasure_3,crf_3,reference_3,countermeasure_4,crf_4,reference_4,countermeasure_5,crf_5,reference_5,countermeasure_6,crf_6,reference_6,crf_combined,crf_cat
5,7,SCCP,I-5 Managed Lanes,Nick Compin,Y,"Impact on Free, HOT, or All Lanes?",Convert HOV to HOT,0.2,https://www.cmfclearinghouse.org/study_detail....,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.2,High
6,8,SCCP,Bay Skyway Phase 1,Nick Compin,Y,,Multi-use path,0.14,https://www.cmfclearinghouse.org/study_detail....,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.14,High
31,19,TCEP,Fix 5 Cascade Gateway,Nick Compin,Y,Onramp/truck climbing no exact PSCM CRFs,auxiliary lanes,0.2,https://www.cmfclearinghouse.org/study_detail....,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.2,High
35,23,TCEP,I-710 Integrated Corridor Management (ICM),Llisel Ayon,Y,,Leading Pedestrian Interval,0.13,https://highways.dot.gov/safety/proven-safety-...,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.13,High
49,47,TCEP,Interstate 10 Corridor Freight and Managed Lan...,Nick Compin,Y,,Aux Lanes,0.2,https://www.cmfclearinghouse.org/study_detail....,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,0.2,High


## Part 2: Evaluate Safety Need

In [25]:
project_geo = gpd.read_parquet(f'{GCS_FILE_PATH}Survey123_Geo/cleaned_survey123_sample13.parquet')

In [26]:
project_geo.explore()