In [None]:
import os
# 10**12 bytes = 1 TB (decimal). Presumably a BigQuery scan cap read by the calitp
# tooling; it is set before the other imports run -- TODO confirm that is required.
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000)

# import _utils # amanda's collected utils

import pandas as pd
import geopandas as gpd
# The star import supplies the siuba names used in the cells below
# (_, filter, select, mutate, group_by, summarize, right_join, if_else).
# Note that siuba's `filter` replaces the builtin `filter` in this namespace.
from siuba import *

import shared_utils
# NOTE(review): `dt` and `gcsfs` are not referenced in the cells shown here.
import datetime as dt

import gcsfs

from calitp_data.storage import get_fs
# Filesystem handle for GCS. The project-loading cell calls get_fs() again
# instead of reusing this handle.
fs = get_fs()

# Folder prefix (with trailing slash) that all input files are read from.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"

# Spatially Join SWITRS Data to Projects

In [None]:
# Read the project layer (GeoJSON) from the shared GCS folder.
projects_path = f'{GCS_FILE_PATH}nonshopp_gdf.geojson'
with get_fs().open(projects_path) as geojson_file:
    all_proj = gpd.read_file(geojson_file)

In [None]:
# Preview the first rows of the loaded project table.
all_proj.head()

In [None]:
# Keep only the projects whose geometry passes the validity check.
# (The buffering itself happens in a later cell, after re-projection.)
has_valid_geom = all_proj.geometry.is_valid
all_proj_geom = all_proj.loc[has_valid_geom, :]

In [None]:
# Re-project the projects to the shared CA NAD83 Albers CRS before buffering.
target_crs = shared_utils.geography_utils.CA_NAD83Albers
all_proj_geom = all_proj_geom.to_crs(target_crs)

In [None]:
# Interactive map of the re-projected, valid-geometry projects as a visual check.
all_proj_geom.explore(tiles="cartodbpositron")

In [None]:
# Add buffered copies of each project: roughly 100 ft (30) and 200 ft (61).
# Distances are in the units of the CRS set above -- presumably metres; confirm.
for buffer_col, radius in (('b100', 30), ('b200', 61)):
    all_proj_geom[buffer_col] = all_proj_geom.buffer(radius)

In [None]:
# Make the 200 ft buffer ('b200') the active geometry; the clip and spatial join
# below operate on the active geometry.
# NOTE(review): the "Test Metrics" notes mention a 100 ft buffer, but 'b100' is not
# used in the cells shown here -- confirm which buffer is intended.
all_proj_geom = all_proj_geom.set_geometry('b200')

In [None]:
# Read the cleaned SWITRS safety data (GeoParquet) from the shared GCS folder.
switrs_path = f'{GCS_FILE_PATH}SWITRS_clean.parquet'
switrs = gpd.read_parquet(switrs_path)

In [None]:
# Remove the collision datetime column before mapping with folium (explore).
switrs = switrs.drop(columns=['collision_dt'])

In [None]:
# Tabulate crash records by collision severity (count() gives non-null counts per column).
switrs.groupby(switrs.sc_collision_severity).count()

In [None]:
# Sanity-check map: plot only the first 100 crash records.
switrs.head(100).explore(tiles="cartodbpositron")

In [None]:
# Clip crashes to those within the project buffers.
# clip() only warns when the two layers use different CRSs and then compares the
# raw coordinates, which silently yields an empty or wrong result. The projects
# were re-projected above, so bring the crash points into the same CRS first.
# Layers without a declared CRS are left untouched (to_crs cannot convert them).
projects_crs = all_proj_geom.crs
if projects_crs is not None and switrs.crs is not None and switrs.crs != projects_crs:
    switrs = switrs.to_crs(projects_crs)
switrs_clip = switrs.clip(all_proj_geom)

In [None]:
# How many crash records fall inside the project buffers.
len(switrs_clip)

In [None]:
# Map the clipped crashes, coloured by collision severity.
switrs_clip.explore("sc_collision_severity",tiles="cartodbpositron")

In [None]:
# Spatial join: crash points (left) against the buffered project areas (right).
# how='right' keeps every project row: one output row per crash/project match,
# plus one row for each project with no intersecting crash (crash columns are
# NaN there). The result carries the project geometry, not the crash points.
#
# sjoin() only warns on a CRS mismatch and then compares raw coordinates, so make
# sure the crash points are in the projects' CRS before joining. Layers without
# a declared CRS are left untouched (to_crs cannot convert them).
projects_crs = all_proj_geom.crs
if projects_crs is not None and switrs.crs is not None and switrs.crs != projects_crs:
    switrs = switrs.to_crs(projects_crs)
switrs_proj = switrs.sjoin(all_proj_geom, how='right', predicate='intersects')

In [None]:
# Turn the 'Y' markers for pedestrian / bicycle involvement into 0/1 columns
# so they can be summed per project. Anything other than 'Y' (including the
# missing values on projects with no crash) becomes 0.
switrs_proj = switrs_proj.assign(
    pedflag=lambda joined: joined.pedestrian_accident.eq('Y').astype(int),
    bikeflag=lambda joined: joined.bicycle_accident.eq('Y').astype(int),
)

In [None]:
# Roll the joined crash rows up to one row per project: totals of people killed,
# people injured, and pedestrian / bicycle crash flags.
# Geometry is deliberately not a grouping key (it is not orderable and the
# geometry types differ); it is joined back on in the next step.
project_keys = ("ppno", "project_name", "detailed_project_title")
switrs_proj_agg = (
    switrs_proj
    >> group_by(*project_keys)
    >> summarize(
        sum_fatals=_.number_killed.sum(),
        sum_injured=_.number_injured.sum(),
        sum_ped=_.pedflag.sum(),
        sum_bike=_.bikeflag.sum(),
    )
)

In [None]:
# Re-attach project geometry to the per-project totals.
# The column selected is the one named `geometry` (the project shape), not the
# 'b200' buffer that was made the active geometry earlier.
join_keys = ["ppno", "project_name", "detailed_project_title"]
proj_shapes = all_proj_geom >> select(
    _.geometry, _.ppno, _.project_name, _.detailed_project_title
)
# Right join keeps every aggregated project row; the keys are the columns the
# two tables share.
proj_with_totals = proj_shapes >> right_join(_, switrs_proj_agg, on=join_keys)
switrs_proj_agg_geo = gpd.GeoDataFrame(data=proj_with_totals)

In [None]:
# Map the projects with their aggregated crash totals attached.
switrs_proj_agg_geo.explore(tiles="cartodbpositron")

Test metrics:
* Number of fatalities within a 100 ft buffer of the project area over a 5-year lookback period
    * Test different temporal study periods; rural projects may need a longer lookback
    * Test different buffer sizes; different project types may need different sizes
* Number of people killed or severely injured (KSI) within the buffer area
* Number of pedestrian/bicycle crashes within the buffer area
* Pedestrian/bicycle crashes as a percentage of all crashes within the buffer area