In [1]:
import pandas as pd
import geopandas as gpd

import shared_utils
from utils import *
import intake
import gcsfs

import calitp
from calitp.tables import tbl
from siuba import *

# from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON, LegendControl
# from ipywidgets import Text, HTML



In [2]:
# Read block_population_joined.parquet into a GeoDataFrame. This frame is later passed
# as the second argument to calculate_access_proportion for the Population and
# Land Area rows of the summary table.
# NOTE(review): GCS_FILE_PATH is not defined in this notebook; presumably it is
# provided by `from utils import *` -- confirm.
ca_block_joined = gpd.read_parquet(f'{GCS_FILE_PATH}block_population_joined.parquet')

In [3]:
# Read all_stops.parquet into a GeoDataFrame; it is the right-hand side of the
# spatial join that builds all_employment_joined further down.
all_stops = gpd.read_parquet(f'{GCS_FILE_PATH}all_stops.parquet')

In [4]:
# Read accessible_stops_trips.parquet into a GeoDataFrame; it is the right-hand side
# of the spatial join that builds accessible_employment_joined further down.
accessible_stops_trips = gpd.read_parquet(f'{GCS_FILE_PATH}accessible_stops_trips.parquet')

In [5]:
# Read rt_complete.parquet with plain pandas (not geopandas, unlike the other loads).
# NOTE(review): rt_complete is not referenced anywhere else in the visible part of
# this notebook -- confirm whether this load is still needed.
rt_complete = pd.read_parquet(f'{GCS_FILE_PATH}rt_complete.parquet')

In [6]:
# Load block_level_accessible.parquet, keep the first row for every
# (geo_id, calitp_itp_id) pair, then move the old index into a column so the
# frame gets a fresh 0..n-1 index.
block_level_accessible = gpd.read_parquet(
    f'{GCS_FILE_PATH}block_level_accessible.parquet'
).drop_duplicates(
    subset=['geo_id', 'calitp_itp_id']
).reset_index()

In [7]:
# Load block_level_static.parquet, keep the first row for every
# (geo_id, calitp_itp_id) pair, then move the old index into a column so the
# frame gets a fresh 0..n-1 index.
block_level_static = gpd.read_parquet(
    f'{GCS_FILE_PATH}block_level_static.parquet'
).drop_duplicates(
    subset=['geo_id', 'calitp_itp_id']
).reset_index()

In [8]:
# Read all_stops_rt.parquet into a GeoDataFrame. It is spatially joined to the tract
# layer (all_rt_employ) and also used, de-duplicated on geo_id, as the block-level
# frame for the 'GTFS RT' column of the summary table.
all_stops_rt = gpd.read_parquet(f'{GCS_FILE_PATH}all_stops_rt.parquet')

In [9]:
# Read accessible_stops_trips_rt.parquet into a GeoDataFrame. It is spatially joined
# to the tract layer (acc_rt_employ) and also used, de-duplicated on geo_id, as the
# block-level frame for the 'Accessible RT' column of the summary table.
accessible_stops_trips_rt = gpd.read_parquet(f'{GCS_FILE_PATH}accessible_stops_trips_rt.parquet')

In [10]:
# Read tract_pop_employ_filtered.parquet into a GeoDataFrame. It is the left-hand side
# of every tract-level spatial join below and is passed as the second argument to
# calculate_access_proportion for the Jobs row of the summary table.
tract_pop_employ_filtered = gpd.read_parquet(f'{GCS_FILE_PATH}tract_pop_employ_filtered.parquet')

In [11]:
# Remove the index_right / index_left columns before this frame is used as the
# right-hand side of sjoin, which adds an index_right column of its own.
# NOTE(review): presumably these are leftovers from a spatial join done before the
# parquet was written -- confirm.
accessible_stops_trips_rt = select(
    accessible_stops_trips_rt,
    -_.index_right,
    -_.index_left,
)

In [12]:
# Tracts that intersect at least one row of accessible_stops_trips_rt,
# reduced to a single row per Tract (first match kept).
acc_rt_employ = tract_pop_employ_filtered.sjoin(
    accessible_stops_trips_rt,
    how='inner',
    predicate='intersects',
).drop_duplicates(subset=['Tract'])

In [13]:
# Remove the index_right / index_left columns before this frame is used as the
# right-hand side of sjoin, which adds an index_right column of its own.
# NOTE(review): presumably these are leftovers from a spatial join done before the
# parquet was written -- confirm.
all_stops_rt = select(
    all_stops_rt,
    -_.index_right,
    -_.index_left,
)

In [14]:
# Tracts that intersect at least one row of all_stops_rt,
# reduced to a single row per Tract (first match kept).
all_rt_employ = tract_pop_employ_filtered.sjoin(
    all_stops_rt,
    how='inner',
    predicate='intersects',
).drop_duplicates(subset=['Tract'])

In [15]:
# Tracts joined to every intersecting row of accessible_stops_trips, with the
# join bookkeeping columns removed. Rows are NOT de-duplicated by Tract here;
# that is done where col_geographies is assembled.
accessible_employment_joined = select(
    tract_pop_employ_filtered.sjoin(
        accessible_stops_trips, how='inner', predicate='intersects'
    ),
    -_.index_right,
    -_.index_left,
)

In [16]:
# Tracts joined to every intersecting row of all_stops, with the join
# bookkeeping columns removed. Rows are NOT de-duplicated by Tract here;
# that is done where col_geographies is assembled.
all_employment_joined = select(
    tract_pop_employ_filtered.sjoin(
        all_stops, how='inner', predicate='intersects'
    ),
    -_.index_right,
    -_.index_left,
)

In [17]:
# Summary-table row label -> value handed to calculate_access_proportion as its
# third argument (presumably the name of the column to aggregate -- confirm).
row_metrics = {
    'Population': 'block_pop',
    'Land Area': 'area',
    'Jobs (<4sq km tracts only)': 'num_jobs',
}

# Summary-table column label -> [block-level frame, tract-level frame].
# Position 0 is de-duplicated on geo_id by the comprehension; position 1 is
# supplied ready to use (all_rt_employ is passed through unchanged, the other
# three are de-duplicated on Tract in the tuples below).
col_geographies = {
    label: [block_frame.drop_duplicates(subset=['geo_id']), tract_frame]
    for label, block_frame, tract_frame in (
        ('GTFS Static',
         block_level_static,
         all_employment_joined.drop_duplicates(subset=['Tract'])),
        ('Accessible Static',
         block_level_accessible,
         accessible_employment_joined.drop_duplicates(subset=['Tract'])),
        ('GTFS RT',
         all_stops_rt,
         all_rt_employ),
        ('Accessible RT',
         accessible_stops_trips_rt,
         acc_rt_employ.drop_duplicates(subset=['Tract'])),
    )
}

In [18]:
# Build the summary table: one row per entry in row_metrics, one column per
# entry in col_geographies, each cell being the value returned by
# calculate_access_proportion.
#
# The rows are collected as plain dicts and the DataFrame is constructed once.
# The previous implementation grew the frame with DataFrame.append inside the
# loop; that method was deprecated in pandas 1.4 and removed in pandas 2.0.
summary_records = []
for row, metric_col in row_metrics.items():
    if row == 'Jobs (<4sq km tracts only)':
        # Jobs row: use the tract-level frame (position 1 of each pair) and
        # compare against the tract layer.
        frame_position, comparison_frame = 1, tract_pop_employ_filtered
    else:
        # Population / Land Area rows: use the block-level frame (position 0)
        # and compare against the block layer.
        frame_position, comparison_frame = 0, ca_block_joined

    summary_records.append({
        col: calculate_access_proportion(frames[frame_position], comparison_frame, metric_col)
        for col, frames in col_geographies.items()
    })

# Explicit index/columns keep the row and column order identical to the
# insertion order of row_metrics and col_geographies.
summary_df = pd.DataFrame(
    summary_records,
    index=list(row_metrics),
    columns=list(col_geographies),
)

## Summary of all Metrics

* Population metrics: percent of CA population within a block group near a qualifying transit stop, excluding block groups > 4 sq km
* Land Area metrics: percent of CA land area made up of block groups near a qualifying transit stop, excluding block groups > 4 sq km
* Employment metrics: percent of CA jobs in a census tract near a qualifying transit stop, excluding tracts > 4 sq km
    * this currently means the analysis covers only about 60% of CA jobs; it could be made more precise by pulling finer-grained employment data if desired
    * this restriction likely makes transit look "better" than an analysis of all CA jobs would, since these urban/suburban tracts are probably more likely to have at least some transit service

In [19]:
# Display the finished summary table (rows: metrics, columns: service categories).
summary_df

Unnamed: 0,GTFS Static,Accessible Static,GTFS RT,Accessible RT
Population,85.95,10.02,59.65,3.67
Land Area,11.58,0.93,5.33,0.42
Jobs (<4sq km tracts only),99.32,18.61,83.52,12.57
