In [1]:
import pandas as pd
import geopandas as gpd

import shared_utils
from utils import *
import intake
import gcsfs

import calitp
from calitp.tables import tbl
from siuba import *



In [2]:
catalog = intake.open_catalog('./catalog.yml')

# Accessible Transit Metrics

### New Accessibilty Metric (Area)

The % of non-water area of California that is within 1/4 mi of a bus stop or 1 mi of a ferry/rail stop that is explicitly wheelchair accessible (and if in a station, that station has explicit pathways coding), and that has is served by a public-funded, open to the general public transit service with GTFS Schedule data that is served by a service that is explicitly wheelchair accessible

### New Accessibility Metric (Population)

 The % of Californians that are within 1/4 mi of a bus stop or 1 mi of a ferry/rail stop that is explicitly wheelchair accessible (and if in a station, that station has explicit pathways coding), and that has is served by a public-funded, open to the general public transit service with GTFS Schedule data that is served by a service that is explicitly wheelchair accessible

### Notes and Caveats:

* The vast majority of accessible services don't appear to provide accessibility data in GTFS
* Pathways appears to be empty in data warehouse for now...

In [3]:
total_pop_var = 'B01001_001E'

In [4]:
## can hit Census API limits with frequent runs?

# blockgrp_pop = get_census_ca_counties(total_pop_var, 'block%20group')
# blockgrp_pop = blockgrp_pop.rename(columns={'B01001_001E': 'block_grp_pop',
#                                            'block group': 'block_grp'})

In [5]:
## read pre-fetched 2019 ACS data from catalog
blockgrp_pop = (catalog.ca_bg_population.read()
                .rename(columns={'block group': 'block_grp',
                                'population': 'block_grp_pop'})
                .astype({'block_grp_pop': 'int64', 'state': 'int64',
                        'county': 'int64', 'tract': 'int64',
                        'block_grp': 'int64'})
               )

In [6]:
blockgrp_pop.head(3)

Unnamed: 0,NAME,block_grp_pop,state,county,tract,block_grp
0,"Block Group 2, Census Tract 12, Merced County,...",1388,6,47,1200,2
1,"Block Group 4, Census Tract 12, Merced County,...",1531,6,47,1200,4
2,"Block Group 1, Census Tract 12, Merced County,...",1405,6,47,1200,1


In [7]:
accessible_stops_trips = get_stops_and_trips(filter_accessible = True)

In [8]:
accessible_stops_trips.head(3)

Unnamed: 0,stop_id,route_type,stop_lon,stop_lat,calitp_itp_id,calitp_url_number,wheelchair_boarding,wheelchair_accessible,geometry
0,3820402,3,-122.077278,37.394672,217,0,1,1,POINT (-183632.839 -67099.301)
1,6684616,3,-122.098777,37.431429,217,0,1,1,POINT (-185442.541 -62972.434)
2,7269843,3,-122.111591,37.404887,217,0,1,1,POINT (-186640.338 -65897.079)


In [9]:
bus_route_types = ['3', '11']

def buffer_by_route_type(row):
    '''
    Buffer bus stops by 400 meters (.25mi),
    rail/ferry by 1600 meters (1mi)
    '''
    if row.route_type in bus_route_types:
        row.geometry = row.geometry.buffer(400)
    else:
        row.geometry = row.geometry.buffer(1600)
    return row

In [10]:
accessible_stops_trips = accessible_stops_trips.apply(buffer_by_route_type, axis=1)

In [11]:
ca_block_geo = catalog.ca_block_groups.read()
ca_block_geo = ca_block_geo.to_crs('EPSG:4326')

In [12]:
stanford_shorelines = catalog.stanford_shorelines.read()

In [13]:
ca_shoreline = stanford_shorelines >> filter(_.STFIPS == '06')

In [14]:
ca_block_geo = ca_block_geo.clip(ca_shoreline)

In [15]:
ca_block_geo = ca_block_geo.to_crs(shared_utils.geography_utils.CA_NAD83Albers)

In [16]:
## drop large block groups (not useful access data...)
## 4 sq km threshold
ca_block_geo = ca_block_geo[ca_block_geo.geometry.area < 4e+06]

In [17]:
ca_block_geo = ca_block_geo.astype({'STATEFP': 'int64',
                                            'COUNTYFP': 'int64',
                                            'TRACTCE': 'int64',
                                            'BLKGRPCE': 'int64'})
ca_block_geo = ca_block_geo.rename(columns={'STATEFP': 'state',
                                                   'COUNTYFP': 'county',
                                                   'TRACTCE': 'tract',
                                                   'BLKGRPCE': 'block_grp'})

In [18]:
geo_stops_joined = ca_block_geo.sjoin(accessible_stops_trips, how='inner', predicate='intersects')

In [19]:
geo_stops_joined = geo_stops_joined.drop_duplicates(subset=['GEOID'])

In [20]:
geo_stops_joined.head(3)

Unnamed: 0,state,county,tract,block_grp,GEOID,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,...,geometry,index_right,stop_id,route_type,stop_lon,stop_lat,calitp_itp_id,calitp_url_number,wheelchair_boarding,wheelchair_accessible
16760,6,73,10009,1,60730100091,Block Group 1,G5030,S,1759046,528681,...,"MULTIPOLYGON (((275529.078 -603346.514, 275528...",3591,60601,3,-117.042087,32.545331,278,0,1,1
18464,6,73,10009,2,60730100092,Block Group 2,G5030,S,422248,0,...,"POLYGON ((277015.844 -602387.172, 277237.214 -...",3591,60601,3,-117.042087,32.545331,278,0,1,1
18172,6,73,10009,4,60730100094,Block Group 4,G5030,S,621685,0,...,"POLYGON ((278109.830 -603029.458, 278179.036 -...",3591,60601,3,-117.042087,32.545331,278,0,1,1


In [21]:
stops_pop_joined = geo_stops_joined >> inner_join(_, blockgrp_pop, on=['state', 'county', 'tract', 'block_grp'])

In [22]:
stops_pop_joined['block_grp_pop'].sum()

2378168

In [23]:
blockgrp_pop['block_grp_pop'].sum()

39283497

#### Current Percentage of CA population in block groups within .25mi of accessible transit

In [24]:
((stops_pop_joined['block_grp_pop'].sum() / blockgrp_pop['block_grp_pop'].sum()) * 100).round(2)

6.05

In [25]:
geo_stops_joined.geometry.area.sum() ## sq meters

948081833.6124767

In [26]:
ca_block_geo.geometry.area.sum()

14393648284.97847

#### Current Percentage of CA land area in block groups within .25mi of accessible transit

In [27]:
((geo_stops_joined.geometry.area.sum() / ca_block_geo.geometry.area.sum()) * 100).round(2)

6.59

### Map

In [28]:
# simple_map(geo_stops_joined, 'calitp_itp_id')

#### Static Map (block groups near accessible transit, Bay Area)

![bay area accessiblity](accessible.png)

# General Transit Metrics

### New General Metric (by area):

The % of non-water area of Californian that is within 1/4 mi of a bus stop or 1 mi of a ferry/rail stop that has is served by a public-funded, open to the general public transit service with GTFS Schedule data


### New General Metric (by population):

The % of Californians that live within 1/4 mi of a bus stop or 1 mi of a ferry/rail stop that has is served by a public-funded, open to the general public transit service with GTFS Schedule data


In [29]:
all_stops = get_stops_and_trips(filter_accessible = False)

In [30]:
all_stops.head(3)

Unnamed: 0,stop_id,route_type,stop_lon,stop_lat,calitp_itp_id,calitp_url_number,wheelchair_boarding,wheelchair_accessible,geometry
0,65,3,-114.646275,32.683792,386,0,,,POINT (502573.151 -577852.724)
1,2628783,3,-117.962626,34.086535,29,0,0.0,,POINT (188001.571 -434547.756)
2,11010,3,-121.163303,38.670296,111,0,,,POINT (-101102.542 73312.845)


In [31]:
all_stops_joined = (ca_block_geo
                    .sjoin(all_stops, how='inner', predicate='intersects')
                    .drop_duplicates(subset=['GEOID'])
                   )

In [32]:
all_stops_pop_joined = all_stops_joined >> inner_join(_, blockgrp_pop, on=['state', 'county', 'tract', 'block_grp'])

#### Current Percentage of CA population in block groups near transit with GTFS Schedule data

In [33]:
((all_stops_pop_joined['block_grp_pop'].sum() / blockgrp_pop['block_grp_pop'].sum()) * 100).round(2)

51.36

#### Current Percentage of CA land area in block groups near transit with GTFS Schedule data

In [34]:
((all_stops_joined.geometry.area.sum() / ca_block_geo.geometry.area.sum()) * 100).round(2)

56.45

## Employment Metrics

In [35]:
service_path = 'gs://calitp-analytics-data/data-analyses/bus_service_increase/'

In [36]:
## Read in processed df from bus_service_increase/B1
tract_pop_employ = gpd.read_parquet(f'{service_path}bus_stop_times_by_tract.parquet')
tract_pop_employ = tract_pop_employ >> select(-_.num_arrivals, -_.stop_id, -_.itp_id)

In [37]:
tract_pop_employ.head(3)

Unnamed: 0,Tract,ZIP,Population,sq_mi,pop_sq_mi,overall_ptile,pollution_ptile,popchar_ptile,equity_group,pollution_group,popchar_group,County,City,geometry,num_jobs,jobs_sq_mi,num_pop_jobs,popjobs_sq_mi
0,6001400100,94704,3120,2.655917,1174.735672,2.79879,26.621033,1.525466,1.0,1,1.0,Alameda,Oakland,"POLYGON ((-122.24408 37.88322, -122.24198 37.8...",936,352.420702,4056,1527.156374
1,6001400200,94618,2007,0.229901,8729.842564,2.874433,24.181705,1.651538,1.0,1,1.0,Alameda,Oakland,"POLYGON ((-122.24191 37.85181, -122.24202 37.8...",1357,5902.539292,3364,14632.381857
2,6001400300,94618,5051,0.427356,11819.185546,15.935451,33.366521,12.266768,1.0,2,1.0,Alameda,Oakland,"POLYGON ((-122.24590 37.84500, -122.25241 37.8...",1978,4628.459515,7029,16447.645061


In [38]:
tract_pop_employ = tract_pop_employ.to_crs(
                        shared_utils.geography_utils.CA_NAD83Albers)

In [39]:
tract_pop_employ.crs

<Derived Projected CRS: EPSG:3310>
Name: NAD83 / California Albers
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: United States (USA) - California.
- bounds: (-124.45, 32.53, -114.12, 42.01)
Coordinate Operation:
- name: California Albers
- method: Albers Equal Area
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [55]:
## option to filter out large tracts (not useful access data...)
## 4 sq km threshold
tract_pop_employ['under_4_sq_km'] = tract_pop_employ.geometry.area < 4e+06

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [56]:
tract_pop_employ >> group_by('under_4_sq_km') >> summarize(jobs = _.num_jobs.sum())

Unnamed: 0,under_4_sq_km,jobs
0,True,10386412


In [57]:
## filter out large tracts
tract_pop_employ = tract_pop_employ >> filter(_.under_4_sq_km)

In [58]:
all_employment_joined = (tract_pop_employ
                    .sjoin(all_stops, how='inner', predicate='intersects')
                    .drop_duplicates(subset=['Tract'])
                   )

In [59]:
accessible_employment_joined = (tract_pop_employ
                    .sjoin(accessible_stops_trips, how='inner', predicate='intersects')
                    .drop_duplicates(subset=['Tract'])
                   )

In [62]:
((all_employment_joined.geometry.area.sum() / tract_pop_employ.geometry.area.sum()) * 100).round(2)

73.19

In [63]:
((accessible_employment_joined.geometry.area.sum() / tract_pop_employ.geometry.area.sum()) * 100).round(2)

8.11

In [64]:
((accessible_employment_joined.Population.sum() / tract_pop_employ.Population.sum()) * 100).round(2)

8.06

In [65]:
((all_employment_joined.Population.sum() / tract_pop_employ.Population.sum()) * 100).round(2)

74.92