In [1]:
import os
# 800_000_000_000 bytes = 800 GB (decimal), stored as a string because env values must be str.
# NOTE(review): presumably calitp reads this as its BigQuery bytes-scanned cap, and it likely
# has to be set before the calitp imports below run — confirm against the calitp docs.
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca
import folium
import shared_utils

from siuba import *
import pandas as pd
import geopandas as gpd

# Remove the column display limit so wide frames are shown in full instead of being truncated.
pd.set_option('display.max_columns', None) 

import datetime as dt
import time

from calitp import get_engine

# Build an engine with calitp's get_engine() and open a connection from it.
# NOTE(review): `connection` is neither used nor closed in the cells visible here —
# confirm a later cell needs it, otherwise it is an idle open connection.
engine = get_engine()
connection = engine.connect()



# Join Trips per Stop, Ridership, and ACS data

In [16]:
# Load the per-stop trip counts table (one of the two inputs to the stop-level join below).
stoptrips = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/ahsc_grant/tbl1_trips_perstop.parquet"
)
stoptrips.head(5)

Unnamed: 0,stop_desc,location_type,route_type,stop_name,stop_id,stop_code,calitp_itp_id,geometry,n_trips_ampeak,n_trips_midday,n_trips_pmpeak,n_trips_sat,n_trips_sun
0,,,3,Sunset / Park,8030,8030,182,POINT (-118.26181 34.07778),22.0,30.0,23.0,61.0,61.0
1,,,3,Fallbrook / Roscoe,10418,10418,182,POINT (-118.62363 34.21911),15.0,15.0,16.0,25.0,25.0
2,,,3,Vanowen / Woodman,15246,15246,182,POINT (-118.43104 34.19407),16.0,15.0,15.0,25.0,25.0
3,,,3,Wilshire / Doheny,8417,8417,182,POINT (-118.38985 34.06694),16.0,21.0,16.0,47.0,47.0
4,,,3,Fairfax / Rosewood,11986,11986,182,POINT (-118.36159 34.07991),23.0,31.0,27.0,63.0,63.0


In [3]:
# Load the cleaned ridership table (boardings per stop; per the file name, operator 182, 2022-03).
ridership = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/ahsc_grant/rider_cleaned_182_2022_03.parquet"
)
ridership.head(5)

Unnamed: 0,calitp_itp_id,stop_id,geometry,sat_ons,sun_ons,weekday_ons
0,182,12118,POINT (-118.28437 34.08156),9.0,13.0,51.0
1,182,5636,POINT (-118.29036 33.85010),10.0,10.0,173.0
2,182,12933,POINT (-118.53593 34.22100),129.0,113.0,1875.0
3,182,13458,POINT (-118.28280 33.94752),9.0,7.0,73.0
4,182,2351,POINT (-117.99940 33.88811),11.0,3.0,65.0


In [24]:
# Combine trips-per-stop with ridership, keep bus stops only, and add a weekday trip total.
trips_ridership_joined = (
    stoptrips
    # No `on` is passed, so the join keys are inferred from the column names both frames share.
    # NOTE(review): per the previews that looks like calitp_itp_id, stop_id and geometry — consider
    # naming the keys explicitly once that is confirmed.
    >> full_join(_, ridership)
    # Keep buses (route_type "3"). Rows that came only from `ridership` carry no route_type,
    # so they fail this comparison and are dropped as well.
    >> filter(_.route_type == "3")
    # Weekday total = AM peak + midday + PM peak; a missing value in any one of them
    # makes the total missing too.
    >> mutate(
        n_trips_weekday=_.n_trips_ampeak + _.n_trips_midday + _.n_trips_pmpeak
    )
)

trips_ridership_joined.head(5)

Unnamed: 0,stop_desc,location_type,route_type,stop_name,stop_id,stop_code,calitp_itp_id,geometry,n_trips_ampeak,n_trips_midday,n_trips_pmpeak,n_trips_sat,n_trips_sun,sat_ons,sun_ons,weekday_ons,n_trips_weekday
0,,,3,Sunset / Park,8030,8030,182,POINT (-118.26181 34.07778),22.0,30.0,23.0,61.0,61.0,149.0,135.0,981.0,75.0
1,,,3,Fallbrook / Roscoe,10418,10418,182,POINT (-118.62363 34.21911),15.0,15.0,16.0,25.0,25.0,5.0,6.0,94.0,46.0
2,,,3,Vanowen / Woodman,15246,15246,182,POINT (-118.43104 34.19407),16.0,15.0,15.0,25.0,25.0,176.0,118.0,1790.0,46.0
3,,,3,Wilshire / Doheny,8417,8417,182,POINT (-118.38985 34.06694),16.0,21.0,16.0,47.0,47.0,55.0,33.0,991.0,53.0
4,,,3,Fairfax / Rosewood,11986,11986,182,POINT (-118.36159 34.07991),23.0,31.0,27.0,63.0,63.0,34.0,54.0,1175.0,81.0


In [13]:
# add buffers to stops
# 402.336 == 0.25 * 1609.344, i.e. a quarter mile expressed in metres. Named so the
# catchment radius is documented and tunable in one place instead of being a bare literal.
STOP_BUFFER_QUARTER_MILE_M = 402.336

# Reproject before buffering: buffer() measures its distance in the units of the layer's CRS.
# NOTE(review): this assumes CA_NAD83Albers is a metre-based projection — confirm in shared_utils.
trips_ridership_joined = trips_ridership_joined.to_crs(shared_utils.geography_utils.CA_NAD83Albers)

# this replaces our point geometry with polygons
# CAUTION: this cell is not idempotent. Re-running it without first re-running the join cell
# buffers the already-buffered polygons again and silently widens every catchment.
trips_ridership_joined.geometry = trips_ridership_joined.buffer(STOP_BUFFER_QUARTER_MILE_M)

In [17]:
# Load the ACS table (per the file name, LA County); its polygons are joined to the stop buffers below.
acs_la = gpd.read_parquet(
    "gs://calitp-analytics-data/data-analyses/ahsc_grant/acs_tbl_lacounty.parquet"
)
acs_la.head(5)

Unnamed: 0,ALAND,geometry,geo_id,total_pop,households,not_us_citizen_pop,black_pop,hispanic_pop,inc_extremelylow,inc_verylow,inc_low,pop_determined_poverty_status,poverty,no_car,no_cars
0,3837562,"POLYGON ((-118.58119 34.14318, -118.58099 34.1...",6037137504,2073.0,694.0,23.0,19.0,64.0,30.0,29.0,102.0,2073.0,90.0,12.0,10.0
1,4472196,"POLYGON ((-118.60573 34.14585, -118.60561 34.1...",6037138000,4673.0,1784.0,198.0,325.0,393.0,270.0,124.0,196.0,4673.0,386.0,0.0,19.0
2,1152031,"POLYGON ((-118.53082 34.18024, -118.52952 34.1...",6037139200,5840.0,2172.0,815.0,153.0,1330.0,242.0,315.0,548.0,5840.0,602.0,66.0,108.0
3,957093,"POLYGON ((-118.37899 34.15409, -118.37888 34.1...",6037143200,4210.0,2083.0,167.0,375.0,539.0,303.0,386.0,378.0,4210.0,455.0,172.0,103.0
4,1649223,"POLYGON ((-118.39648 34.15761, -118.39538 34.1...",6037143300,6730.0,3113.0,421.0,477.0,1270.0,436.0,565.0,415.0,6711.0,384.0,62.0,117.0


In [18]:
# Reproject the ACS polygons to CA_NAD83Albers — the same CRS the stop layer is projected to
# before buffering — so the spatial join compares geometries in one coordinate system.
acs_la = acs_la.to_crs(crs=shared_utils.geography_utils.CA_NAD83Albers)

In [21]:
# Spatially join the bus-stop buffers to the ACS polygons.
# The result has one row per (stop buffer, intersecting tract) pair, so a stop appears
# once for every tract its buffer touches.
stops_acs_joined = gpd.sjoin(
    trips_ridership_joined,
    acs_la,
    how="inner",
    predicate="intersects",
)

# Number of stop buffers that intersect each tract.
stops_acs_joined["geo_id"].value_counts()

06037207710    145
06037207900    123
06037207301    122
06037532400    115
06037226002    105
              ... 
06037577000      1
06037267902      1
06037430400      1
06037554516      1
06037264103      1
Name: geo_id, Length: 1741, dtype: int64

In [23]:
# Collapse the stop x tract rows back to one row per stop by summing the ACS counts of
# every tract whose polygon intersects the stop's buffer.
# NOTE: whole-tract totals are summed (no area weighting), so these are counts for the
# tracts touching the buffer, not for the buffer itself.

# Stop identifiers plus the stop-level service/ridership measures that are carried through
# as grouping columns so they survive the summarize.
stop_level_columns = (
    _.calitp_itp_id,
    _.stop_id,
    _.n_trips_ampeak,
    _.n_trips_midday,
    _.n_trips_pmpeak,
    _.n_trips_sat,
    _.n_trips_sun,
    _.sat_ons,
    _.sun_ons,
    _.weekday_ons,
    _.n_trips_weekday,
)

stops_acs_rollup = (
    stops_acs_joined
    >> group_by(*stop_level_columns)
    >> summarize(
        sum_total_pop=_.total_pop.sum(),
        sum_households=_.households.sum(),
        sum_not_us_citizen_pop=_.not_us_citizen_pop.sum(),
        sum_black_pop=_.black_pop.sum(),
        sum_hispanic_pop=_.hispanic_pop.sum(),
        sum_inc_extremelylow=_.inc_extremelylow.sum(),
        sum_inc_verylow=_.inc_verylow.sum(),
        sum_inc_low=_.inc_low.sum(),
        # denominator for poverty rate
        sum_pop_determined_poverty_status=_.pop_determined_poverty_status.sum(),
        sum_poverty=_.poverty.sum(),
        # workers without access to car
        sum_no_car=_.no_car.sum(),
        # households without car
        sum_no_cars=_.no_cars.sum(),
        sum_land_area=_.ALAND.sum(),
    )
    >> ungroup()
)

stops_acs_rollup.head(5)
                    

Unnamed: 0,calitp_itp_id,stop_id,n_trips_ampeak,n_trips_midday,n_trips_pmpeak,n_trips_sat,n_trips_sun,sat_ons,sun_ons,weekday_ons,n_trips_weekday,sum_total_pop,sum_households,sum_not_us_citizen_pop,sum_black_pop,sum_hispanic_pop,sum_inc_extremelylow,sum_inc_verylow,sum_inc_low,sum_pop_determined_poverty_status,sum_poverty,sum_no_car,sum_no_cars,sum_land_area
0,182,1,13.0,15.0,15.0,44.0,44.0,16.0,24.0,214.0,43.0,4031.0,1093.0,696.0,15.0,3766.0,222.0,303.0,197.0,3866.0,338.0,42.0,60.0,3707188
1,182,10000002,6.0,7.0,7.0,12.0,12.0,20.0,7.0,102.0,20.0,16203.0,4603.0,3061.0,2968.0,9900.0,975.0,1090.0,1146.0,16001.0,2844.0,266.0,364.0,3243477
2,182,10033,12.0,15.0,12.0,24.0,24.0,22.0,24.0,205.0,39.0,14102.0,4613.0,2455.0,5559.0,7016.0,1357.0,911.0,817.0,13952.0,2738.0,128.0,524.0,2714150
3,182,10034,12.0,15.0,12.0,24.0,24.0,7.0,5.0,132.0,39.0,18631.0,6567.0,2914.0,8455.0,8467.0,2147.0,1300.0,1201.0,18481.0,3814.0,290.0,945.0,3565703
4,182,10035,12.0,15.0,12.0,23.0,23.0,38.0,34.0,371.0,39.0,15654.0,5108.0,2848.0,4657.0,9669.0,1324.0,1213.0,1232.0,15469.0,3570.0,207.0,443.0,2568500
