In [1]:
import pandas as pd
import geopandas as gpd
from siuba import *

HQTA_PATH = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/export/2024-09-18/ca_hq_transit_areas.parquet'

In [2]:
hqta = gpd.read_parquet(HQTA_PATH)

In [3]:
processed_census_path = 'gs://calitp-analytics-data/data-analyses/sb125/quick_mts_jobs/'

In [4]:
from calitp_data_analysis.utils import geoparquet_gcs_export

# Quick counts of people and jobs within ~0.5mi of a Major Transit Stop

Just wrapped this up, attached are two csvs. One summarizes jobs and population within a half mile of the various types of Major Transit Stop per PRC 21155 and PRC 21064.3, the other additionally separates them out by transit agency.

Used 2020 Census LEHD Workplace Area Characteristics to get all primary jobs by Census block, population by block is from the 2020 decennial Census. Spatial join was a simple intersection of each half-mile buffer around a major transit stop with all Census blocks, then jobs and people were summed across all those blocks to get a single figure per major stop.


## read clean census (already done, output on GCS)

In [7]:
import gzip
# all workers, all primary jobs
with gzip.open('./ca_wac_S000_JT01_2021.csv.gz', 'rb') as f:
    df = pd.read_csv(f)

In [8]:
df = df.iloc[:, :2]

In [9]:
import zipfile

In [10]:
with zipfile.ZipFile('./DECENNIALPL2020.P1_2024-10-16T105517.zip', 'r') as myzip:
    with myzip.open('DECENNIALPL2020.P1-Data.csv') as f:
        df2 = pd.read_csv(f)

  df2 = pd.read_csv(f)


In [11]:
df2 = df2.iloc[1:,:3]

In [12]:
df2.GEO_ID = df2.GEO_ID.map(lambda x: x[10:])

In [13]:
df2.GEO_ID = df2.GEO_ID.astype(int)

In [14]:
df2.P1_001N = df2.P1_001N.astype(int)

In [15]:
df2 >> head(3)

Unnamed: 0,GEO_ID,NAME,P1_001N
1,60014001001000,"Block 1000, Block Group 1, Census Tract 4001, ...",0
2,60014001001001,"Block 1001, Block Group 1, Census Tract 4001, ...",0
3,60014001001002,"Block 1002, Block Group 1, Census Tract 4001, ...",0


In [16]:
df2.dtypes

GEO_ID      int64
NAME       object
P1_001N     int64
dtype: object

In [17]:
df >> head(3)

Unnamed: 0,w_geocode,C000
0,60014001001003,23
1,60014001001010,5
2,60014001001011,1


In [18]:
df.dtypes

w_geocode    int64
C000         int64
dtype: object

In [19]:
joined = df2 >> inner_join(_, df, on={'GEO_ID':'w_geocode'})

In [20]:
joined

Unnamed: 0,GEO_ID,NAME,P1_001N,w_geocode,C000
0,60014001001003,"Block 1003, Block Group 1, Census Tract 4001, ...",0,60014001001003,23
1,60014001001010,"Block 1010, Block Group 1, Census Tract 4001, ...",130,60014001001010,5
2,60014001001011,"Block 1011, Block Group 1, Census Tract 4001, ...",153,60014001001011,1
3,60014001001013,"Block 1013, Block Group 1, Census Tract 4001, ...",21,60014001001013,4
4,60014001001015,"Block 1015, Block Group 1, Census Tract 4001, ...",20,60014001001015,3
...,...,...,...,...,...
252467,61150411021031,"Block 1031, Block Group 1, Census Tract 411.02...",200,61150411021031,5
252468,61150411021035,"Block 1035, Block Group 1, Census Tract 411.02...",91,61150411021035,3
252469,61150411021037,"Block 1037, Block Group 1, Census Tract 411.02...",21,61150411021037,9
252470,61150411021039,"Block 1039, Block Group 1, Census Tract 411.02...",54,61150411021039,4


In [21]:
hqta >> head(3)

Unnamed: 0,agency_primary,agency_secondary,hqta_type,hqta_details,route_id,base64_url_primary,base64_url_secondary,org_id_primary,org_id_secondary,geometry
0,Alameda-Contra Costa Transit District,,hq_corridor_bus,stop_along_hq_bus_corridor_single_operator,10,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,,recOZgevYf7Jimm9L,,"MULTIPOLYGON (((-122.09630 37.66978, -122.0963..."
1,Alameda-Contra Costa Transit District,,hq_corridor_bus,stop_along_hq_bus_corridor_single_operator,12,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,,recOZgevYf7Jimm9L,,"MULTIPOLYGON (((-122.28456 37.79937, -122.2844..."
2,Alameda-Contra Costa Transit District,,hq_corridor_bus,stop_along_hq_bus_corridor_single_operator,14,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...,,recOZgevYf7Jimm9L,,"POLYGON ((-122.31124 37.80881, -122.31122 37.8..."


In [24]:
blocks = gpd.read_file('./tl_2020_06_tabblock20.zip')

In [25]:
blocks = blocks >> select(_.GEOID20, _.POP20, _.HOUSING20, _.geometry)

In [26]:
blocks.GEOID20 = blocks.GEOID20.astype(int)

In [31]:
blocks = blocks >> rename(GEO_ID = _.GEOID20)

In [33]:
joined = blocks >> inner_join(_, joined, on='GEO_ID')

In [34]:
joined

Unnamed: 0,GEO_ID,POP20,HOUSING20,geometry,NAME,P1_001N,w_geocode,C000
0,60650406112004,165,40,"POLYGON ((-117.59180 33.97054, -117.59059 33.9...","Block 2004, Block Group 2, Census Tract 406.11...",165,60650406112004,4
1,61130101032011,0,0,"POLYGON ((-121.53676 38.58855, -121.53421 38.5...","Block 2011, Block Group 2, Census Tract 101.03...",0,61130101032011,503
2,61130115001002,25,18,"POLYGON ((-122.25881 38.90023, -122.25799 38.8...","Block 1002, Block Group 1, Census Tract 115, Y...",25,61130115001002,1
3,61130107041021,67,20,"POLYGON ((-121.75876 38.54680, -121.75874 38.5...","Block 1021, Block Group 1, Census Tract 107.04...",67,61130107041021,7
4,61130112071032,17,7,"POLYGON ((-121.82142 38.63702, -121.82142 38.6...","Block 1032, Block Group 1, Census Tract 112.07...",17,61130112071032,18
...,...,...,...,...,...,...,...,...
252467,60890123012008,138,61,"POLYGON ((-122.38878 40.49293, -122.38834 40.4...","Block 2008, Block Group 2, Census Tract 123.01...",138,60890123012008,10
252468,60770011011017,94,34,"POLYGON ((-121.32608 37.97434, -121.32569 37.9...","Block 1017, Block Group 1, Census Tract 11.01,...",94,60770011011017,1
252469,60014402001014,43,16,"POLYGON ((-122.01943 37.60060, -122.01904 37.6...","Block 1014, Block Group 1, Census Tract 4402, ...",43,60014402001014,5
252470,60590630072015,119,54,"POLYGON ((-117.86737 33.63054, -117.86736 33.6...","Block 2015, Block Group 2, Census Tract 630.07...",119,60590630072015,5


In [35]:
joined = joined >> select(_.geoid == _.GEO_ID, _.pop == _.POP20, _.jobs == _.C000,
                          _.housing == _.HOUSING20, _.geometry)

In [36]:
joined.to_parquet('census_jobs_blocks.parquet')

In [37]:
geoparquet_gcs_export(joined, processed_census_path, 'census_jobs_blocks.parquet')

## Spatial Join (start here)

In [38]:
census = gpd.read_parquet(f'{processed_census_path}census_jobs_blocks.parquet').rename(columns={'pop': 'people'})

In [39]:
census >> head(3)

Unnamed: 0,geoid,people,jobs,housing,geometry
0,60650406112004,165,4,40,"POLYGON ((-117.59180 33.97054, -117.59059 33.9..."
1,61130101032011,0,503,0,"POLYGON ((-121.53676 38.58855, -121.53421 38.5..."
2,61130115001002,25,1,18,"POLYGON ((-122.25881 38.90023, -122.25799 38.8..."


In [40]:
hqta = hqta >> select(_.agency_primary, _.hqta_type, _.geometry)

In [41]:
hqta = hqta.reset_index() #  might actually have use for an ix

In [42]:
from calitp_data_analysis import geography_utils

In [43]:
hqta = hqta.to_crs(geography_utils.CA_NAD83Albers)

In [44]:
census = census.to_crs(geography_utils.CA_NAD83Albers)

In [45]:
sjoined = hqta.sjoin(census, how="inner", predicate='intersects')

In [46]:
sjoined

Unnamed: 0,index,agency_primary,hqta_type,geometry,index_right,geoid,people,jobs,housing
0,0,Alameda-Contra Costa Transit District,hq_corridor_bus,"MULTIPOLYGON (((-184637.774 -36487.978, -18464...",125710,60014366012001,523,8,172
17,17,Alameda-Contra Costa Transit District,hq_corridor_bus,"MULTIPOLYGON (((-184632.513 -36521.325, -18463...",125710,60014366012001,523,8,172
20,20,Alameda-Contra Costa Transit District,hq_corridor_bus,"MULTIPOLYGON (((-184662.574 -36958.651, -18469...",125710,60014366012001,523,8,172
26,26,Alameda-Contra Costa Transit District,hq_corridor_bus,"MULTIPOLYGON (((-183127.422 -40597.359, -18313...",125710,60014366012001,523,8,172
33,33,Alameda-Contra Costa Transit District,hq_corridor_bus,"MULTIPOLYGON (((-183386.671 -44394.580, -18338...",125710,60014366012001,523,8,172
...,...,...,...,...,...,...,...,...,...
49138,49138,Sonoma-Marin Area Rail Transit District,major_stop_rail,"POLYGON ((-241654.223 58414.285, -241658.099 5...",79398,60971527023005,0,560,0
49137,49137,Sonoma-Marin Area Rail Transit District,major_stop_rail,"POLYGON ((-241654.554 58414.730, -241658.430 5...",217629,60971527024015,0,19,0
49138,49138,Sonoma-Marin Area Rail Transit District,major_stop_rail,"POLYGON ((-241654.223 58414.285, -241658.099 5...",217629,60971527024015,0,19,0
49137,49137,Sonoma-Marin Area Rail Transit District,major_stop_rail,"POLYGON ((-241654.554 58414.730, -241658.430 5...",232450,60971527023000,0,549,0


In [47]:
sjoined = sjoined >> select(-_.geometry, -_.index_right)

In [48]:
sjoined

Unnamed: 0,index,agency_primary,hqta_type,geoid,people,jobs,housing
0,0,Alameda-Contra Costa Transit District,hq_corridor_bus,60014366012001,523,8,172
17,17,Alameda-Contra Costa Transit District,hq_corridor_bus,60014366012001,523,8,172
20,20,Alameda-Contra Costa Transit District,hq_corridor_bus,60014366012001,523,8,172
26,26,Alameda-Contra Costa Transit District,hq_corridor_bus,60014366012001,523,8,172
33,33,Alameda-Contra Costa Transit District,hq_corridor_bus,60014366012001,523,8,172
...,...,...,...,...,...,...,...
49138,49138,Sonoma-Marin Area Rail Transit District,major_stop_rail,60971527023005,0,560,0
49137,49137,Sonoma-Marin Area Rail Transit District,major_stop_rail,60971527024015,0,19,0
49138,49138,Sonoma-Marin Area Rail Transit District,major_stop_rail,60971527024015,0,19,0
49137,49137,Sonoma-Marin Area Rail Transit District,major_stop_rail,60971527023000,0,549,0


In [49]:
stop_grouped = (sjoined >> group_by(_.index, _.agency_primary, _.hqta_type)
                >> summarize(total_jobs = _.jobs.sum(), total_people = _.people.sum(), n = _.shape[0],
                            total_housing = _.housing.sum())
               
               )

In [50]:
stop_grouped

Unnamed: 0,index,agency_primary,hqta_type,total_jobs,total_people,n,total_housing
0,0,Alameda-Contra Costa Transit District,hq_corridor_bus,22751,75998,498,28767
1,1,Alameda-Contra Costa Transit District,hq_corridor_bus,137148,153107,1406,79424
2,2,Alameda-Contra Costa Transit District,hq_corridor_bus,92457,154659,1241,63141
3,3,Alameda-Contra Costa Transit District,hq_corridor_bus,136575,142008,1365,69241
4,4,Alameda-Contra Costa Transit District,hq_corridor_bus,86300,64626,606,29104
...,...,...,...,...,...,...,...
49222,49226,Southern California Regional Rail Authority,major_stop_rail,5270,9786,66,3300
49223,49227,Southern California Regional Rail Authority,major_stop_rail,7762,5839,117,1800
49224,49228,Southern California Regional Rail Authority,major_stop_rail,6638,4037,47,1286
49225,49229,Southern California Regional Rail Authority,major_stop_rail,7391,3190,29,1281


In [51]:
by_type = stop_grouped >> group_by(_.hqta_type) >> summarize(p5_pop = _.total_people.quantile(.05), p20_pop = _.total_people.quantile(.2),
                                                             p50_pop = _.total_people.quantile(.5), 
                                                  p80_pop = _.total_people.quantile(.8), p95_pop = _.total_people.quantile(.95),
                                                  p5_jobs = _.total_jobs.quantile(.05),
                                                  p20_jobs = _.total_jobs.quantile(.2), p50_jobs = _.total_jobs.quantile(.5),
                                                  p80_jobs = _.total_jobs.quantile(.8), p95_jobs = _.total_jobs.quantile(.95),
                                                  p5_housing = _.total_housing.quantile(.05),
                                                  p20_housing = _.total_housing.quantile(.2), p50_housing = _.total_housing.quantile(.5),
                                                  p80_housing = _.total_housing.quantile(.8), p95_housing = _.total_housing.quantile(.95)
                                                  
                                                  )
by_type

Unnamed: 0,hqta_type,p5_pop,p20_pop,p50_pop,p80_pop,p95_pop,p5_jobs,p20_jobs,p50_jobs,p80_jobs,p95_jobs,p5_housing,p20_housing,p50_housing,p80_housing,p95_housing
0,hq_corridor_bus,4156.0,8735.0,23452.0,73206.0,181390.75,2480.5,6688.0,15961.0,49293.0,188260.0,1702.75,3477.0,8726.0,28040.0,81785.25
1,major_stop_brt,7200.95,14900.6,23434.5,35540.0,58089.4,1308.4,3961.4,12188.5,80472.4,204147.0,2334.85,4954.4,11311.0,21936.0,36266.9
2,major_stop_bus,2505.0,5479.0,9829.0,16966.0,27301.0,1028.0,2119.0,4878.0,14676.0,77209.0,953.0,2090.0,3606.0,7134.0,17160.0
3,major_stop_ferry,154.0,2842.8,6523.0,10109.8,11707.0,602.0,1679.0,14020.0,123204.4,134539.8,103.6,1582.0,4143.0,6762.8,7761.0
4,major_stop_rail,2227.7,6191.0,11971.0,25451.6,56594.0,1377.7,3110.8,8994.0,45033.0,217600.0,855.3,2482.0,5220.0,15627.0,32754.0


In [52]:
by_type_operator = stop_grouped >> group_by(_.hqta_type, _.agency_primary) >> summarize(p5_pop = _.total_people.quantile(.05), p20_pop = _.total_people.quantile(.2),
                                                             p50_pop = _.total_people.quantile(.5), 
                                                  p80_pop = _.total_people.quantile(.8), p95_pop = _.total_people.quantile(.95),
                                                  p5_jobs = _.total_jobs.quantile(.05),
                                                  p20_jobs = _.total_jobs.quantile(.2), p50_jobs = _.total_jobs.quantile(.5),
                                                  p80_jobs = _.total_jobs.quantile(.8), p95_jobs = _.total_jobs.quantile(.95),
                                                  p5_housing = _.total_housing.quantile(.05),
                                                  p20_housing = _.total_housing.quantile(.2), p50_housing = _.total_housing.quantile(.5),
                                                  p80_housing = _.total_housing.quantile(.8), p95_housing = _.total_housing.quantile(.95)
                                                  
                                                  )
by_type_operator

Unnamed: 0,hqta_type,agency_primary,p5_pop,p20_pop,p50_pop,p80_pop,p95_pop,p5_jobs,p20_jobs,p50_jobs,p80_jobs,p95_jobs,p5_housing,p20_housing,p50_housing,p80_housing,p95_housing
0,hq_corridor_bus,Alameda-Contra Costa Transit District,18607.00,32510.0,57999.0,112739.0,165692.00,3402.00,10678.0,19717.0,82938.0,136575.00,7271.00,12737.0,22534.0,50777.0,74095.00
1,hq_corridor_bus,Anaheim Transportation Network,4435.10,8165.4,11509.0,13089.6,14306.40,11143.40,11402.6,12986.0,14761.6,16697.30,1269.70,2315.8,3313.0,3745.6,4331.00
2,hq_corridor_bus,Antelope Valley Transit Authority,5615.70,14756.8,25210.0,42965.2,54929.40,1483.40,4884.6,10790.0,19423.8,123508.40,1906.20,4357.4,9870.0,14462.6,22948.40
3,hq_corridor_bus,Basin Transit,1407.90,2293.2,2735.0,2999.0,4756.90,736.50,839.6,1236.0,1504.0,2041.60,753.90,1185.4,1438.0,1531.6,2448.20
4,hq_corridor_bus,Butte County Association of Governments,5120.50,5511.0,6705.5,8930.2,14957.25,5126.55,5839.2,7259.0,10440.0,12140.80,2258.40,2632.0,3227.0,4185.6,7122.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,major_stop_rail,San Francisco International Airport,0.00,0.0,621.0,2451.6,3062.70,7570.00,8396.6,8801.0,8914.0,10498.40,0.00,0.0,189.0,769.2,1015.30
284,major_stop_rail,San Joaquin Regional Rail Commission,922.80,3344.6,4963.0,7300.8,12224.35,1329.45,2163.2,4626.0,12453.2,16297.30,358.05,913.6,1768.0,3122.0,4956.55
285,major_stop_rail,Santa Clara Valley Transportation Authority,372.00,4056.6,9368.5,12127.0,13964.05,1451.30,3746.4,15232.5,28537.4,33144.45,106.00,1624.0,3605.5,5528.0,7018.20
286,major_stop_rail,Sonoma-Marin Area Rail Transit District,1946.20,4546.0,6977.5,9425.0,11176.25,1362.65,2498.0,4282.0,6514.0,7734.95,889.40,1962.0,2785.5,3989.6,4372.80


In [53]:
by_type.to_csv('pop_jobs_housing_by_major_stop_type.csv')
by_type_operator.to_csv('pop_jobs_housing_by_major_stop_type_and_agency.csv')