In [28]:
import pandas as pd
import geopandas as gpd
from siuba import *

HQTA_PATH = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/export/2024-09-18/ca_hq_transit_areas.parquet'

In [30]:
processed_census_path = 'gs://calitp-analytics-data/data-analyses/sb125/quick_mts_jobs/'

In [31]:
from calitp_data_analysis.utils import geoparquet_gcs_export

# Quick counts of people and jobs within ~0.5mi of a Major Transit Stop

Just wrapped this up, attached are two csvs. One summarizes jobs and population within a half mile of the various types of Major Transit Stop per PRC 21155 and PRC 21064.3, the other additionally separates them out by transit agency.

Used 2020 Census LEHD Workplace Area Characteristics to get all primary jobs by Census block, population by block is from the 2020 decennial Census. Spatial join was a simple intersection of each half-mile buffer around a major transit stop with all Census blocks, then jobs and people were summed across all those blocks to get a single figure per major stop.


## read clean census (already done, output on GCS)

In [None]:
import gzip
# all workers, all primary jobs
with gzip.open('./ca_wac_S000_JT01_2021.csv.gz', 'rb') as f:
    df = pd.read_csv(f)

In [None]:
df = df.iloc[:, :2]

In [None]:
import zipfile

In [None]:
with zipfile.ZipFile('./DECENNIALPL2020.P1_2024-10-16T105517.zip', 'r') as myzip:
    with myzip.open('DECENNIALPL2020.P1-Data.csv') as f:
        df2 = pd.read_csv(f)

In [None]:
df2 = df2.iloc[1:,:3]

In [None]:
df2.GEO_ID = df2.GEO_ID.map(lambda x: x[10:])

In [None]:
df2.GEO_ID = df2.GEO_ID.astype(int)

In [None]:
df2.P1_001N = df2.P1_001N.astype(int)

In [None]:
df2 >> head(3)

In [None]:
df2.dtypes

In [None]:
df >> head(3)

In [None]:
df.dtypes

In [None]:
joined = df2 >> inner_join(_, df, on={'GEO_ID':'w_geocode'})

In [None]:
joined

In [None]:
hqta >> head(3)

In [None]:
blocks = gpd.read_file('./tl_2020_06_tabblock20.zip')

In [None]:
blocks = blocks >> select(_.GEOID20, _.POP20, _.HOUSING20, _.geometry)

In [None]:
blocks.GEOID20 = blocks.GEOID20.astype(int)

In [None]:
blocks = blocks >> rename(GEO_ID = _.GEOID20)

In [None]:
joined = blocks >> inner_join(_, joined, on='GEO_ID')

In [None]:
joined

In [None]:
joined = joined >> select(_.geoid == _.GEO_ID, _.pop == _.POP20, _.jobs == _.C000,
                          _.housing == _.HOUSING20, _.geometry)

In [None]:
joined.to_parquet('census_jobs_blocks.parquet')

In [None]:
geoparquet_gcs_export(joined, processed_census_path, 'census_jobs_blocks.parquet')

## Spatial Join (start here)

In [72]:
census = gpd.read_parquet(f'{processed_census_path}census_jobs_blocks.parquet').rename(columns={'pop': 'people'})

In [73]:
hqta = gpd.read_parquet(HQTA_PATH)

In [74]:
hqta = hqta >> select(_.agency_primary, _.hqta_type, _.geometry) >> filter(_.hqta_type != 'hq_corridor_bus')

In [75]:
from calitp_data_analysis import geography_utils

In [76]:
hqta = hqta.to_crs(geography_utils.CA_NAD83Albers)

In [77]:
census = census.to_crs(geography_utils.CA_NAD83Albers)

In [78]:
sjoined = census.sjoin(hqta, how="inner", predicate='intersects') # flip direction to calculate area
sjoined = sjoined >> rename(hqta_ix = _.index_right)

In [79]:
# hqta = hqta.reset_index().rename(columns={'index':'hqta_ix'}) #  might actually have use for an ix
# sjoined = hqta.sjoin(census, how="inner", predicate='intersects')

In [81]:
from shared_utils.rt_utils import METERS_PER_MILE
sq_m_per_sq_mi = METERS_PER_MILE**2

In [82]:
sjoined = sjoined >> mutate(area_sq_mi = _.geometry.map(lambda x: x.area / sq_m_per_sq_mi))

In [83]:
sjoined = sjoined >> select(-_.geometry, -_.index_right)

In [84]:
sjoined

Unnamed: 0,geoid,people,jobs,housing,hqta_ix,agency_primary,hqta_type,area_sq_mi
1,61130101032011,0,503,0,46899,Yolo County Transportation District,major_stop_bus,0.058746
3933,61130102011037,0,24,0,46899,Yolo County Transportation District,major_stop_bus,0.031665
4436,61130102042002,53,12,23,46899,Yolo County Transportation District,major_stop_bus,0.009574
5017,61130102042011,0,20,0,46899,Yolo County Transportation District,major_stop_bus,0.005923
5673,61130102011043,49,1,28,46899,Yolo County Transportation District,major_stop_bus,0.008385
...,...,...,...,...,...,...,...,...
180366,60450109003041,0,4,0,33627,Mendocino Transit Authority,major_stop_bus,0.174335
245059,60450109001092,22,5,9,33627,Mendocino Transit Authority,major_stop_bus,2.219659
245419,60450109003031,19,7,5,33627,Mendocino Transit Authority,major_stop_bus,0.211296
247980,60450109003011,40,42,17,33627,Mendocino Transit Authority,major_stop_bus,0.407573


In [125]:
(sjoined >> distinct(_.geoid, _.people)).sum()

geoid     6217423610387783999
people               15073852
dtype: int64

In [86]:
stop_grouped = (sjoined >> group_by(_.hqta_ix, _.agency_primary, _.hqta_type)
                >> summarize(total_jobs = _.jobs.sum(), total_pop = _.people.sum(),
                            total_housing = _.housing.sum(), total_sq_mi = _.area_sq_mi.sum(),
                            n = _.shape[0])
               )

In [87]:
stop_grouped

Unnamed: 0,hqta_ix,agency_primary,hqta_type,total_jobs,total_pop,total_housing,total_sq_mi,n
0,2569,Alameda-Contra Costa Transit District,major_stop_brt,7002,10851,4638,0.906241,99
1,2570,Alameda-Contra Costa Transit District,major_stop_brt,7057,11208,4693,0.927706,100
2,2571,Alameda-Contra Costa Transit District,major_stop_brt,6016,10291,4398,0.910373,97
3,2572,Alameda-Contra Costa Transit District,major_stop_brt,6013,9872,4222,0.891233,96
4,2573,Alameda-Contra Costa Transit District,major_stop_brt,3616,9962,4076,0.890710,87
...,...,...,...,...,...,...,...,...
46656,49226,Southern California Regional Rail Authority,major_stop_rail,5270,9786,3300,1.258432,66
46657,49227,Southern California Regional Rail Authority,major_stop_rail,7762,5839,1800,0.874053,117
46658,49228,Southern California Regional Rail Authority,major_stop_rail,6638,4037,1286,1.652137,47
46659,49229,Southern California Regional Rail Authority,major_stop_rail,7391,3190,1281,1.685196,29


In [123]:
stop_grouped.describe()

Unnamed: 0,hqta_ix,total_jobs,total_pop,total_housing,total_sq_mi,n
count,46661.0,46661.0,46661.0,46661.0,46661.0,46661.0
mean,25899.047341,516081.1,302279.3,246117.4,1.20781,77.850389
std,13470.09291,29406010.0,18799070.0,17754480.0,0.952,44.77068
min,2569.0,1.658378e-17,0.0,0.0,0.1556,1.0
25%,14234.0,514.9397,1010.199,374.6853,0.921196,42.0
50%,25899.0,2912.232,7428.643,2757.2,1.039169,70.0
75%,37564.0,17215.03,28245.48,11898.6,1.293028,104.0
max,49230.0,2563908000.0,1626815000.0,1559880000.0,153.077307,317.0


In [131]:
def group_calculate(df, grouper, normalize=False):
    if normalize:
        df.total_pop = df.total_pop / df.total_sq_mi
        df.total_jobs = df.total_jobs / df.total_sq_mi
        df.total_housing = df.total_housing / df.total_sq_mi
    df = stop_grouped >> grouper >> summarize(
                            p5_pop = _.total_pop.quantile(.05), p20_pop = _.total_pop.quantile(.2),
                            p50_pop = _.total_pop.quantile(.5), 
                            p80_pop = _.total_pop.quantile(.8), p95_pop = _.total_pop.quantile(.95),
                            p5_jobs = _.total_jobs.quantile(.05),
                            p20_jobs = _.total_jobs.quantile(.2), p50_jobs = _.total_jobs.quantile(.5),
                            p80_jobs = _.total_jobs.quantile(.8), p95_jobs = _.total_jobs.quantile(.95),
                            p5_housing = _.total_housing.quantile(.05),
                            p20_housing = _.total_housing.quantile(.2), p50_housing = _.total_housing.quantile(.5),
                            p80_housing = _.total_housing.quantile(.8), p95_housing = _.total_housing.quantile(.95)
                        )
    if normalize:
        new_names = df.columns.map(lambda x: x+'_per_sq_mi')
        rename_dict = dict(zip(df.columns, new_names))
        df = df.rename(columns=rename_dict)
    return df.round(1)

In [132]:
by_type = group_calculate(stop_grouped, group_by(_.hqta_type), normalize=False)
by_type

Unnamed: 0,hqta_type,p5_pop,p20_pop,p50_pop,p80_pop,p95_pop,p5_jobs,p20_jobs,p50_jobs,p80_jobs,p95_jobs,p5_housing,p20_housing,p50_housing,p80_housing,p95_housing
0,major_stop_brt,2110.1,18053.0,53655.8,97766.6,173972.0,754.8,3039.3,32300.8,153473.3,451051.1,674.0,5630.4,27611.8,57667.4,101450.9
1,major_stop_bus,3.3,276.0,6623.7,41576.8,151933.9,2.4,166.6,2579.0,30900.5,259641.3,1.2,103.1,2446.7,18559.5,77918.6
2,major_stop_ferry,1.2,246.5,8385735.7,29354041.8,95795870.9,0.5,733.9,9602955.8,290961664.4,508867738.6,0.5,139.8,5326092.8,19459871.7,54055158.9
3,major_stop_rail,2.7,208.3,19053.0,97171.9,382423.9,4.8,141.8,8162.2,199953.0,1269118.6,0.8,91.3,8203.0,54712.1,257242.8


In [133]:
by_type_norm = group_calculate(stop_grouped, group_by(_.hqta_type), normalize=True)
by_type_norm

Unnamed: 0,hqta_type_per_sq_mi,p5_pop_per_sq_mi,p20_pop_per_sq_mi,p50_pop_per_sq_mi,p80_pop_per_sq_mi,p95_pop_per_sq_mi,p5_jobs_per_sq_mi,p20_jobs_per_sq_mi,p50_jobs_per_sq_mi,p80_jobs_per_sq_mi,p95_jobs_per_sq_mi,p5_housing_per_sq_mi,p20_housing_per_sq_mi,p50_housing_per_sq_mi,p80_housing_per_sq_mi,p95_housing_per_sq_mi
0,major_stop_brt,1840.3,17375.7,57420.5,110934.7,217798.6,665.7,3076.9,34658.4,175746.7,489469.8,626.8,5622.4,30147.9,62364.5,116139.8
1,major_stop_bus,1.6,195.7,6421.9,46040.9,188864.2,1.2,121.7,2477.4,34946.6,295377.7,0.6,75.7,2368.9,20642.9,94624.3
2,major_stop_ferry,0.6,188.5,17157363.4,64208147.1,269166018.3,0.3,563.1,19312930.0,632730767.5,1169795000.0,0.3,107.0,10897279.9,42565937.4,151294576.5
3,major_stop_rail,1.3,166.3,19615.9,107304.9,521612.2,2.4,104.4,8600.2,220302.1,1562487.0,0.4,64.8,8438.4,60891.5,339747.3


In [136]:
by_agency = group_calculate(stop_grouped, group_by(_.hqta_type, _.agency_primary), normalize=False)
by_agency

Unnamed: 0,hqta_type,agency_primary,p5_pop,p20_pop,p50_pop,p80_pop,p95_pop,p5_jobs,p20_jobs,p50_jobs,p80_jobs,p95_jobs,p5_housing,p20_housing,p50_housing,p80_housing,p95_housing
0,major_stop_brt,Alameda-Contra Costa Transit District,2691.0,12301.3,32465.8,79439.4,328910.0,482.2,1016.3,7114.6,67285.2,793563.0,785.3,3708.4,11450.7,45675.0,175858.8
1,major_stop_brt,City and County of San Francisco,6636.4,47414.8,81049.2,135531.2,203222.4,2976.7,16258.9,65830.3,206706.8,520397.0,3280.0,24529.4,41873.3,81350.0,125547.7
2,major_stop_brt,Los Angeles County Metropolitan Transportation...,211.2,1518.8,16764.3,65971.9,161867.3,124.2,1135.6,2800.6,110132.4,238440.4,81.5,582.9,5024.2,24438.4,92191.8
3,major_stop_bus,Alameda-Contra Costa Transit District,12.4,756.7,17826.6,89099.5,346766.3,5.2,175.0,4033.0,41843.2,602128.5,4.1,276.5,7215.5,40519.8,151313.1
4,major_stop_bus,Amador Regional Transit System,0.0,0.0,0.0,0.0,78109.0,0.0,0.0,0.0,0.0,647398.2,0.0,0.0,0.0,0.0,39211.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,major_stop_rail,San Francisco International Airport,0.0,0.0,0.0,0.4,0.9,0.0,0.0,0.0,1.0,3.1,0.0,0.0,0.0,0.1,0.3
158,major_stop_rail,San Joaquin Regional Rail Commission,0.1,1.7,96.3,35882.1,154320.5,0.2,4.3,82.3,55286.6,176650.1,0.0,0.5,38.7,17125.9,71225.2
159,major_stop_rail,Santa Clara Valley Transportation Authority,0.1,14.5,854.1,37112.5,297692.9,0.9,61.0,854.1,32126.3,824649.4,0.0,6.9,327.8,17598.8,166558.1
160,major_stop_rail,Sonoma-Marin Area Rail Transit District,0.0,0.0,30.2,551.5,1324.7,0.0,0.0,8.7,105.2,634.0,0.0,0.0,14.0,240.3,512.0


In [137]:
by_agency_norm = group_calculate(stop_grouped, group_by(_.hqta_type, _.agency_primary), normalize=True)
by_agency_norm

Unnamed: 0,hqta_type_per_sq_mi,agency_primary_per_sq_mi,p5_pop_per_sq_mi,p20_pop_per_sq_mi,p50_pop_per_sq_mi,p80_pop_per_sq_mi,p95_pop_per_sq_mi,p5_jobs_per_sq_mi,p20_jobs_per_sq_mi,p50_jobs_per_sq_mi,p80_jobs_per_sq_mi,p95_jobs_per_sq_mi,p5_housing_per_sq_mi,p20_housing_per_sq_mi,p50_housing_per_sq_mi,p80_housing_per_sq_mi,p95_housing_per_sq_mi
0,major_stop_brt,Alameda-Contra Costa Transit District,2326.6,11840.7,34882.3,91058.3,424133.7,440.7,955.7,7447.8,78925.8,1055032.4,680.2,3569.6,12448.4,51972.2,221104.9
1,major_stop_brt,City and County of San Francisco,5983.1,48767.2,87583.2,145809.0,234331.2,2686.5,17310.8,72085.3,220843.9,558971.1,2959.0,26166.0,44811.3,90595.0,147644.8
2,major_stop_brt,Los Angeles County Metropolitan Transportation...,154.0,1404.2,17470.5,75232.4,204441.4,88.7,1028.5,2819.1,135676.8,287539.1,59.4,542.1,5254.0,29502.2,108774.6
3,major_stop_bus,Alameda-Contra Costa Transit District,7.5,597.5,18279.5,104560.3,438877.4,2.8,143.5,4055.8,48300.6,736599.6,2.5,219.4,7410.1,47792.6,199941.5
4,major_stop_bus,Amador Regional Transit System,0.0,0.0,0.0,0.0,94840.8,0.0,0.0,0.0,0.0,783792.0,0.0,0.0,0.0,0.0,47901.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,major_stop_rail,San Francisco International Airport,0.0,0.0,0.0,0.2,0.5,0.0,0.0,0.0,0.5,1.6,0.0,0.0,0.0,0.1,0.2
158,major_stop_rail,San Joaquin Regional Rail Commission,0.1,0.8,70.4,42892.8,204565.0,0.1,2.2,59.2,68887.7,228394.5,0.0,0.3,28.3,20593.5,94043.1
159,major_stop_rail,Santa Clara Valley Transportation Authority,0.1,8.9,701.1,42286.4,395125.1,0.4,43.1,698.1,35592.0,1094461.8,0.0,4.3,271.0,19894.8,221067.1
160,major_stop_rail,Sonoma-Marin Area Rail Transit District,0.0,0.0,19.6,462.9,1110.0,0.0,0.0,5.6,80.5,532.3,0.0,0.0,9.1,201.7,428.9


In [138]:
by_type.to_csv('pop_jobs_housing_by_major_stop_type.csv')
by_type_norm.to_csv('pop_jobs_housing_by_major_stop_type_sq_mi.csv')
by_agency.to_csv('pop_jobs_housing_by_major_stop_type_and_agency.csv')
by_agency_norm.to_csv('pop_jobs_housing_by_major_stop_type_and_agency_sq_mi.csv')