In [43]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)

import shared_utils
import pandas as pd
import geopandas as gpd

import gcsfs
from calitp_data_analysis import get_fs
from calitp_data_analysis import geography_utils, utils
fs = get_fs()


In [2]:
GCS_FILE_PATH = 'gs://calitp-analytics-data/data-analyses/ahsc_grant/'

In [3]:
def read_parquet_from_gcs(filename):
    gcs_path = GCS_FILE_PATH.replace("gs://", "") + filename
    with fs.open(gcs_path, 'rb') as f:
        return gpd.read_parquet(f)

In [4]:
# start with trips per stop and ridership
stoptrips = read_parquet_from_gcs("tbl1_trips_perstop_07_08_2024.parquet")
stoptrips.head (5)

Unnamed: 0,name,location_type,route_type,stop_name,stop_id,stop_code,geometry,n_trips_weekday,n_trips_saturday,n_trips_sunday,n_routes_weekday,n_routes_saturday,n_routes_sunday,stop_desc
95,Monterey Salinas Schedule,0.0,3,Del Monte Center / Gate 1,2,2,POINT (-121.89736 36.58461),38.0,30.0,26.0,7.0,5.0,5.0,
96,Monterey Salinas Schedule,0.0,3,Del Monte Center / Gate 2,3,3,POINT (-121.89728 36.58473),31.0,27.0,23.0,6.0,5.0,4.0,
97,Monterey Salinas Schedule,0.0,3,Del Monte Center / Gate 3,4,4,POINT (-121.89893 36.58445),25.0,12.0,11.0,5.0,3.0,2.0,
98,Monterey Salinas Schedule,0.0,3,6th / Mission Street,6,6,POINT (-121.92076 36.55564),91.0,86.0,77.0,6.0,6.0,6.0,
99,Monterey Salinas Schedule,0.0,3,Northridge Mall,11,11,POINT (-121.65803 36.71563),142.0,136.0,132.0,6.0,7.0,7.0,


In [5]:
ridership_metro = read_parquet_from_gcs("ridership_metro_08_26_2024.parquet")
ridership_metro.head(5)

Unnamed: 0,feed_key,stop_id,stop_name,geometry,STOP_NAME,sat_ons,sun_ons,weekday_ons,name
0,06d1f3ac2b0ae5e74424edbbfefa19ed,12591,LA Zoo,POINT (158199.490 -428414.858),LA ZOO,857.679453,381.190868,2775.546009,LA Metro Bus Schedule
1,06d1f3ac2b0ae5e74424edbbfefa19ed,5377,1st / Hill,POINT (161833.578 -438634.619),1ST / HILL,14914.092717,11531.023762,159742.798188,LA Metro Bus Schedule
2,06d1f3ac2b0ae5e74424edbbfefa19ed,15612,1st / Hill,POINT (161849.863 -438611.462),1ST / HILL,4800.622496,3383.068955,43551.056687,LA Metro Bus Schedule
3,06d1f3ac2b0ae5e74424edbbfefa19ed,1217,6th / Wall,POINT (161822.154 -439849.592),6TH / WALL,3502.191101,3061.43916,24682.108713,LA Metro Bus Schedule
4,06d1f3ac2b0ae5e74424edbbfefa19ed,7376,7th / Alma,POINT (157649.168 -473829.124),7TH / ALMA,0.0,35.736644,559.874088,LA Metro Bus Schedule


In [6]:
ridership_sbmtd = read_parquet_from_gcs("ridership_sbmtd_08_26_2024.parquet")
ridership_sbmtd.head(5)

Unnamed: 0,feed_key,stop_id,stop_name,geometry,sat_ons,sun_ons,weekday_ons,name
0,52201caab047b98ae19b7547c0d7c2ad,1,Modoc & Portesuello,POINT (25170.737 -398993.625),1573.0,1287.0,13964.0,SBMTD Schedule
1,52201caab047b98ae19b7547c0d7c2ad,2,Milpas & Montecito,POINT (29196.552 -399052.308),3901.0,3139.0,30225.0,SBMTD Schedule
2,52201caab047b98ae19b7547c0d7c2ad,4,Cathedral Oaks & Camino Del Rio,POINT (20247.563 -395908.292),,,1.0,SBMTD Schedule
3,52201caab047b98ae19b7547c0d7c2ad,5,Via Real & Sandpiper MHP,POINT (42143.060 -400995.911),217.0,109.0,1485.0,SBMTD Schedule
4,52201caab047b98ae19b7547c0d7c2ad,6,UCSB Elings Hall Outbound,POINT (14738.802 -400125.683),1374.0,1199.0,9777.0,SBMTD Schedule


In [7]:
ridership_mst = read_parquet_from_gcs("ridership_mst_08_26_2024.parquet")
ridership_mst.head(5)

Unnamed: 0,feed_key,stop_id,stop_name,geometry,sat_ons,sun_ons,weekday_ons,name
0,118c3a62eab691ac449fe0c1c7505413,2,Del Monte Center / Gate 1,POINT (-169532.243 -157455.258),2240.0,1740.0,9287.0,Monterey Salinas Schedule
1,118c3a62eab691ac449fe0c1c7505413,3,Del Monte Center / Gate 2,POINT (-169524.827 -157441.956),616.0,348.0,1757.0,Monterey Salinas Schedule
2,118c3a62eab691ac449fe0c1c7505413,4,Del Monte Center / Gate 3,POINT (-169673.036 -157470.002),56.0,58.0,502.0,Monterey Salinas Schedule
3,118c3a62eab691ac449fe0c1c7505413,6,6th / Mission Street,POINT (-171687.391 -160632.627),4312.0,4234.0,21837.0,Monterey Salinas Schedule
4,118c3a62eab691ac449fe0c1c7505413,11,Northridge Mall,POINT (-147895.267 -143297.134),9128.0,13224.0,40662.0,Monterey Salinas Schedule


In [8]:
# contatenate gdfs, keeping common columns
ridership_all = pd.concat([ridership_metro,ridership_sbmtd,ridership_mst], join='inner', ignore_index="True")
ridership_all.sample(5)

Unnamed: 0,feed_key,stop_id,stop_name,geometry,sat_ons,sun_ons,weekday_ons,name
5510,06d1f3ac2b0ae5e74424edbbfefa19ed,17274,Jefferson / 12th,POINT (154086.569 -441956.781),131.034361,83.385502,1012.538244,LA Metro Bus Schedule
1831,06d1f3ac2b0ae5e74424edbbfefa19ed,140955,Vermont / Manchester,POINT (157909.002 -449131.691),9398.737343,8695.91668,52997.442889,LA Metro Bus Schedule
12239,52201caab047b98ae19b7547c0d7c2ad,161,Storke & Marketplace,POINT (11956.807 -398640.260),2671.0,2555.0,12287.0,SBMTD Schedule
2408,06d1f3ac2b0ae5e74424edbbfefa19ed,164,Atlantic / Agnes,POINT (167372.364 -454086.763),774.293951,702.820663,7028.206632,LA Metro Bus Schedule
7221,06d1f3ac2b0ae5e74424edbbfefa19ed,11254,Martin Luther King Jr / Brenton,POINT (165942.994 -451935.326),166.771005,83.385502,869.591668,LA Metro Bus Schedule


In [9]:
stoptrips.columns

Index(['name', 'location_type', 'route_type', 'stop_name', 'stop_id',
       'stop_code', 'geometry', 'n_trips_weekday', 'n_trips_saturday',
       'n_trips_sunday', 'n_routes_weekday', 'n_routes_saturday',
       'n_routes_sunday', 'stop_desc'],
      dtype='object')

In [10]:
len(ridership_all)

13677

In [11]:
len(stoptrips)

13730

In [41]:
trips_ridership_joined = pd.merge(
    stoptrips,
    ridership_all['name', 'stop_id', 'stop_name' ,
    on=['name', 'stop_id', 'stop_name'],
    how='outer'
)


In [38]:
len(trips_ridership_joined)

13873

In [49]:
#Rename 'geometry_x' to 'geometry'
trips_ridership_joined = trips_ridership_joined.rename(columns={'geometry_x': 'geometry'})

#Set 'geometry' as the active geometry column
trips_ridership_joined = trips_ridership_joined.set_geometry('geometry')

In [50]:
trips_ridership_joined = trips_ridership_joined.to_crs(epsg=3347)

In [51]:
stop_geom = trips_ridership_joined[["feed_key", "stop_id", "geometry"]]

In [53]:
trips_ridership_joined.geometry = trips_ridership_joined.buffer(402.336)

In [54]:
acs_ca = read_parquet_from_gcs("acs_tbl_ca.parquet")
acs_ca.head(5)

Unnamed: 0,ALAND,geometry,geo_id,total_pop,households,not_us_citizen_pop,black_pop,hispanic_pop,inc_extremelylow,inc_verylow,...,pop_determined_poverty_status,poverty,no_car,no_cars,male_youth,female_youth,male_seniors,female_seniors,youth_pop,seniors_pop
0,3837562,"POLYGON ((-118.58119 34.14318, -118.58099 34.1...",6037137504,2073.0,694.0,23.0,19.0,64.0,30.0,29.0,...,2073.0,90.0,12.0,10.0,266.0,244.0,266.0,297.0,510.0,563.0
1,4472196,"POLYGON ((-118.60573 34.14585, -118.60561 34.1...",6037138000,4673.0,1784.0,198.0,325.0,393.0,270.0,124.0,...,4673.0,386.0,0.0,19.0,598.0,425.0,549.0,487.0,1023.0,1036.0
2,1152031,"POLYGON ((-118.53082 34.18024, -118.52952 34.1...",6037139200,5840.0,2172.0,815.0,153.0,1330.0,242.0,315.0,...,5840.0,602.0,66.0,108.0,777.0,634.0,309.0,662.0,1411.0,971.0
3,1213095,"POLYGON ((-121.50218 38.55643, -121.50184 38.5...",6067002300,3342.0,1629.0,79.0,55.0,666.0,150.0,67.0,...,3342.0,127.0,13.0,31.0,362.0,410.0,242.0,291.0,772.0,533.0
4,3224718,"POLYGON ((-121.50970 38.54070, -121.50960 38.5...",6067002400,4685.0,2011.0,43.0,135.0,440.0,109.0,132.0,...,4679.0,159.0,17.0,147.0,302.0,823.0,451.0,682.0,1125.0,1133.0


In [55]:
len(acs_ca)

8057

In [56]:
acs_ca = acs_ca.to_crs(epsg=3347)

In [11]:
ridership_all = ridership_all.drop(columns=['feed_key'])

In [12]:
# join together, keep buses, create total trips per weekday
trips_ridership_joined = pd.merge(stoptrips,
                                  ridership_all,
                                  on = ['name', 'stop_name', 'stop_id','geometry'],
                                  how = 'left'
                                 )



In [13]:
trips_ridership_joined.shape

(13730, 17)

In [14]:
columns_to_check = ['weekday_ons', 'sat_ons', 'sun_ons']

In [15]:
trips_ridership_joined_all_na = trips_ridership_joined[
    trips_ridership_joined[columns_to_check].isna().all(axis=1)
]

In [16]:
trips_ridership_joined_all_na.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 13730 entries, 0 to 13729
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   name               13730 non-null  object  
 1   location_type      966 non-null    float64 
 2   route_type         13730 non-null  object  
 3   stop_name          13730 non-null  object  
 4   stop_id            13730 non-null  object  
 5   stop_code          13730 non-null  object  
 6   geometry           13730 non-null  geometry
 7   n_trips_weekday    13710 non-null  float64 
 8   n_trips_saturday   13124 non-null  float64 
 9   n_trips_sunday     13090 non-null  float64 
 10  n_routes_weekday   13710 non-null  float64 
 11  n_routes_saturday  13124 non-null  float64 
 12  n_routes_sunday    13090 non-null  float64 
 13  stop_desc          0 non-null      object  
 14  sat_ons            0 non-null      float64 
 15  sun_ons            0 non-null      float64 
 

In [17]:
trips_ridership_joined.to_excel('trips_ridership_joined_all.xlsx', index=False)

In [18]:
trips_ridership_joined_all_na.to_excel('trips_ridership_joined_all_na.xlsx', index=False)

In [19]:
trips_ridership_joined_filtered = trips_ridership_joined[
    trips_ridership_joined[['weekday_ons', 'sat_ons', 'sun_ons']].isna().all(axis=1)
]

In [20]:
trips_ridership_joined_filtered.name.unique()

array(['Monterey Salinas Schedule', 'LA Metro Bus Schedule',
       'SBMTD Schedule'], dtype=object)

In [21]:
len(trips_ridership_joined)

13730

In [22]:
trips_ridership_joined.columns

Index(['name', 'location_type', 'route_type', 'stop_name', 'stop_id',
       'stop_code', 'geometry', 'n_trips_weekday', 'n_trips_saturday',
       'n_trips_sunday', 'n_routes_weekday', 'n_routes_saturday',
       'n_routes_sunday', 'stop_desc', 'sat_ons', 'sun_ons', 'weekday_ons'],
      dtype='object')

In [23]:
# add .25mi (10min walk) buffers to stops
# this replaces our point geometry with polygons
trips_ridership_joined.geometry = trips_ridership_joined.buffer(402.336)


  trips_ridership_joined.geometry = trips_ridership_joined.buffer(402.336)
