In [1]:
import pandas as pd
import geopandas as gpd
from siuba import *

In [2]:
results = gpd.read_parquet('outputs/new_trips_with_uza.parquet')

In [3]:
results >> head(3)

Unnamed: 0,GEOID,geometry,p50_mi_transit,p50_mi_auto,total_mi_transit,total_mi_auto,p50_transit_longer,no_transit_replica,no_transit,total_mi,new_transit_mi,projected_new_transit_trips,index_right,UACE10,NAME10,total_pop,new_trips_per_capita
0,6029004402,"POLYGON ((59906.842 -270407.558, 59906.948 -27...",42.2,1.7,84.4,148755.4,True,False,False,148839.8,41329.266335,979.0,2554.0,92161,"Wasco, CA",4027,0.243109
1,6047000802,"POLYGON ((-52727.332 -74888.681, -52723.908 -7...",4.0,4.0,,90331.2,False,True,False,,25097.053439,6274.0,8.0,56251,"Merced, CA",4709,1.332342
2,6085501402,"POLYGON ((-165759.786 -73091.934, -165741.347 ...",3.5,4.6,140.3,47341.1,False,False,False,47481.4,13152.95398,3757.0,1694.0,79039,"San Jose, CA",2962,1.2684


In [4]:
ntd_totals = pd.read_excel('census_ntd/2022 UZA Sums_0.xlsx')

In [5]:
ntd_totals >> head(3)

Unnamed: 0,UACE CD,UZA Name,UZA Population,Total Vehicle Revenue Miles,Total Vehicle Revenue Hours,Total Passenger Miles Traveled,Total Unlinked Passenger Trips,Total Operating Expenses,Fixed Guideway Directional Route Miles,Fixed Guideway Vehicle Revenue Miles,Fixed Guideway Passenger Miles,Fixed Guideway Operating Expenses,Non-Fixed Guideway Vehicle Revenue Miles,Non-Fixed Guideway Passenger Miles,Non-Fixed Guideway Operating Expenses,Fixed Guideway Directional Route Miles >= 7 Years,Fixed Guideway Vehicle Revenue Miles >= 7 Years,High Intensity Bus Directional Route Miles >= 7 Years,High Intensity Bus Vehicle Revenue Miles >= 7 Years
0,1,Alabama Non-UZA,,5191052,311767,1657944,847168,18285687,0.0,0,0,0,2032430,1657944,6989662,0.0,0,0.0,0
1,2,Alaska Non-UZA,,3135019,181845,25208661,1103868,74591876,891.3,1032797,25208661,52590936,526779,0,3461538,891.3,1032797,0.0,0
2,4,Arizona Non-UZA,,6917128,322764,13610232,1394405,24844073,0.0,0,0,0,5011277,13610232,16875708,0.0,0,0.0,0


In [6]:
from calitp_data_analysis.sql import to_snakecase

In [7]:
ntd_totals = to_snakecase(ntd_totals)

In [8]:
ntd_totals.columns

Index(['uace_cd', 'uza_name', 'uza_population', 'total_vehicle_revenue_miles',
       'total_vehicle_revenue_hours', 'total_passenger_miles_traveled',
       'total_unlinked_passenger_trips', 'total_operating_expenses',
       'fixed_guideway_directional_route_miles',
       'fixed_guideway_vehicle_revenue_miles',
       'fixed_guideway_passenger_miles', 'fixed_guideway_operating_expenses',
       'non_fixed_guideway_vehicle_revenue_miles',
       'non_fixed_guideway_passenger_miles',
       'non_fixed_guideway_operating_expenses',
       'fixed_guideway_directional_route_miles_>=_7_years',
       'fixed_guideway_vehicle_revenue_miles_>=_7_years',
       'high_intensity_bus_directional_route_miles_>=_7_years',
       'high_intensity_bus_vehicle_revenue_miles_>=_7_years'],
      dtype='object')

In [9]:
upt_df = ntd_totals >> select(_.uace_cd, _.uza_name, _.total_unlinked_passenger_trips, _.uza_population)

In [10]:
upt_df

Unnamed: 0,uace_cd,uza_name,total_unlinked_passenger_trips,uza_population
0,1,Alabama Non-UZA,847168,
1,2,Alaska Non-UZA,1103868,
2,4,Arizona Non-UZA,1394405,
3,5,Arkansas Non-UZA,799801,
4,6,California Non-UZA,7350357,
...,...,...,...,...
549,69517,"Pinehurst--Southern Pines, NC",9046,50319.0
550,45262,"Kingston, NY",161584,50254.0
551,27631,"Enid, OK",38411,50194.0
552,15184,"Chambersburg, PA",39130,50094.0


In [11]:
upt_df.dtypes

uace_cd                             int64
uza_name                           object
total_unlinked_passenger_trips      int64
uza_population                    float64
dtype: object

In [12]:
results.UACE10 = results.UACE10.fillna(6).astype('int64') #  6 is CA non-UZA

In [13]:
uza_grouped = (results >> group_by(_.UACE10, _.NAME10)
    >> summarize(new_transit_mi_weekday = _.new_transit_mi.sum(),
                new_transit_trips_weekday = _.projected_new_transit_trips.sum(),
                 num_tracts = _.shape[0]
                )
)

In [14]:
df = (uza_grouped >> inner_join(_, upt_df, on = {'UACE10': 'uace_cd'})
      >> mutate(current_weekday_trips = _.total_unlinked_passenger_trips / (365 - 104))
      >> mutate(increase_ratio = _.new_transit_trips_weekday / _.current_weekday_trips,
               new_trips_per_capita = _.new_transit_trips_weekday / _.uza_population,
               )
      >> arrange(-_.new_transit_trips_weekday)
     ).round(2)

In [15]:
df.to_csv('vmt_to_transit_uza.csv', index = False)

In [16]:
## What happens if you compare to 2019/08?

In [17]:
df >> select(-_.UACE10, -_.NAME10, -_.uace_cd)

Unnamed: 0,new_transit_mi_weekday,new_transit_trips_weekday,num_tracts,uza_name,total_unlinked_passenger_trips,uza_population,current_weekday_trips,increase_ratio,new_trips_per_capita
17,101578900.0,13230200.0,2974,"Los Angeles--Long Beach--Anaheim, CA",352876967,12237376.0,1352019.03,9.79,1.08
33,29397320.0,4005496.0,880,"San Francisco--Oakland, CA",174947309,3515933.0,670296.2,5.98,1.14
32,29510330.0,3252737.0,714,"San Diego, CA",63873516,3070300.0,244726.11,13.29,1.06
29,21487950.0,2516538.0,447,"Riverside--San Bernardino, CA",7649513,2276703.0,29308.48,85.86,1.11
30,18100160.0,1985167.0,452,"Sacramento, CA",11886782,1946618.0,45543.23,43.59,1.02
34,15865110.0,1741073.0,398,"San Jose, CA",22102814,1837446.0,84685.11,20.56,0.95
10,6356216.0,1136586.0,166,"Fresno, CA",7205414,717589.0,27606.95,41.17,1.58
24,3502044.0,883930.0,95,"Oxnard--San Buenaventura (Ventura), CA",2795139,376117.0,10709.34,82.54,2.35
0,9380291.0,852281.0,200,California Non-UZA,7350357,,28162.29,30.26,
3,4994286.0,843937.0,156,"Bakersfield, CA",3201508,570235.0,12266.31,68.8,1.48


About 18 million new daily trips across LA/Orange/San Diego/Imperial Counties. For reference, LA Metro's daily ridership is around 1 million. Current regional transit modeshare is only about 5%...

# Statewide

In [18]:
df.sum()

  df.sum()


UACE10                                                                      3281692
new_transit_mi_weekday                                                 323248206.03
new_transit_trips_weekday                                                44026554.0
num_tracts                                                                     8339
uace_cd                                                                     3281692
uza_name                          Los Angeles--Long Beach--Anaheim, CASan Franci...
total_unlinked_passenger_trips                                            704442418
uza_population                                                           34366472.0
current_weekday_trips                                                     2699013.1
increase_ratio                                                              8222.95
new_trips_per_capita                                                         104.13
dtype: object

In [19]:
df.sum()['new_transit_trips_weekday'] / df.sum()['current_weekday_trips']

  df.sum()['new_transit_trips_weekday'] / df.sum()['current_weekday_trips']
  df.sum()['new_transit_trips_weekday'] / df.sum()['current_weekday_trips']


16.312093483355078

# By MPO

In [20]:
mpo = gpd.read_file('census_ntd/mpos.geojson')
# trips_by_tract = gpd.read_parquet('outputs/')

In [21]:
import shared_utils

In [22]:
mpo = mpo[['MPO', 'LABEL_MPO', 'geometry']]
mpo = mpo.to_crs(shared_utils.geography_utils.CA_NAD83Albers)

In [23]:
results = results >> select(-_.index_right)

In [24]:
mpo_joined = gpd.sjoin(results, mpo, how = 'left')

In [25]:
##  add population, per capita...

df2 = (mpo_joined >> group_by(_.MPO, _.LABEL_MPO)    
    >> summarize(new_transit_mi_weekday = _.new_transit_mi.sum(),
                new_transit_trips_weekday = _.projected_new_transit_trips.sum(),
                 num_tracts = _.shape[0],
                 population = _.total_pop.sum()
                )
      >> mutate(new_trips_per_capita = _.new_transit_trips_weekday / _.population)
      >> mutate(new_transit_mi_pct = _.new_transit_mi_weekday / _.new_transit_mi_weekday.sum())
      ).round(2) >> arrange(-_.new_transit_trips_weekday)

In [26]:
df2.to_csv('vmt_to_transit_mpo.csv', index = False)