In [216]:
import calitp
from calitp.tables import tbl
from siuba import *

import shared_utils
import pandas as pd

## Estimating Costs for bringing GTFS-RT to small/rural operators using GRaaS

* [research issue](https://github.com/cal-itp/data-infra/issues/843)
* data read/cleaned, awaiting cost inputs

#### Bus service increase data

In [217]:
## run service_increase_estimator nb from data-analyses/bus_service_increase to update this if needed:
hours_by_operator = pd.read_parquet('gs://calitp-analytics-data/data-analyses/bus_service_increase/increase_by_operator.parquet')

In [218]:
hours_by_operator = hours_by_operator.groupby('calitp_itp_id').sum().reset_index()
hours_by_operator['additional_buses'] = hours_by_operator['additional_buses'].round(0)

In [219]:
hours_by_operator = (hours_by_operator
                     >> filter(-_.calitp_itp_id.isin((231, 18, 108, 135,
                                                     171, 172, 173, 176,
                                                     174, 177, 178, 179,
                                                     181)))
                    ) ## exclude mixed feeds with incorrect increase estimates; will not affect basic estimate

#### 2019 NTD Vehicle data

In [220]:
ntd_vehicles_2019 = pd.read_csv(f"gs://calitp-analytics-data/data-analyses/bus_service_increase/ntd_vehicles_2019.csv") >> filter(_.State == 'CA')

In [221]:
ntd_vehicles = ntd_vehicles_2019[['NTD ID', 'Bus', 'Articulated Bus', 'Over-The-Road Bus',
                                 'Double Decker Bus', 'Trolleybus', 'Cutaway']]
ntd_vehicles.rename(columns={'NTD ID': 'ntd_id', 'Bus': 'bus', 'Articulated Bus': 'artic_bus',
                            'Over-The-Road Bus': 'otr_bus', 'Double Decker Bus':'dbl_deck_bus',
                            'Trolleybus': 'trolleybus', 'Cutaway': 'cutaway'}, inplace=True)
ntd_vehicles['bus'] = ntd_vehicles['bus'].str.replace(',', '')
ntd_vehicles = ntd_vehicles.astype({'bus': 'int64', 'artic_bus': 'int64', 'otr_bus': 'int64',
                                   'dbl_deck_bus': 'int64', 'trolleybus': 'int64'})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ntd_vehicles['bus'] = ntd_vehicles['bus'].str.replace(',', '')


In [222]:
ntd_vehicles['total_vehicles'] = (ntd_vehicles[
                ['bus', 'artic_bus', 'otr_bus',
                'dbl_deck_bus', 'trolleybus', 'cutaway']]
                                  .sum(axis=1))

In [223]:
ntd_vehicles = (tbl.views.transitstacks()
 >> select(_.calitp_itp_id == _.itp_id, _.transit_provider,
           _.ntd_id, _._5311_funds)
 >> collect()
 >> inner_join(_, ntd_vehicles, on='ntd_id')
)
# ntd_vehicles.head(3)

In [224]:
operators_vehicles = hours_by_operator >> right_join(_, ntd_vehicles, on='calitp_itp_id')

In [225]:
# operators_vehicles.head(3)

In [226]:
small_non_5311 = (operators_vehicles
                  >> filter((_._5311_funds.isna()) | (_._5311_funds == '$0'))
                  >> filter((_.total_vehicles < 30) & (_.total_vehicles != 0)) # NTD "small system"
                 )

In [227]:
fta_5311_agencies = (operators_vehicles
                     >> filter(-_._5311_funds.isna())
                     >> filter(_._5311_funds != '$0')
                     >> filter((_.total_vehicles < 45) & (_.total_vehicles != 0)) # 45 bus threshold for larger rural operators
                    )

In [228]:
rural_small_agencies = small_non_5311.append(fta_5311_agencies)

In [229]:
rural_small_agencies = rural_small_agencies >> filter(_.calitp_itp_id != 246) ## exclude Caltrain

In [230]:
rural_small_agencies = (rural_small_agencies
                        >> select(-_.addl_service_hrs_annual, -_.bus_capex, -_.bus_capex_annualized,
                                 -_.artic_bus, -_.otr_bus, -_.dbl_deck_bus, -_.trolleybus))

In [231]:
rural_small_agencies['total_with_additional_vehicles'] = rural_small_agencies['total_vehicles'] + rural_small_agencies['additional_buses']

### Dataframe of rural/small agencies

* Includes any agency with less than 30 vehicles (buses and/or cutaways, excluding vans)
* Also includes agencies with FTA 5311 funding and less than 45 vehicles
* 133 agencies

In [234]:
rural_small_agencies.shape

(133, 9)

In [235]:
rural_small_agencies.head(3)

Unnamed: 0,calitp_itp_id,additional_buses,transit_provider,ntd_id,_5311_funds,bus,cutaway,total_vehicles,total_with_additional_vehicles
27,75,47.0,Commerce Municipal Bus Lines,90043,$0,15,7,22,69.0
28,17,9.0,Arcadia Transit,90044,$0,0,18,18,27.0
29,308,7.0,Simi Valley Transit,90050,$0,11,12,23,30.0


### Total number of vehicles estimate for rural/small agencies

* both existing conditions and post-increase estimate based on transit service increase work

In [238]:
rural_small_agencies.total_vehicles.sum()

1425

In [239]:
rural_small_agencies.total_with_additional_vehicles.sum()

2617.0

### Building an estimation methodology (in progress)

In [209]:
grass_vehicle_cost = 100 ## made up numbers, update and add additional inputs
per_operator_costs = 2000

In [210]:
rural_small_agencies = rural_small_agencies.fillna(0)

In [211]:
rural_small_agencies['vehicle_grass_costs'] = rural_small_agencies['total_vehicles'] * grass_vehicle_cost
rural_small_agencies['addl_vehicle_grass_costs'] = rural_small_agencies['additional_buses'] * grass_vehicle_cost

rural_small_agencies['total_grass_costs'] = rural_small_agencies['vehicle_grass_costs'] + per_operator_costs
rural_small_agencies['total_grass_costs_service_increase'] = (rural_small_agencies['vehicle_grass_costs'] +
                                                              rural_small_agencies['addl_vehicle_grass_costs'] +
                                                              per_operator_costs)

In [212]:
rural_small_agencies.head(3)

Unnamed: 0,calitp_itp_id,additional_buses,transit_provider,ntd_id,_5311_funds,bus,cutaway,total_vehicles,total_with_additional_vehicles,vehicle_grass_costs,addl_vehicle_grass_costs,total_grass_costs,total_grass_costs_service_increase
27,75,47.0,Commerce Municipal Bus Lines,90043,$0,15,7,22,69.0,2200,4700.0,4200,8900.0
28,17,9.0,Arcadia Transit,90044,$0,0,18,18,27.0,1800,900.0,3800,4700.0
29,308,7.0,Simi Valley Transit,90050,$0,11,12,23,30.0,2300,700.0,4300,5000.0


In [208]:
rural_small_agencies[['total_grass_costs', 'total_grass_costs_service_increase']].sum()

total_grass_costs                     408500.0
total_grass_costs_service_increase    587500.0
dtype: float64