In [2]:
import calitp
from calitp.tables import tbl
from siuba import *
from siuba.sql import sql_raw
from siuba.experimental import completer
from plotnine import *

import pandas as pd
import numpy as np

import geopandas as gpd
import folium



## About how many validators are needed to connect to LOSSAN?

In [4]:
transit_stacks = tbl.views.transitstacks()

In [5]:
lossan_counties = ['San Luis Obispo', 'Santa Barbara', 'Ventura',
                  'Los Angeles', 'San Diego']

In [6]:
dim_stops = (tbl.views.gtfs_schedule_dim_stops()
                 >> select(_.itp_id == _.calitp_itp_id, _.stop_key, _.stop_id, _.stop_lat, _.stop_lon, _.stop_name)
                )

In [7]:
weekday_stops = (tbl.views.gtfs_schedule_fact_daily_feed_stops()
                    >> filter(_.date == '2021-09-01') #weekday service
                    >> select(_.stop_key, _.date)
                    >> inner_join(_, dim_stops, on='stop_key')
                      )

In [8]:
info_cols = ['itp_id', 'transit_provider', 'ntd_id',
             'modes', 'county', 'legacy_ntd_id']

vehicle_cols = ['bus', 'articulated_bus', 'over_the_road_bus',
                'school_bus', 'trolleybus', 'vintage_historic_trolley',
                'streetcar']

paratransit_cols = ['van', 'cutaway', 'automobile',
                     'minivan', 'sport_utility_vehicle']

In [44]:
lossan_county_stops = (transit_stacks
                         >> filter(_.county.isin(lossan_counties))
                         >> select(_.itp_id)
                         >> inner_join(_, weekday_stops, on = "itp_id")
                         >> collect()
                    )

In [130]:
lossan_operators = (transit_stacks
                         >> filter(_.county.isin(lossan_counties))
                         >> select(*(info_cols + vehicle_cols + paratransit_cols))
                         >> collect()
                    )

In [164]:
lossan_operators.replace(' ', np.nan, inplace=True)
for col in (vehicle_cols + paratransit_cols):
    lossan_operators = lossan_operators.astype({col:'float64'})

lossan_operators[vehicle_cols].sum()

bus                         4351.0
articulated_bus              584.0
over_the_road_bus            229.0
school_bus                     0.0
trolleybus                     0.0
vintage_historic_trolley       2.0
streetcar                      0.0
dtype: float64

In [149]:
## Eric's estimation...
validators_needed = pd.Series({'bus':2, 'articulated_bus':3, 'over_the_road_bus':1,
                             'vintage_historic_trolley':2, 'trolleybus':2,
                             'van':1, 'cutaway':1, 'automobile':1,
                             'minivan':1, 'sport_utility_vehicle:':1,
                             'lrt_stn':6, 'hrt_stn':15, 'la_union_stn':40,
                             'commuter_rail_stn':6, 'interchange':20, ## interchange is for places like 7th/Metro
                             'ferry_stn':4})

## Rail/Ferry

In [162]:
##TODO replace hardcoded station values when warehouse complete
metro_lrt = 93 - 16
metro_hrt = 15
metro_interchange = 1
metrolink = 62 - lossan_stations[lossan_stations['COMM_OP'] == 'Metrolink'].shape[0] ## all stns minus LOSSAN stns
# coaster = 8 ## on LOSSAN, so not a connection?
sdmts_lrt = 103
sdmts_ferry = 3

In [163]:
rail_validators_needed = (pd.Series({'lrt_stn':metro_lrt+sdmts_lrt, 'hrt_stn':metro_hrt,
                                     'interchange':metro_interchange,'commuter_rail_stn':metrolink,
                                     'ferry_stn':sdmts_ferry})
                                    * validators_needed).dropna()
rail_validators_needed

commuter_rail_stn     222.0
ferry_stn              12.0
hrt_stn               225.0
interchange            20.0
lrt_stn              1080.0
dtype: float64

## Direct LOSSAN Connections

In [20]:
## We don't quite have this in data warehouse yet...
## from https://gis.data.ca.gov/datasets/63697b01616b4df68e2b316e73d7a4d6_0/
ca_rail = gpd.read_file('./California_Rail_Stations.geojson')

In [82]:
lossan_lines = ['Ventura County Line',
                'Burbank-Bob Hope Airport,Ventura County Line',
                '91 Line,Antelope Valley Line,Burbank-Bob Hope Airport,Orange County Line,Riverside Line,San Bernardino Line,Ventura County Line',
                'Antelope Valley Line,Burbank-Bob Hope Airport,Ventura County Line',
                'Orange County Line',
                '91 Line,Orange County Line',
                'Inland Empire-Orange County Line,Orange County Line',
                'Coaster,Inland Empire-Orange County Line,Orange County Line',
                'Coaster',
                ]

In [90]:
lossan_stations = ca_rail[ca_rail['PASS_NETWO'].str.contains('Surfliner')
                       | ca_rail['COMM_NETWO'].isin(lossan_lines)]

In [92]:
lossan_county_stops = gpd.GeoDataFrame(
                lossan_county_stops,
                geometry = gpd.points_from_xy(lossan_county_stops.stop_lon, lossan_county_stops.stop_lat),
                crs='EPSG:4326')

In [93]:
## project stations for buffer
lossan_stations = lossan_stations.to_crs('EPSG:6414') ## https://epsg.io/6414 (meters)
lossan_stations.geometry = lossan_stations.geometry.buffer(800) ## 800 meter buffer ~ .5mi

In [100]:
lossan_connect_stops = gpd.clip(lossan_county_stops.to_crs('EPSG:6414'), lossan_stations)

In [136]:
# lossan_stations.plot()

In [137]:
# lossan_county_stops.plot()

In [138]:
# lossan_connect_stops.plot()

In [119]:
lossan_connect_operators = (transit_stacks 
     >> filter(_.itp_id.isin(lossan_connect_stops.itp_id))
     >> select(*(info_cols + vehicle_cols + paratransit_cols))
     >> collect()
    )

In [123]:
lossan_connect_operators.replace(' ', np.nan, inplace=True)
for col in (vehicle_cols + paratransit_cols):
    lossan_connect_operators = lossan_connect_operators.astype({col:'float64'})

lossan_connect_operators[vehicle_cols].sum()

bus                         3836.0
articulated_bus              561.0
over_the_road_bus            196.0
school_bus                     0.0
trolleybus                     0.0
vintage_historic_trolley       2.0
streetcar                      0.0
dtype: float64

### Operators connecting directly to LOSSAN

In [158]:
##TODO clean up format, present tables+graphics

In [155]:
(pd.Series(validators_needed) * lossan_connect_operators[vehicle_cols].sum()).dropna().append(rail_validators_needed)

articulated_bus             1683.0
bus                         7672.0
over_the_road_bus            196.0
trolleybus                     0.0
vintage_historic_trolley       4.0
commuter_rail_stn            222.0
ferry_stn                     12.0
hrt_stn                      225.0
interchange                   20.0
lrt_stn                     1080.0
dtype: float64

#### With paratransit vehicles

In [156]:
(pd.Series(validators_needed) * lossan_connect_operators[(vehicle_cols + paratransit_cols)].sum()).dropna().append(rail_validators_needed)

articulated_bus             1683.0
automobile                   277.0
bus                         7672.0
cutaway                      423.0
minivan                       50.0
over_the_road_bus            196.0
trolleybus                     0.0
van                         1413.0
vintage_historic_trolley       4.0
commuter_rail_stn            222.0
ferry_stn                     12.0
hrt_stn                      225.0
interchange                   20.0
lrt_stn                     1080.0
dtype: float64

### Operators within LOSSAN Counties

In [157]:
(pd.Series(validators_needed) * lossan_operators[vehicle_cols].sum()).dropna().append(rail_validators_needed)

articulated_bus             1752.0
bus                         8702.0
over_the_road_bus            229.0
trolleybus                     0.0
vintage_historic_trolley       4.0
commuter_rail_stn            222.0
ferry_stn                     12.0
hrt_stn                      225.0
interchange                   20.0
lrt_stn                     1080.0
dtype: float64

#### With paratransit vehicles

In [160]:
((pd.Series(validators_needed) * lossan_operators[(vehicle_cols) + paratransit_cols].sum())
     .dropna()
     .append(rail_validators_needed)
).sum()

17474.0