In [26]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON
from ipywidgets import Text, HTML

from utilities import *

[PRC 21064.3](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21064.3.&lawCode=PRC)
* _Major transit stop means a site containing any of the following:
(a) An existing rail or bus rapid transit station.
(b) A ferry terminal served by either a bus or rail transit service.
(c) The intersection of two or more major bus routes with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods._
    * "Intersection" may not be sufficiently well-defined for this analysis

[PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit)
* _(a) “Bus rapid transit” means a public mass transit service provided by a public agency or by a public-private partnership that includes all of the following features:
(1) Full-time dedicated bus lanes or operation in a separate right-of-way dedicated for public transportation with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods.
(2) Transit signal priority.
(3) All-door boarding.
(4) Fare collection system that promotes efficiency.
(5) Defined stations._
    * GTFS alone is unlikely to be sufficient to determine whether a service qualifies as BRT under this definition

### Rail Service

In [3]:
# Rail routes across all operators. GTFS route_type codes 0, 1 and 2 are
# tram/light rail, subway/metro and (intercity/commuter) rail respectively;
# the codes are stored as strings in this table.
rail_routes = (
    tbl.gtfs_schedule.routes()
    >> filter(_.route_type.isin(['0', '1', '2']))
    >> select(_.calitp_itp_id, _.route_id)  # only the keys routes_to_stops needs
    >> collect()
)

In [4]:
def routes_to_stops(df_routes):
    """Return the stops served by the given routes as a projected GeoDataFrame.

    Walks the GTFS schedule tables routes -> trips -> stop_times -> stops,
    joining each step on a composite "<calitp_itp_id><id>" string key so that
    ids are matched per operator.

    Parameters
    ----------
    df_routes : pandas.DataFrame
        Routes to resolve. Must contain ``calitp_itp_id`` and ``route_id``;
        any other columns are carried through to the result. The frame is
        not modified.

    Returns
    -------
    geopandas.GeoDataFrame
        Stop rows (``stop_id``, ``calitp_itp_id``, ``stop_lat``, ``stop_lon``)
        joined with the route/trip columns of one trip serving that stop,
        plus the helper key columns ``itp_id_stop_id``, ``itp_id_trip_id``
        and ``itp_id_route_id``. Point geometry is built from lon/lat
        (EPSG:4326) and reprojected to EPSG:6414.

    Notes
    -----
    NOTE(review): the composite keys are built by plain concatenation with no
    separator, so distinct (operator, id) pairs could in principle collide
    (e.g. 1 + '23' vs 12 + '3'). Left unchanged because the key columns are
    part of the returned frame.
    """
    # assign() returns a new frame, so the caller's df_routes does not
    # silently gain a helper column.
    df_routes = df_routes.assign(
        itp_id_route_id=df_routes.calitp_itp_id.astype(str) + df_routes.route_id
    )

    # Narrow to the needed columns before collect() so only those are pulled
    # from the warehouse; the route_id filter runs locally after collecting.
    df_trips = (tbl.gtfs_schedule.trips()
             >> filter(_.calitp_itp_id.isin(df_routes.calitp_itp_id))
             >> select(_.calitp_itp_id, _.route_id, _.trip_id)
             >> collect()
             >> filter(_.route_id.isin(df_routes.route_id)))

    # Keep only the composite key and trip_id; calitp_itp_id / route_id come
    # back in from df_routes through the join.
    df_trips = (
        df_trips
        .assign(itp_id_route_id=df_trips.calitp_itp_id.astype(str) + df_trips.route_id)
        .drop(columns=['calitp_itp_id', 'route_id'])
    )

    df_trips = df_routes >> inner_join(_, df_trips, on='itp_id_route_id')
    df_trips = df_trips.assign(
        itp_id_trip_id=df_trips.calitp_itp_id.astype(str) + df_trips.trip_id
    )
    # Plain Python ints for the remote isin() filter below.
    df_operators = [int(x) for x in df_trips.calitp_itp_id.unique()]

    df_stop_times = (tbl.gtfs_schedule.stop_times()
                  >> filter(_.calitp_itp_id.isin(df_operators))
                  >> select(_.calitp_itp_id, _.stop_id, _.trip_id)
                  >> collect()
                    )
    df_stop_times = (
        df_stop_times
        .assign(itp_id_trip_id=df_stop_times.calitp_itp_id.astype(str) + df_stop_times.trip_id)
        .drop(columns=['calitp_itp_id', 'trip_id'])
    )
    df_stop_times = df_stop_times >> inner_join(_, df_trips, on='itp_id_trip_id')
    # One row per stop per operator; the first matching trip's columns are kept.
    df_stop_times = df_stop_times.drop_duplicates(subset=['stop_id', 'calitp_itp_id'])
    df_stop_times = df_stop_times.assign(
        itp_id_stop_id=df_stop_times.calitp_itp_id.astype(str) + df_stop_times.stop_id
    )

    df_stops = (tbl.gtfs_schedule.stops()
              >> select(_.stop_id, _.calitp_itp_id, _.stop_lat, _.stop_lon)
              >> filter(_.calitp_itp_id.isin(df_stop_times.calitp_itp_id))
              >> collect()
             )

    df_stops = df_stops.assign(
        itp_id_stop_id=df_stops.calitp_itp_id.astype(str) + df_stops.stop_id
    )
    # Drop the columns df_stops already provides so the join adds no
    # _x/_y suffixed duplicates.
    df_stop_times = df_stop_times.drop(columns=['calitp_itp_id', 'stop_id'])
    df_stops = df_stops >> inner_join(_, df_stop_times, on='itp_id_stop_id')
    df_stops = gpd.GeoDataFrame(df_stops,
                              geometry = gpd.points_from_xy(df_stops.stop_lon, df_stops.stop_lat),
                              crs = 'EPSG:4326')
    return df_stops.to_crs('EPSG:6414') ## https://epsg.io/6414 (meters)

In [5]:
# Stops served by any rail route, as a GeoDataFrame projected to EPSG:6414.
rail_stops = routes_to_stops(rail_routes)

In [6]:
# map_hqta(rail_stops)

#### BRT Service likely meeting [PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit) definition:

* LA Metro Orange and Silver lines, excluding street-running stops (stop flags only)
* Omnitrans sbX, all stops (curbside stations are well-defined, with fare prepayment)
* AC Transit Tempo, all stops (curbside stations are well-defined, with fare prepayment)

In [7]:
# Operator 182, restricted to the two route_ids treated as BRT
# (presumably the LA Metro Orange and Silver lines listed above -- confirm).
metro_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(
        _.calitp_itp_id == 182,
        _.route_id.isin(['901-13149', '910-13149']),
    )
    >> collect()
)

In [8]:
# Every stop on the selected Metro BRT routes; street-running stops are
# removed in a later cell using a manual stop_id list.
metro_brt_stops = routes_to_stops(metro_brt)

In [9]:
## GTFS alone cannot distinguish stations from street-running (non-station)
## stops, so the stop_ids to exclude are listed manually:
metro_street_running = [
    '141012',
    '13805',
    '5397',
    '13803',
    '13804',
    '5396',
    '13802',
    '5395',
    '5410',
    '5411',
    '13817',
    '12304',
    '5408',
    '3821',
    '2603',
    '3153',
    '3124',
    '378',
    '65300039',
    '65300038',
    '15820',
    '13460',
    '4994',
    '1813',
    '2378',
    '5049',
    '4652',
    '2377',
    '4675',
    '5040',
    '65300042',
    '3674',
    '15713',
    '13561',
    '5378',
    '13560',
    '70500012',
    '5377',
    '15612',
    '12416',
    '11917',
    '12415',
    '8704',
]

In [10]:
# Keep only stops that are NOT in the manual street-running list.
metro_brt_stops = (
    metro_brt_stops
    >> filter(~_.stop_id.isin(metro_street_running))
)

In [11]:
# map_hqta(metro_brt_stops)

In [12]:
# Operator 4, route '1T' (presumably AC Transit Tempo, per the list above -- confirm).
act_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(_.calitp_itp_id == 4, _.route_id == '1T')
    >> collect()
)

In [13]:
# All stops on the selected route are kept (no manual exclusions here).
act_brt_stops = routes_to_stops(act_brt)

In [14]:
# map_hqta(act_brt_stops)

In [15]:
# Operator 232, route '9648' (presumably Omnitrans sbX, per the list above -- confirm).
omni_brt = (
    tbl.gtfs_schedule.routes()
    >> filter(_.calitp_itp_id == 232, _.route_id == '9648')
    >> collect()
)

In [16]:
# All stops on the selected route are kept (no manual exclusions here).
omni_brt_stops = routes_to_stops(omni_brt)

In [17]:
# map_hqta(omni_brt_stops)

### Ferry

In [18]:
# Ferry routes for every operator (GTFS route_type 4, stored as a string).
ferry = (
    tbl.gtfs_schedule.routes()
    >> filter(_.route_type == '4')
    >> collect()
)

In [19]:
## Manual exclusion list: the only ferry stops without bus service.
## TODO: replace with an algorithm that detects connecting bus/rail service.
angel_and_alcatraz = ['2483552', '2483550']

# Resolve ferry routes to their stops, then drop the excluded stop_ids.
ferry_stops = routes_to_stops(ferry)
ferry_stops = (
    ferry_stops
    >> filter(~_.stop_id.isin(angel_and_alcatraz))
)

In [20]:
# map_hqta(ferry_stops)

## Combined

In [21]:
# Stack the rail, BRT and ferry stop sets into a single GeoDataFrame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent. Like append, it keeps each frame's original
# index and takes the union of columns (missing values become NaN).
rail_brt_ferry = pd.concat([
    rail_stops,
    metro_brt_stops,
    act_brt_stops,
    omni_brt_stops,
    ferry_stops,
])

In [22]:
# Display the combined stops table for a visual sanity check.
rail_brt_ferry

Unnamed: 0,stop_id,calitp_itp_id,stop_lat,stop_lon,itp_id_stop_id,itp_id_trip_id,route_id,itp_id_route_id,trip_id,geometry,...,route_short_name,route_long_name,route_desc,route_url,route_color,route_text_color,route_sort_order,continuous_pickup,continuous_drop_off,calitp_extracted_at
0,CRN,13,41.056920,-94.361617,13CRN,1352818299,96,1396,52818299,POINT (2130400.260 627641.309),...,,,,,,,,,,
1,MKA,13,42.940583,-87.924359,13MKA,133412815756,54,1354,3412815756,POINT (2577583.013 986856.496),...,,,,,,,,,,
2,OKJ,13,37.793866,-122.271667,13OKJ,135452816206,84,1384,5452816206,POINT (-199750.592 -22341.380),...,,,,,,,,,,
3,PLO,13,41.662384,-88.538272,13PLO,133832815187,93,1393,3832815187,POINT (2576234.283 836414.716),...,,,,,,,,,,
4,QCY,13,39.957063,-91.368525,13QCY,133832815187,93,1393,3832815187,POINT (2408238.879 582112.897),...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,2483569,41,37.856636,-122.478275,412483569,41t_373938_b_28421_tn_0,11643,4111643,t_373938_b_28421_tn_0,POINT (-217731.552 -14910.728),...,,Pier 41 - Sausalito - Tiburon - Angel Island,,,f5cb04,000000,3,1,1,2021-05-03
77,GF:43000,200,37.795869,-122.392450,200GF:43000,200GF:7296302,GF:SF,200GF:SF,GF:7296302,POINT (-210363.380 -21857.544),...,SF,Sausalito - San Francisco Ferry,,http://goldengateferry.org/schedules/Sausalito...,0000FF,FFFFFF,0,,,2021-11-07
78,GF:43003,200,37.856401,-122.478251,200GF:43003,200GF:7296302,GF:SF,200GF:SF,GF:7296302,POINT (-217730.126 -14936.899),...,SF,Sausalito - San Francisco Ferry,,http://goldengateferry.org/schedules/Sausalito...,0000FF,FFFFFF,0,,,2021-11-07
79,GF:43004,200,37.945316,-122.508735,200GF:43004,200GF:7296280,GF:LF,200GF:LF,GF:7296280,POINT (-220146.197 -4985.515),...,LF,Larkspur - San Francisco Ferry,,http://goldengateferry.org/schedules/Larkspur.php,0000FF,FFFFFF,0,,,2021-11-07


In [24]:
# map_hqta(rail_brt_ferry)

In [27]:
# Export the combined stops via the project helper from `utilities`.
# NOTE(review): judging by its name and the warning it emits, the helper writes
# '<name>.parquet' and presumably uploads it to GCS -- confirm in utilities.
geoparquet_gcs_export(rail_brt_ferry, 'rail_brt_ferry')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  gdf.to_parquet(f"{name}.parquet")
