In [2]:
# !pip install -r requirements.txt

In [1]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import shapely

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON
from ipywidgets import Text, HTML

from utilities import *

E0426 17:57:41.502247803    1009 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


In [13]:
# Load the dissolved HQTA shapes and keep only the rows flagged as high quality
# transit corridors. `.assign` builds a new frame, so the tag columns are never
# written onto a filtered slice of the parquet data (no SettingWithCopyWarning).
bus_hqtc = (
    gpd.read_parquet(f'{GCS_FILE_PATH}shape_hqta_dissolve.parquet')
    .loc[lambda shapes: shapes['hq_transit_corr']]
    .assign(
        hqta_type='hq_transit_corr',
        route_type='3',  # GTFS route_type 3 = bus
    )
)

In [164]:
# Stops read from rail_brt_ferry.parquet; every row is tagged as a major
# transit stop (the per-mode label is derived later from route_type).
rail_ferry_brt_stops = (
    gpd.read_parquet(f'{GCS_FILE_PATH}rail_brt_ferry.parquet')
    .assign(hqta_type='major_transit_stop')
)

In [165]:
rail_ferry_brt_stops.columns

Index(['calitp_itp_id', 'calitp_url_number', 'stop_id', 'stop_lat', 'stop_lon',
       'route_type', 'geometry', 'hqta_type'],
      dtype='object')

In [166]:
# geoparquet_gcs_export(bus_hqtc, 'bus_hqtc')

In [167]:
# geoparquet_gcs_export(rail_ferry_brt_stops, 'rail_ferry_brt_stops')

### High Quality Transit Areas Relevant Statutes

[PRC 21155](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21155.&lawCode=PRC)
* _(3) be within one-half mile of a major transit stop or high-quality transit corridor included in a regional transportation plan._
* Major transit stop definition: _A major transit stop is as defined in Section 21064.3, except that, for purposes of this section, it also includes major transit stops that are included in the applicable regional transportation plan_
* High-quality transit corridor definition: _For purposes of this section, a high-quality transit corridor means a corridor with fixed route bus service with service intervals no longer than 15 minutes during peak commute hours._
    * Unable to locate definition of "peak commute hours"

[PRC 21064.3](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21064.3.&lawCode=PRC)
* _Major transit stop means a site containing any of the following:
(a) An existing rail or bus rapid transit station.
(b) A ferry terminal served by either a bus or rail transit service.
(c) The intersection of two or more major bus routes with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods._
    * "Intersection" may not be sufficiently well-defined for this analysis

[PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit)
* _(a) “Bus rapid transit” means a public mass transit service provided by a public agency or by a public-private partnership that includes all of the following features:
(1) Full-time dedicated bus lanes or operation in a separate right-of-way dedicated for public transportation with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods.
(2) Transit signal priority.
(3) All-door boarding.
(4) Fare collection system that promotes efficiency.
(5) Defined stations._
    * Unlikely to determine if a service qualifies as BRT under this definition using GTFS alone

## Bus Major Stops

In [168]:
bus_hqtc.head(3)

Unnamed: 0,hq_transit_corr,shape_id,geometry,calitp_itp_id,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,hqta_type,route_type
2256,True,01582b61-5a02-4e6c-bd37-bad46bb036a9,"POLYGON ((-133311.510 166655.910, -133353.419 ...",48,1937384376,0,12f4ecb6-c161-480e-8ec2-d7e3b63c7e38,7.0,8.0,hq_transit_corr,3
2257,True,0500,"POLYGON ((212373.929 -486798.063, 212402.264 -...",142,4126984225,1,4654,5.0,7.0,hq_transit_corr,3
2258,True,0843,"MULTIPOLYGON (((200815.723 -472198.343, 200811...",142,1555422069,0,6679,5.0,6.0,hq_transit_corr,3


In [169]:
gdf = bus_hqtc
output = gpd.GeoDataFrame()

def find_intersections(row):
    """Clip one corridor shape by all other corridor shapes and store the result.

    Meant to be applied row-wise, e.g.
    ``bus_hqtc.apply(find_intersections, axis=1)``. The rows of the
    module-level ``gdf`` whose ``shape_id`` equals ``row.shape_id`` are clipped
    (``gpd.clip``) by the rows with any other ``shape_id``; the clipped rows are
    accumulated in the module-level ``output`` GeoDataFrame.

    Parameters
    ----------
    row
        A row exposing a ``shape_id`` attribute.

    Returns
    -------
    None
        The function works purely by side effect on ``output``.
    """
    global output
    same_shape = gdf >> filter(_.shape_id == row.shape_id)
    other_shapes = gdf >> filter(_.shape_id != row.shape_id)
    overlap = gpd.clip(same_shape, other_shapes)
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0.
    output = pd.concat([output, overlap])
    return

In [170]:
# _test = bus_hqtc.apply(find_intersections, axis=1)

In [171]:
# output.geometry = output.geometry.buffer(50)

In [172]:
# geoparquet_gcs_export(output, 'major_bus_stops_working')

In [173]:
# major_bus = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops_working.parquet')

In [174]:
def drop_big_areas(geometry, max_length=1000):
    """Keep only the small polygon parts of a geometry.

    Parameters
    ----------
    geometry
        A shapely Polygon or MultiPolygon; anything else is treated as missing.
    max_length
        Exclusive upper bound on ``.length`` (for shapely polygons this is the
        boundary length, in the units of the layer's CRS). Defaults to 1000,
        the value that was previously hard-coded.

    Returns
    -------
    Polygon, MultiPolygon or NaN
        * Polygon shorter than ``max_length``: returned unchanged.
        * MultiPolygon: a new MultiPolygon holding only the parts shorter than
          ``max_length``.
        * Everything else (over-long polygon, MultiPolygon with no small part,
          non-polygon input): ``np.nan``, so the row can be removed with
          ``dropna(subset=['geometry'])``.
    """
    # geom_type is compared instead of the concrete class so the check does
    # not depend on shapely's internal module layout.
    kind = getattr(geometry, 'geom_type', None)
    if kind == 'MultiPolygon':
        small_parts = [part for part in geometry.geoms if part.length < max_length]
        if small_parts:
            return shapely.geometry.MultiPolygon(small_parts)
        # Previously fell through and returned None implicitly.
        return np.nan
    if kind == 'Polygon' and geometry.length < max_length:
        return geometry
    return np.nan

In [175]:
# major_bus['geometry'] = major_bus.geometry.apply(drop_big_areas)

In [176]:
row_per_stop = gpd.GeoDataFrame()
def explode_geoms(row):
    """Append one centroid row per polygon of ``row.geometry`` to ``row_per_stop``.

    A MultiPolygon contributes one row per member polygon, a Polygon a single
    row; any other geometry type contributes nothing. ``row`` is modified in
    place (its geometry ends up as the last centroid written) and returned.
    """
    global row_per_stop
    geometry_class = type(row.geometry)
    if geometry_class == shapely.geometry.multipolygon.MultiPolygon:
        polygons = row.geometry.geoms
    elif geometry_class == shapely.geometry.polygon.Polygon:
        polygons = [row.geometry]
    else:
        polygons = []

    for polygon in polygons:
        row.geometry = polygon.centroid
        # NOTE: DataFrame.append was removed in pandas 2.0; this cell needs an
        # older pandas as written.
        row_per_stop = row_per_stop.append(row)

    return row

In [177]:
# major_bus = major_bus.dropna(subset=['geometry'])

# major_bus.apply(explode_geoms, axis=1)

# row_per_stop = row_per_stop.reset_index(drop=True)

# row_per_stop = row_per_stop[['calitp_itp_id', 'stop_id', 'geometry']]

# row_per_stop['hqta_type'] = 'major_transit_stop'

# row_per_stop = row_per_stop.set_crs('EPSG:6414')

In [178]:
# row_per_stop.head(3)

In [179]:
# geoparquet_gcs_export(row_per_stop, 'major_bus_stops')

In [180]:
# Reload the major bus stop points from GCS. The file name matches the
# commented-out `geoparquet_gcs_export(row_per_stop, 'major_bus_stops')` cell
# above, so this is presumably that export -- confirm the file is current.
major_bus_stops = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops.parquet')

In [181]:
major_bus_stops.head(3)

Unnamed: 0,calitp_itp_id,stop_id,geometry,hqta_type
0,142,3977,POINT (199058.382 -471125.032),major_transit_stop
1,142,3977,POINT (198531.592 -471368.189),major_transit_stop
2,142,3977,POINT (193068.693 -465847.391),major_transit_stop


In [182]:
# Single snapshot date used by every filter below (was repeated as a literal).
ANALYSIS_DATE = '2021-12-01'

# Stops present in the daily feed on ANALYSIS_DATE: the fact table is filtered
# to rows dated ANALYSIS_DATE that were extracted before and deleted after that
# date, then joined to the stop dimension on stop_key to get ids and coordinates.
tbl_stops = (tbl.views.gtfs_schedule_fact_daily_feed_stops()
 >> filter(_.date == ANALYSIS_DATE)
 >> filter(_.calitp_extracted_at < ANALYSIS_DATE)
 >> filter(_.calitp_deleted_at > ANALYSIS_DATE)
 >> select(_.stop_key)
 >> inner_join(_, tbl.views.gtfs_schedule_dim_stops(), on = 'stop_key')
 >> select(_.stop_id, _.stop_lat, _.stop_lon, _.calitp_itp_id)
 >> collect()
)

In [183]:
# Build point geometries from lon/lat (WGS84), then project to EPSG:6414,
# whose units are meters -- https://epsg.io/6414
stop_points = gpd.points_from_xy(tbl_stops.stop_lon, tbl_stops.stop_lat)
tbl_stops = gpd.GeoDataFrame(tbl_stops, geometry=stop_points, crs='EPSG:4326')
tbl_stops = tbl_stops.to_crs('EPSG:6414')

In [184]:
tbl_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,stop_id.1,stop_lat.1,stop_lon.1,calitp_itp_id.1,geometry
0,5090,36.080261,-119.021684,256,POINT (87996.619 -214744.271),,,,
1,8050,36.061094,-118.99488,256,POINT (90430.048 -216848.624),,,,
2,1100,36.073048,-119.020918,256,POINT (88073.792 -215544.973),,,,


In [185]:
# Agencies that have at least one major bus stop, as int64 so the key matches
# tbl_stops. Deduplicated so the inner join below acts as a filter instead of
# repeating every stop once per major-stop row of its agency.
to_join = (
    major_bus_stops[['calitp_itp_id']]
    .astype({'calitp_itp_id': 'int64'})
    .drop_duplicates()
)

In [186]:
# Keep only stops whose agency appears in to_join, one row per
# (calitp_itp_id, stop_id) pair.
tbl_stops_major = (
    tbl_stops
    >> inner_join(_, to_join, on='calitp_itp_id')
    >> distinct(_.calitp_itp_id, _.stop_id, _keep_all=True)
)


In [187]:
tbl_stops_major.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,3971,37.976371,-122.319663,4,POINT (-203473.763 -1956.750)
1,2050,37.789595,-122.2458,4,POINT (-197487.747 -22870.228)
2,3445,37.76742,-122.196375,4,POINT (-193199.376 -25436.510)


In [188]:
# Replace each stop point with a buffer of 100 CRS units (meters in EPSG:6414).
# NOTE: mutates tbl_stops_major in place, so re-running this cell buffers the
# already-buffered polygons again.
tbl_stops_major.geometry = tbl_stops_major.buffer(100)

In [189]:
tbl_stops_major.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,3971,37.976371,-122.319663,4,"POLYGON ((-203373.763 -1956.750, -203374.244 -..."
1,2050,37.789595,-122.2458,4,"POLYGON ((-197387.747 -22870.228, -197388.229 ..."
2,3445,37.76742,-122.196375,4,"POLYGON ((-193099.376 -25436.510, -193099.857 ..."


In [190]:
# Drop stop_id from the major bus stop points before the spatial join;
# presumably so the joined rows keep only the stop_id from tbl_stops_major -- confirm.
drop_id = major_bus_stops.drop(columns=['stop_id'])

In [191]:
# Inner spatial join: keep each buffered stop polygon that contains a major
# bus stop point, one output row per (stop, point) match.
spatial_stops_major = tbl_stops_major.sjoin(drop_id, how='inner', predicate='contains')

In [192]:
# One row per stop and agency pairing. The explicit .copy() gives an independent
# frame, so the later `new_major_stops.geometry = ...` assignment no longer
# raises SettingWithCopyWarning.
new_major_stops = (
    spatial_stops_major
    .drop_duplicates(subset=['stop_id', 'calitp_itp_id_left', 'calitp_itp_id_right'])
    .copy()
)

In [193]:
# Collapse each buffered stop polygon back to a point (its centroid).
new_major_stops.geometry = new_major_stops.centroid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [194]:
# map_hqta(new_major_stops)

### Bus Corridors to Stops Along Corridor

In [195]:
# Alias only (no copy): tbl_stops_corridors and tbl_stops are the same object.
tbl_stops_corridors = tbl_stops

In [196]:
tbl_stops_corridors.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,5090,36.080261,-119.021684,256,POINT (87996.619 -214744.271)
1,8050,36.061094,-118.99488,256,POINT (90430.048 -216848.624)
2,1100,36.073048,-119.020918,256,POINT (88073.792 -215544.973)


In [197]:
# `bus_hqtc_extract` was not defined in any remaining cell, so this cell failed
# on a fresh kernel. Rebuild it from bus_hqtc with the columns and the
# 'hq_corridor_bus' label shown in the saved output of this cell.
bus_hqtc_extract = (
    bus_hqtc[['calitp_itp_id', 'stop_id', 'hqta_type', 'geometry']]
    .assign(hqta_type='hq_corridor_bus')
)
bus_hqtc_extract.head(3)

Unnamed: 0,calitp_itp_id,stop_id,hqta_type,geometry
2256,48,12f4ecb6-c161-480e-8ec2-d7e3b63c7e38,hq_corridor_bus,"POLYGON ((-133311.510 166655.910, -133353.419 ..."
2257,142,4654,hq_corridor_bus,"POLYGON ((212373.929 -486798.063, 212402.264 -..."
2258,142,6679,hq_corridor_bus,"MULTIPOLYGON (((200815.723 -472198.343, 200811..."


In [198]:
# Keep only the stop points that fall inside the corridor polygons.
stops_in_corridor = tbl_stops_corridors.clip(bus_hqtc_extract)

In [199]:
stops_in_corridor.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 32894 entries, 26531 to 121250
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   stop_id        32894 non-null  object  
 1   stop_lat       32894 non-null  float64 
 2   stop_lon       32894 non-null  float64 
 3   calitp_itp_id  32894 non-null  int64   
 4   geometry       32894 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 1.5+ MB


In [200]:
stops_in_corridor.drop_duplicates(subset = ['stop_id', 'calitp_itp_id']).info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 32009 entries, 26531 to 121250
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   stop_id        32009 non-null  object  
 1   stop_lat       32009 non-null  float64 
 2   stop_lon       32009 non-null  float64 
 3   calitp_itp_id  32009 non-null  int64   
 4   geometry       32009 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 1.5+ MB


In [201]:
# map_hqta(bus_hqtc_extract)

In [202]:
# map_hqta(stops_in_corridor)

#### Definitions and Output:

* hqta_type: major_transit_stop
* stop_id: one stop id... (not ideal, but oh well)
* geometry: .buffer(700)?
* _can always pull more info spatially_

## Unbuffered Export

Thank you for this data. It would be useful for us to get the HQTC stops as a point data file, not a polygon. Also, if you could differentiate between train, bus, BRT, and ferry stop that would be immensely helpful.

Let me know if it is possible to get the data in this format.  

#### Major Transit Stops (bus intersections)

In [203]:
new_major_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id_left,geometry,index_right,calitp_itp_id_right,hqta_type
1,2050,37.789595,-122.2458,4,POINT (-197487.747 -22870.228),1873,4,major_transit_stop
661,6160,37.789312,-122.245784,4,POINT (-197487.086 -22901.713),1873,4,major_transit_stop
868,6044,37.789646,-122.246117,4,POINT (-197515.484 -22863.900),1873,4,major_transit_stop


In [204]:
# Export view of the bus-intersection stops: the stop's own agency id becomes
# calitp_itp_id, the matched agency id becomes calitp_itp_id_2.
new_major_bus_extract = (
    new_major_stops
    >> select(
        _.calitp_itp_id == _.calitp_itp_id_left,
        _.stop_id,
        _.calitp_itp_id_2 == _.calitp_itp_id_right,
        _.hqta_type,
        _.geometry,
    )
)
# Relabel for the point export.
new_major_bus_extract['hqta_type'] = 'major_stop_bus'
new_major_bus_extract.head(3)

Unnamed: 0,calitp_itp_id,stop_id,calitp_itp_id_2,hqta_type,geometry
1,4,2050,4,major_stop_bus,POINT (-197487.747 -22870.228)
661,4,6160,4,major_stop_bus,POINT (-197487.086 -22901.713)
868,4,6044,4,major_stop_bus,POINT (-197515.484 -22863.900)


#### Stops Along HQ Transit Corridors (bus)

In [205]:
stops_in_corridor.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
26531,262,34.420949,-119.7036,293,POINT (27236.798 -399421.201)
66308,S80,34.421013,-119.703516,169,POINT (27244.495 -399414.075)
26776,113,34.420955,-119.670648,293,POINT (30264.810 -399410.543)


In [206]:
# Export view of the corridor stops: agency id, stop id and point geometry only.
stops_extract = stops_in_corridor >> select(_.calitp_itp_id, _.stop_id, _.geometry)

In [207]:
# Label every corridor stop for the combined point export.
stops_extract['hqta_type'] = 'hq_corridor_bus'

In [208]:
stops_extract.head(3)

Unnamed: 0,calitp_itp_id,stop_id,geometry,hqta_type
26531,293,262,POINT (27236.798 -399421.201),hq_corridor_bus
66308,169,S80,POINT (27244.495 -399414.075),hq_corridor_bus
26776,293,113,POINT (30264.810 -399410.543),hq_corridor_bus


#### Major Transit Stops (rail/ferry/brt)

In [209]:
rail_ferry_brt_stops.head(3)

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,stop_lat,stop_lon,route_type,geometry,hqta_type
1066,182,1,80101,33.768071,-118.192921,0,POINT (167426.752 -470294.073),major_transit_stop
1078,182,1,80153,33.76874,-118.189362,0,POINT (167755.039 -470213.608),major_transit_stop
1076,182,1,80102,33.772258,-118.1937,0,POINT (167345.741 -469831.223),major_transit_stop


In [210]:
# Export view of the rail/BRT/ferry stops. route_type is kept for now because
# the next cell derives the per-mode hqta_type from it. The previous
# `_.calitp_itp_id == _.calitp_itp_id` renamed the column to itself (a no-op).
rail_ferry_brt_extract = rail_ferry_brt_stops >> select(
    _.calitp_itp_id, _.stop_id, _.hqta_type, _.route_type, _.geometry
)

In [211]:
# Replace the generic hqta_type with a per-mode label derived from the GTFS
# route_type code (stored as a string), then drop route_type.
# GTFS route_type: 0 = tram/light rail, 1 = subway/metro, 2 = rail, 3 = bus, 4 = ferry.
# NOTE(review): '3' is plain "bus" in GTFS; labelling it BRT presumably relies on
# the input file containing only BRT bus stops -- confirm upstream.
# NOTE(review): there is no fallback branch, so any other route_type would
# presumably end up with a missing hqta_type -- confirm none occur.
rail_ferry_brt_extract = (rail_ferry_brt_extract
    >> mutate(
    hqta_type = case_when({
        _.route_type.isin(['0', '1', '2']): 'major_stop_rail',
        _.route_type == '3': 'major_stop_brt',
        _.route_type == '4': 'major_stop_ferry'
        })
    )
    >> select(-_.route_type)
)

In [212]:
rail_ferry_brt_extract.head(3)

Unnamed: 0,calitp_itp_id,stop_id,hqta_type,geometry
1066,182,80101,major_stop_rail,POINT (167426.752 -470294.073)
1078,182,80153,major_stop_rail,POINT (167755.039 -470213.608)
1076,182,80102,major_stop_rail,POINT (167345.741 -469831.223)


In [213]:
# Stack the three point layers (bus intersections, corridor stops,
# rail/BRT/ferry). pd.concat replaces the chained DataFrame.append calls;
# append was deprecated in pandas 1.4 and removed in pandas 2.0.
points_combined = pd.concat([new_major_bus_extract, stops_extract, rail_ferry_brt_extract])

In [214]:
points_combined.dtypes

calitp_itp_id         int64
stop_id              object
calitp_itp_id_2      object
hqta_type            object
geometry           geometry
dtype: object

In [215]:
points_combined

Unnamed: 0,calitp_itp_id,stop_id,calitp_itp_id_2,hqta_type,geometry
1,4,2050,4,major_stop_bus,POINT (-197487.747 -22870.228)
661,4,6160,4,major_stop_bus,POINT (-197487.086 -22901.713)
868,4,6044,4,major_stop_bus,POINT (-197515.484 -22863.900)
4378,4,6161,4,major_stop_bus,POINT (-197544.479 -22958.955)
6,4,5307,4,major_stop_bus,POINT (-189006.213 -28181.062)
...,...,...,...,...,...
4,232,8295,,major_stop_brt,POINT (246407.338 -422724.364)
17,232,8288,,major_stop_brt,POINT (243453.358 -421760.464)
996,323,165,,major_stop_rail,POINT (172732.681 -389214.320)
342,13,VRV,,major_stop_rail,POINT (248354.060 -383016.649)


In [161]:
# map_hqta(points_combined)

In [224]:
# Look up agency names and attach them to every point by calitp_itp_id.
names = (
    tbl.views.gtfs_agency_names()
    >> select(_.calitp_itp_id, _.agency_name)
    >> collect()
)
with_names = points_combined >> inner_join(_, names, on='calitp_itp_id')

In [231]:
# One row per (agency, hqta_type, stop); the secondary agency id is not exported.
with_names = (
    with_names
    .drop_duplicates(subset=['calitp_itp_id', 'hqta_type', 'stop_id'])
    .drop(columns=['calitp_itp_id_2'])
)

In [233]:
with_names.head(3)

Unnamed: 0,calitp_itp_id,stop_id,hqta_type,geometry,agency_name
0,4,2050,major_stop_bus,POINT (-197487.747 -22870.228),AC Transit
1,4,6160,major_stop_bus,POINT (-197487.086 -22901.713),AC Transit
2,4,6044,major_stop_bus,POINT (-197515.484 -22863.900),AC Transit


In [234]:
# Export the combined point layer as GeoParquet. geoparquet_gcs_export is not
# defined in this notebook (presumably it comes from `utilities`); judging by
# the name it uploads to GCS -- confirm.
geoparquet_gcs_export(with_names, 'ca_high_quality_transit_points')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  gdf.to_parquet(f"{name}.parquet")


In [235]:
# Local GeoJSON copy of the point layer.
# NOTE(review): the file stem is singular ('..._point') while the parquet export
# uses '..._points'; the stop geometries were projected to EPSG:6414 earlier and
# are written as-is -- confirm consumers do not expect lon/lat (EPSG:4326).
with_names.to_file('./ca_high_quality_transit_point.geojson', driver='GeoJSON')

In [236]:
# Local shapefile copy of the point layer (no driver argument is passed).
# NOTE(review): the saved output shows a warning on this call; shapefile field
# names are limited to 10 characters, so 'calitp_itp_id' and 'agency_name' are
# presumably truncated -- confirm.
with_names.to_file('./ca_high_quality_transit/ca_high_quality_transit_point.shp')

  with_names.to_file('./ca_high_quality_transit/ca_high_quality_transit_point.shp')


In [237]:
with_names['hqta_type'].unique()

array(['major_stop_bus', 'hq_corridor_bus', 'major_stop_brt',
       'major_stop_rail', 'major_stop_ferry'], dtype=object)

### Combining and Buffering

* General buffer distance: 1/2mi ~= 805 meters
* Bus corridors are already buffered 50 meters, so will buffer 755 meters

In [28]:
# Buffer everything out to ~1/2 mile (805 m). Corridors already carry a 50 m
# buffer, so they only get the remaining 755 m.
# NOTE: these assignments mutate the frames in place; re-running the cell
# buffers them again.
bus_hqtc.geometry = bus_hqtc.geometry.buffer(755)
# row_per_stop is only filled by the commented-out explode_geoms run, so on a
# fresh kernel it is an empty GeoDataFrame with no geometry column. Start from
# major_bus_stops instead, which is read from the 'major_bus_stops' parquet
# that the commented-out export cell writes from row_per_stop.
row_per_stop = major_bus_stops.copy()
row_per_stop.geometry = row_per_stop.geometry.buffer(805)
rail_ferry_brt_stops.geometry = rail_ferry_brt_stops.geometry.buffer(805)

In [29]:
# Stack corridors, rail/BRT/ferry stops and major bus stops into one layer and
# blank out missing values. pd.concat replaces the chained DataFrame.append
# calls; append was deprecated in pandas 1.4 and removed in pandas 2.0.
all_hqta = (
    pd.concat([bus_hqtc, rail_ferry_brt_stops, row_per_stop])
    .fillna('')
)

In [30]:
# Keep only the export columns, renumber the rows, and store the agency id as int64.
export_columns = ['calitp_itp_id', 'geometry', 'hqta_type']
all_hqta = all_hqta[export_columns].reset_index(drop=True)
all_hqta['calitp_itp_id'] = all_hqta['calitp_itp_id'].astype('int64')

### Format for export

In [31]:
# Agency id -> agency name lookup taken from the GTFS schedule agency table.
agency_names = (
    tbl.gtfs_schedule.agency()
    >> select(_.calitp_itp_id, _.agency_name)
    >> collect()
)

In [32]:
# Attach agency names; the inner join drops any row whose calitp_itp_id has no
# match in agency_names.
all_hqta = all_hqta >> inner_join(_, agency_names, on='calitp_itp_id')

In [2]:
# Reload the exported polygon layer from GCS. NOTE: this overwrites the
# all_hqta built in the cells above.
all_hqta = gpd.read_parquet(f'{GCS_FILE_PATH}ca_high_quality_transit.parquet')

E0426 17:57:45.173838895    1009 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


In [16]:
# Corridors for agency 182 only.
# NOTE(review): the id is compared as the string '182', while other cells cast
# calitp_itp_id to int64 -- confirm the dtype of bus_hqtc.calitp_itp_id.
metro = bus_hqtc >> filter(_.calitp_itp_id == '182')

In [18]:
map_hqta(metro, 'pm_max_trips')

Map(center=[34.078792878404975, -118.30956119336547], controls=(ZoomControl(options=['position', 'zoom_in_text…

In [None]:
# geoparquet_gcs_export(all_hqta, 'ca_high_quality_transit')
# all_hqta.to_file('./ca_high_quality_transit.geojson', driver='GeoJSON')
# all_hqta.to_file('./ca_high_quality_transit/ca_high_quality_transit.shp')

## Static Map Images (all HQTAs)

![bay](img/bay_valley_all.png)

![fresno](img/fres_all.png)

![san_diego](img/sd_all.png)

In [None]:
# map_hqta(all_hqta)

In [15]:
all_hqta

Unnamed: 0,calitp_itp_id,geometry,hqta_type,agency_name
0,48,"POLYGON ((-133534.279 167527.858, -133517.826 ...",hq_transit_corr,B-Line
1,48,"MULTIPOLYGON (((-157718.392 195993.182, -15773...",hq_transit_corr,B-Line
2,142,"POLYGON ((211656.399 -486108.638, 211648.381 -...",hq_transit_corr,Orange County Transportation Authority
3,142,"MULTIPOLYGON (((199947.647 -472521.650, 199947...",hq_transit_corr,Orange County Transportation Authority
4,142,"MULTIPOLYGON (((187094.291 -451836.591, 187091...",hq_transit_corr,Orange County Transportation Authority
...,...,...,...,...
31942,361,"POLYGON ((62774.123 -187182.324, 62770.247 -18...",major_transit_stop,Visalia Transit
31943,301,"POLYGON ((-235898.301 50349.674, -235902.177 5...",major_transit_stop,Santa Rosa CityBus
31944,301,"POLYGON ((-235898.301 50349.674, -235902.177 5...",major_transit_stop,Santa Rosa CityBus
31945,201,"POLYGON ((-209333.821 -24661.220, -209337.698 ...",major_transit_stop,Mission Bay TMA
