In [1]:
# !pip install -r requirements.txt

In [1]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import shapely

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON
from ipywidgets import Text, HTML

from utilities import *



In [2]:
analysis_date = dt.date(2022, 6, 15) ## Wed, June 15

In [4]:
bus_hqtc = gpd.read_parquet(f'{GCS_FILE_PATH}intermediate/shape_dissolve.parquet')
bus_hqtc = bus_hqtc[bus_hqtc['hq_transit_corr']]
bus_hqtc['hqta_type'] = 'hq_transit_corr'
bus_hqtc['route_type'] = '3'

In [5]:
rail_ferry_brt_stops = gpd.read_parquet(f'{GCS_FILE_PATH}rail_brt_ferry.parquet')
rail_ferry_brt_stops['hqta_type'] = 'major_transit_stop'

In [6]:
rail_ferry_brt_stops.columns

Index(['calitp_itp_id', 'stop_id', 'stop_lat', 'stop_lon', 'stop_name',
       'route_type', 'geometry', 'hqta_type'],
      dtype='object')

In [5]:
# geoparquet_gcs_export(bus_hqtc, 'bus_hqtc')

In [6]:
# geoparquet_gcs_export(rail_ferry_brt_stops, 'rail_ferry_brt_stops')

### High Quality Transit Areas Relevant Statutes

[PRC 21155](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21155.&lawCode=PRC)
* _(3) be within one-half mile of a major transit stop or high-quality transit corridor included in a regional transportation plan._
* Major transit stop definition: _A major transit stop is as defined in Section 21064.3, except that, for purposes of this section, it also includes major transit stops that are included in the applicable regional transportation plan_
* High-quality transit corridor definition: _For purposes of this section, a high-quality transit corridor means a corridor with fixed route bus service with service intervals no longer than 15 minutes during peak commute hours._
    * Unable to locate definition of "peak commute hours"

[PRC 21064.3](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21064.3.&lawCode=PRC)
* _Major transit stop means a site containing any of the following:
(a) An existing rail or bus rapid transit station.
(b) A ferry terminal served by either a bus or rail transit service.
(c) The intersection of two or more major bus routes with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods._
    * "Intersection" may not be sufficiently well-defined for this analysis

[PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit)
* _(a) “Bus rapid transit” means a public mass transit service provided by a public agency or by a public-private partnership that includes all of the following features:
(1) Full-time dedicated bus lanes or operation in a separate right-of-way dedicated for public transportation with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods.
(2) Transit signal priority.
(3) All-door boarding.
(4) Fare collection system that promotes efficiency.
(5) Defined stations._
    * Unlikely to determine if a service qualifies as BRT under this definition using GTFS alone

## Bus Major Stops

In [7]:
bus_hqtc.head(3)

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3109,True,500,142,"MULTIPOLYGON (((212373.973 -486798.157, 212402...",0,2197551287,0,3977,10.0,10.0,,,hq_transit_corr,3
3110,True,500,235,"MULTIPOLYGON (((212373.973 -486798.157, 212402...",1,3570856290,0,3977,10.0,10.0,,,hq_transit_corr,3
3112,True,843,142,"MULTIPOLYGON (((200824.762 -472153.347, 200815...",0,1555422069,0,6679,5.0,7.0,,,hq_transit_corr,3


In [8]:
gdf = bus_hqtc
output = gpd.GeoDataFrame()

def find_intersections(row):
    global output
    # display(row)
    # display(row.shape_id)
    this_row = gdf >> filter(_.shape_id == row.shape_id)
    not_this_row = gdf >> filter(_.shape_id != row.shape_id)
    # print(type(filtered))
    clip_row = gpd.clip(this_row, not_this_row)
    output = pd.concat((output, clip_row))
    return

In [9]:
from tqdm import tqdm

In [10]:
tqdm.pandas()

In [11]:
# # ran 6/23
# _test = bus_hqtc.progress_apply(find_intersections, axis=1)

In [13]:
output.geometry = output.geometry.buffer(50)

In [14]:
# shared_utils.utils.geoparquet_gcs_export(output, f'{GCS_FILE_PATH}', f'major_bus_stops_working')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [12]:
major_bus = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops_working.parquet')

In [13]:
# map_hqta(major_bus)

In [14]:
def drop_big_areas(geometry):
    if type(geometry) == shapely.geometry.multipolygon.MultiPolygon:
        filtered = [x for x in list(geometry.geoms) if x.length < 1000]
        if len(filtered) > 0:
            return shapely.geometry.MultiPolygon(filtered)
    elif type(geometry) == shapely.geometry.polygon.Polygon:
        if geometry.length < 1000:
            return geometry
    else:
        return np.nan

In [19]:
major_bus['geometry'] = major_bus.geometry.apply(drop_big_areas)

In [89]:
major_bus = major_bus.dropna(subset=['geometry'])

In [93]:
row_per_stop = major_bus.explode(ignore_index=True)

In [94]:
row_per_stop = row_per_stop[['calitp_itp_id', 'stop_id', 'geometry']]

row_per_stop['hqta_type'] = 'major_transit_stop'

row_per_stop = row_per_stop.set_crs(
    shared_utils.geography_utils.CA_NAD83Albers)

In [98]:
# shared_utils.utils.geoparquet_gcs_export(row_per_stop, f'{GCS_FILE_PATH}', f'major_bus_stops')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [15]:
major_bus_stops = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops.parquet')

In [16]:
major_bus_stops.head(3)

Unnamed: 0,calitp_itp_id,stop_id,geometry,hqta_type
0,142,3977,"POLYGON ((212877.188 -487301.779, 212872.742 -...",major_transit_stop
1,142,3977,"POLYGON ((212369.750 -486959.092, 212367.490 -...",major_transit_stop
2,142,3977,"POLYGON ((198813.879 -471103.355, 198814.417 -...",major_transit_stop


In [17]:
# map_hqta(major_bus_stops)

### Refine Major Stops

In [19]:
tbl_stops = (tbl.views.gtfs_schedule_fact_daily_feed_stops()
 >> filter(_.date == analysis_date)
 >> filter(_.calitp_extracted_at < analysis_date)
 >> filter(_.calitp_deleted_at > analysis_date)
 >> select(_.stop_key)
 >> inner_join(_, tbl.views.gtfs_schedule_dim_stops(), on = 'stop_key')
 >> select(_.stop_id, _.stop_lat, _.stop_lon, _.calitp_itp_id)
 >> filter(_.calitp_itp_id != 200)
 >> collect()
)

In [20]:
tbl_stops = gpd.GeoDataFrame(tbl_stops,
                 geometry = gpd.points_from_xy(tbl_stops.stop_lon, tbl_stops.stop_lat),
                 crs = 'EPSG:4326').to_crs(shared_utils.geography_utils.CA_NAD83Albers)

In [21]:
tbl_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,102,33.816115,-118.386803,260,POINT (149374.677 -465290.968)
1,103,33.819139,-118.388194,260,POINT (149240.188 -464957.847)
2,2619811,34.093286,-118.125155,6,POINT (172991.223 -434108.437)


In [22]:
to_join = major_bus_stops[['calitp_itp_id']].astype({'calitp_itp_id': 'int64'})

In [23]:
## all stops for an operator with at least 1 major stop
tbl_stops_major = (tbl_stops
         >> inner_join(_, to_join, on=['calitp_itp_id'])
         >> distinct(_.calitp_itp_id, _.stop_id, _keep_all=True)
            )


In [24]:
tbl_stops_major.shape

(85318, 5)

In [25]:
major_bus_stops.shape

(2676, 4)

In [26]:
drop_id = major_bus_stops.drop(columns=['stop_id'])

In [27]:
spatial_stops_major = tbl_stops_major.sjoin(drop_id, how='inner', predicate='within')

In [28]:
new_major_stops = spatial_stops_major.drop_duplicates(subset=['stop_id', 'calitp_itp_id_left', 'calitp_itp_id_right'])

In [29]:
new_major_stops.geometry = new_major_stops.centroid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [31]:
new_major_stops.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 5961 entries, 83 to 85065
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   stop_id              5961 non-null   object  
 1   stop_lat             5961 non-null   float64 
 2   stop_lon             5961 non-null   float64 
 3   calitp_itp_id_left   5961 non-null   int64   
 4   geometry             5961 non-null   geometry
 5   index_right          5961 non-null   int64   
 6   calitp_itp_id_right  5961 non-null   int64   
 7   hqta_type            5961 non-null   object  
dtypes: float64(2), geometry(1), int64(3), object(2)
memory usage: 419.1+ KB


In [32]:
# map_hqta(new_major_stops)

### Bus Corridors to Stops Along Corridor

In [33]:
tbl_stops_corridors = tbl_stops

In [34]:
tbl_stops_corridors.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,102,33.816115,-118.386803,260,POINT (149374.677 -465290.968)
1,103,33.819139,-118.388194,260,POINT (149240.188 -464957.847)
2,2619811,34.093286,-118.125155,6,POINT (172991.223 -434108.437)


In [35]:
bus_hqtc.head(3)

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3109,True,500,142,"MULTIPOLYGON (((212373.973 -486798.157, 212402...",0,2197551287,0,3977,10.0,10.0,,,hq_transit_corr,3
3110,True,500,235,"MULTIPOLYGON (((212373.973 -486798.157, 212402...",1,3570856290,0,3977,10.0,10.0,,,hq_transit_corr,3
3112,True,843,142,"MULTIPOLYGON (((200824.762 -472153.347, 200815...",0,1555422069,0,6679,5.0,7.0,,,hq_transit_corr,3


In [36]:
stops_in_corridor = tbl_stops_corridors.clip(bus_hqtc)

In [37]:
stops_in_corridor.drop_duplicates(subset = ['stop_id', 'calitp_itp_id']).info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 33337 entries, 4709 to 30021
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   stop_id        33337 non-null  object  
 1   stop_lat       33337 non-null  float64 
 2   stop_lon       33337 non-null  float64 
 3   calitp_itp_id  33337 non-null  int64   
 4   geometry       33337 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 1.5+ MB


In [38]:
# map_hqta(bus_hqtc)

In [39]:
# stop_subset = stops_in_corridor >> head(2000)

In [40]:
# map_hqta(stop_subset)

#### Definitions and Output:

* hqta_type: major_transit_stop
* stop_id: one stop id... (not ideal, but oh well)
* geometry: .buffer(700)?
* _can alway pull more info spatially_

## Unbuffered Export

Thank you for this data. It would be useful for us to get the HQTC stops as a point data file, not a polygon. Also, if you could differentiate between train, bus, BRT, and ferry stop that would be immensely helpful.

Let me know if it is possible to get the data in this format.  

#### Major Transit Stops (bus intersections)

In [41]:
new_major_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id_left,geometry,index_right,calitp_itp_id_right,hqta_type
83,201,33.94913,-118.39182,260,POINT (148659.946 -450548.591),2628,87,major_transit_stop
22774,1635,33.949739,-118.392222,300,POINT (148621.643 -450481.683),2628,87,major_transit_stop
22786,1636,33.94975,-118.392541,300,POINT (148592.137 -450480.964),2628,87,major_transit_stop


In [42]:
new_major_bus_extract = new_major_stops >> select(_.calitp_itp_id_primary == _.calitp_itp_id_left, _.stop_id,
                          _.calitp_itp_id_secondary == _.calitp_itp_id_right, _.hqta_type, _.geometry
                         )
new_major_bus_extract['hqta_type'] = 'major_stop_bus'
new_major_bus_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry
83,260,201,87,major_stop_bus,POINT (148659.946 -450548.591)
22774,300,1635,87,major_stop_bus,POINT (148621.643 -450481.683)
22786,300,1636,87,major_stop_bus,POINT (148592.137 -450480.964)


#### Stops Along HQ Transit Corridors (bus)

In [43]:
stops_in_corridor.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
4709,SAVBAR:1,34.160575,-119.1778,123,POINT (75803.355 -428025.323)
7534,3289697,34.160688,-119.177864,231,POINT (75797.334 -428012.869)
94904,3299402,34.16093,-119.1771,231,POINT (75867.537 -427985.411)


In [44]:
stops_extract = stops_in_corridor >> select(_.calitp_itp_id_primary == _.calitp_itp_id,
                                            _.stop_id, _.geometry)

In [45]:
stops_extract['hqta_type'] = 'hq_corridor_bus'

In [46]:
stops_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,geometry,hqta_type
4709,123,SAVBAR:1,POINT (75803.355 -428025.323),hq_corridor_bus
7534,231,3289697,POINT (75797.334 -428012.869),hq_corridor_bus
94904,231,3299402,POINT (75867.537 -427985.411),hq_corridor_bus


#### Major Transit Stops (rail/ferry/brt)

In [47]:
rail_ferry_brt_stops.head(3)

Unnamed: 0,calitp_itp_id,stop_id,stop_lat,stop_lon,stop_name,route_type,geometry,hqta_type
74,13,GVB,35.12126,-120.629266,Grover Beach Amtrak,2,POINT (-57308.226 -321533.615),major_transit_stop
705,13,SLO,35.276434,-120.654701,San Luis Obispo,2,POINT (-59505.601 -304286.520),major_transit_stop
479,13,BFD,35.37214,-119.00821,Bakersfield,2,POINT (90031.416 -293392.047),major_transit_stop


In [48]:
rail_ferry_brt_extract = rail_ferry_brt_stops >> select(_.calitp_itp_id == _.calitp_itp_id, _.stop_id,
                                      _.hqta_type, _.route_type, _.geometry
                         )

In [49]:
rail_ferry_brt_extract = (rail_ferry_brt_extract
    >> mutate(
    hqta_type = case_when({
        _.route_type.isin(['0', '1', '2']): 'major_stop_rail',
        _.route_type == '3': 'major_stop_brt',
        _.route_type == '4': 'major_stop_ferry'
        })
    )
    >> select(-_.route_type, _.calitp_itp_id_primary == _.calitp_itp_id)
)

In [50]:
rail_ferry_brt_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,hqta_type,geometry
74,13,GVB,major_stop_rail,POINT (-57308.226 -321533.615)
705,13,SLO,major_stop_rail,POINT (-59505.601 -304286.520)
479,13,BFD,major_stop_rail,POINT (90031.416 -293392.047)


In [51]:
points_combined = pd.concat((new_major_bus_extract, stops_extract, rail_ferry_brt_extract))

In [52]:
points_combined.dtypes

calitp_itp_id_primary         int64
stop_id                      object
calitp_itp_id_secondary     float64
hqta_type                    object
geometry                   geometry
dtype: object

In [53]:
points_combined

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry
83,260,201,87.0,major_stop_bus,POINT (148659.946 -450548.591)
22774,300,1635,87.0,major_stop_bus,POINT (148621.643 -450481.683)
22786,300,1636,87.0,major_stop_bus,POINT (148592.137 -450480.964)
29797,87,193,87.0,major_stop_bus,POINT (148580.737 -450435.675)
51409,182,30006,87.0,major_stop_bus,POINT (148582.600 -450452.838)
...,...,...,...,...,...
14,127,43008,,major_stop_ferry,POINT (-210476.942 -21780.838)
15,127,43003,,major_stop_ferry,POINT (-217668.884 -14964.633)
17,127,43007,,major_stop_ferry,POINT (-215721.361 -13173.100)
13,280,890004,,major_stop_ferry,POINT (-207130.442 -9301.491)


In [54]:
# map_hqta(points_combined)

In [7]:
names = (tbl.views.gtfs_schedule_dim_feeds()
    >> filter(_.calitp_extracted_at < analysis_date, _.calitp_deleted_at > analysis_date)
    >> select(_.calitp_itp_id_primary == _.calitp_itp_id, _.agency_name_primary == _.calitp_agency_name)
    >> collect()
)

In [8]:
name_dict = names.set_index('calitp_itp_id_primary').to_dict()['agency_name_primary']

In [57]:
with_names = points_combined >> inner_join(_, names, on = 'calitp_itp_id_primary')

In [8]:
with_names['agency_name_secondary'] = with_names.apply(lambda x: name_dict[int(x.calitp_itp_id_secondary)]\
        if not np.isnan(x.calitp_itp_id_secondary) and int(x.calitp_itp_id_secondary) in name_dict.keys() else np.nan, axis = 1)

In [9]:
with_names['hqta_details'] = with_names.apply(hqta_details, axis=1)

In [59]:
with_names = with_names.drop_duplicates(subset=['calitp_itp_id_primary', 'hqta_type', 'stop_id'])

In [10]:
with_names.head(3)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,stop_id,hqta_type,calitp_itp_id_secondary,agency_name_secondary,geometry,hqta_details
0,260,Beach Cities Transit,201,major_stop_bus,87.0,,POINT (-118.39182 33.94913),intersection_2_bus_routes_different_operators
1,260,Beach Cities Transit,402,major_stop_bus,182.0,,POINT (-118.37030 33.89420),intersection_2_bus_routes_different_operators
2,260,Beach Cities Transit,401,major_stop_bus,182.0,,POINT (-118.36983 33.89507),intersection_2_bus_routes_different_operators


In [11]:
with_names >> count(_.hqta_type)

Unnamed: 0,hqta_type,n
0,hq_corridor_bus,33337
1,major_stop_brt,201
2,major_stop_bus,5530
3,major_stop_ferry,18
4,major_stop_rail,1081


In [5]:
analysis_date

datetime.date(2022, 6, 15)

In [13]:
with_names = with_names >> select(_.calitp_itp_id_primary, _.agency_name_primary, _.stop_id, _.hqta_type,
                              _.calitp_itp_id_secondary, _.agency_name_secondary, _.geometry)

In [64]:
# fs.mkdir(f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/')

In [65]:
# ## TODO add check that folder exists/mkdir...
# with_names.to_file(f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/ca_hq_transit_stops.geojsonl',
#                    driver='GeoJSONSeq')

In [3]:
with_names = gpd.read_file(f'./ca_hq_transit_stops.geojsonl')

In [4]:
with_names >> count(_.hqta_details)

Unnamed: 0,hqta_details,n
0,intersection_2_bus_routes_different_operators,839
1,intersection_2_bus_routes_same_operator,4691
2,major_stop_brt_single_operator,201
3,major_stop_ferry_single_operator,18
4,major_stop_rail_single_operator,1081
5,stop_along_hq_bus_corridor_single_operator,33337


In [7]:
with_names = with_names.to_crs(shared_utils.geography_utils.WGS84)

In [8]:
with_names.to_file('./ca_hq_transit_stops.geojsonl', driver='GeoJSONSeq')
with_names.to_file('./ca_hq_transit_stops.geojson', driver='GeoJSON')



In [1]:
## seems to hang, TODO wrapper function like for parquet?

# with_names.to_file(f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/ca_hq_transit_stops.geojsonl',
#                    driver='GeoJSONSeq')
# with_names.to_file(f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/ca_hq_transit_stops.geojson',
#                    driver='GeoJSON')

### Combining and Buffering

* General buffer distance: 1/2mi ~= 805 meters
* Bus corridors are already buffered 50 meters, so will buffer 755 meters

In [12]:
major_stops_named = with_names >> filter(_.hqta_type != 'hq_corridor_bus')

In [13]:
bus_hqtc.geometry = bus_hqtc.geometry.buffer(755)
major_stops_named.geometry = major_stops_named.buffer(805)
# row_per_stop.geometry = row_per_stop.geometry.buffer(805)
# rail_ferry_brt_stops.geometry = rail_ferry_brt_stops.geometry.buffer(805)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
major_stops_named >> head(2)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,stop_id,hqta_type,calitp_itp_id_secondary,agency_name_secondary,hqta_details,geometry
0,260,Beach Cities Transit,201,major_stop_bus,87.0,Culver CityBus,intersection_2_bus_routes_different_operators,"POLYGON ((686.60818 33.94913, 682.73188 -44.95..."
1,260,Beach Cities Transit,402,major_stop_bus,182.0,Metro,intersection_2_bus_routes_different_operators,"POLYGON ((686.62970 33.89420, 682.75340 -45.00..."


In [15]:
bus_hqtc >> head(2)

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3109,True,500,142,"MULTIPOLYGON (((211656.381 -486108.779, 211648...",0,2197551287,0,3977,10.0,10.0,,,hq_transit_corr,3
3110,True,500,235,"MULTIPOLYGON (((211656.381 -486108.779, 211648...",1,3570856290,0,3977,10.0,10.0,,,hq_transit_corr,3


In [16]:
bus_hqtc_formatted = bus_hqtc >> select(_.calitp_itp_id_primary == _.calitp_itp_id, _.stop_id, _.hqta_segment_id,
                                        _.am_max_trips, _.pm_max_trips, _.hqta_type, _.geometry)
bus_hqtc_formatted.hqta_type = 'hq_corridor_bus'
bus_hqtc_formatted.calitp_itp_id_primary = bus_hqtc_formatted.calitp_itp_id_primary.astype('int64')
bus_hqtc_formatted = bus_hqtc_formatted >> inner_join(_, names, on = 'calitp_itp_id_primary')

In [17]:
all_hqta = pd.concat([major_stops_named, bus_hqtc_formatted])

In [18]:
# tbl.gtfs_schedule.stops() >> filter(_.calitp_itp_id == 300, _.stop_id == '689')

In [19]:
# tbl.gtfs_schedule.stops() >> filter(_.stop_id == '315608')

In [20]:
# map_hqta(bus_hqtc, 'stop_id')

In [21]:
bus_hqtc >> filter(_.stop_id == '689')

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3529,True,25889,300,"MULTIPOLYGON (((146051.357 -439049.193, 146045...",0,582087555,6,689,8.0,8.0,,,hq_transit_corr,3


In [22]:
bus_hqtc >> filter(_.stop_id.isin(['315604', '315614']))

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3947,True,SamTrans89:170546,290,"MULTIPOLYGON (((-215212.254 -62247.563, -21521...",1,3580424079,34,315614,8.0,6.0,,,hq_transit_corr,3
3951,True,SamTrans89:2940211,290,"MULTIPOLYGON (((-204097.961 -49854.746, -20408...",1,1305915797,0,315604,6.0,6.0,,,hq_transit_corr,3


In [23]:
all_hqta = all_hqta >> filter(_.stop_id != '315604', _.stop_id != '315614', _.stop_id != '689')
## drop incorrect HMB data, TODO investigate
## drop incorrect Cheviot data, TODO investigate refactor (run shapes in frequency order...)

In [24]:
all_hqta = all_hqta >> select(_.calitp_itp_id_primary, _.calitp_itp_id_secondary, _.agency_name_primary, _.hqta_type, _.geometry)
all_hqta = all_hqta.reset_index(drop=True)
# all_hqta['calitp_itp_id'] = all_hqta['calitp_itp_id'].astype('int64')

In [25]:
all_hqta

Unnamed: 0,calitp_itp_id_primary,calitp_itp_id_secondary,agency_name_primary,hqta_type,geometry
0,260,87.0,Beach Cities Transit,major_stop_bus,"POLYGON ((686.60818 33.94913, 682.73188 -44.95..."
1,260,182.0,Beach Cities Transit,major_stop_bus,"POLYGON ((686.62970 33.89420, 682.75340 -45.00..."
2,260,182.0,Beach Cities Transit,major_stop_bus,"POLYGON ((686.63017 33.89507, 682.75387 -45.00..."
3,300,87.0,Big Blue Bus,major_stop_bus,"POLYGON ((686.60778 33.94974, 682.73148 -44.95..."
4,300,87.0,Big Blue Bus,major_stop_bus,"POLYGON ((686.60746 33.94975, 682.73116 -44.95..."
...,...,...,...,...,...
7679,61,,County Connection,hq_corridor_bus,"POLYGON ((-179744.16608 -7132.10166, -179742.7..."
7680,61,,County Connection,hq_corridor_bus,"POLYGON ((-179744.16608 -7132.10166, -179742.7..."
7681,61,,County Connection,hq_corridor_bus,"POLYGON ((-182650.55053 -866.36348, -182662.99..."
7682,61,,County Connection,hq_corridor_bus,"POLYGON ((-182650.55053 -866.36348, -182662.99..."


In [26]:
all_hqta['agency_name_secondary'] = all_hqta.apply(lambda x: name_dict[int(x.calitp_itp_id_secondary)]\
        if not np.isnan(x.calitp_itp_id_secondary) and int(x.calitp_itp_id_secondary) in name_dict.keys() else np.nan, axis = 1)

In [27]:
all_hqta['hqta_details'] = all_hqta.apply(hqta_details, axis=1)

In [28]:
all_hqta = all_hqta >> select(_.calitp_itp_id_primary, _.agency_name_primary, _.hqta_type,
                              _.calitp_itp_id_secondary, _.agency_name_secondary, _.hqta_details,
                              _.geometry)

In [110]:
# map_hqta(all_hqta, 'agency_name_primary')

In [83]:
# ## TODO add check that folder exists/mkdir...
# all_hqta.to_file(f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/ca_hq_transit_areas.geojsonl',
#                 driver='GeoJSONSeq')

In [11]:
all_hqta = gpd.read_file('./ca_hq_transit_areas.geojsonl')

In [12]:
all_hqta = all_hqta.to_crs(shared_utils.geography_utils.WGS84)

In [13]:
all_hqta.to_file('./ca_hq_transit_areas.geojsonl', driver='GeoJSONSeq')
all_hqta.to_file('./ca_hq_transit_areas.geojson', driver='GeoJSON')



### Filling Out Documentation

In [29]:
all_hqta >> head(3)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,hqta_type,calitp_itp_id_secondary,agency_name_secondary,hqta_details,geometry
0,260,Beach Cities Transit,major_stop_bus,87.0,Culver CityBus,intersection_2_bus_routes_different_operators,"POLYGON ((686.60818 33.94913, 682.73188 -44.95..."
1,260,Beach Cities Transit,major_stop_bus,182.0,Metro,intersection_2_bus_routes_different_operators,"POLYGON ((686.62970 33.89420, 682.75340 -45.00..."
2,260,Beach Cities Transit,major_stop_bus,182.0,Metro,intersection_2_bus_routes_different_operators,"POLYGON ((686.63017 33.89507, 682.75387 -45.00..."


In [86]:
all_hqta.dtypes

calitp_itp_id_primary         int64
agency_name_primary          object
hqta_type                    object
calitp_itp_id_secondary     float64
agency_name_secondary       float64
geometry                   geometry
dtype: object

In [87]:
with_names >> head(3)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,stop_id,hqta_type,calitp_itp_id_secondary,agency_name_secondary,geometry
0,260,Beach Cities Transit,201,major_stop_bus,87.0,,POINT (148659.946 -450548.591)
1,260,Beach Cities Transit,402,major_stop_bus,182.0,,POINT (150753.681 -456605.934)
2,260,Beach Cities Transit,401,major_stop_bus,182.0,,POINT (150795.591 -456508.930)


In [88]:
with_names.dtypes

calitp_itp_id_primary         int64
agency_name_primary          object
stop_id                      object
hqta_type                    object
calitp_itp_id_secondary     float64
agency_name_secondary       float64
geometry                   geometry
dtype: object

In [89]:
with_names.hqta_type.unique()

array(['major_stop_bus', 'hq_corridor_bus', 'major_stop_rail',
       'major_stop_brt', 'major_stop_ferry'], dtype=object)

In [90]:
all_hqta.hqta_type.unique()

array(['major_stop_bus', 'major_stop_rail', 'major_stop_brt',
       'major_stop_ferry', 'hq_corridor_bus'], dtype=object)

In [91]:
all_hqta.hqta_type.str.len().max()

16

In [92]:
all_hqta.agency_name_primary.str.len().max()

57