In [1]:
# !pip install -r requirements.txt

In [149]:
import calitp
from calitp.tables import tbl
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
import shapely

from ipyleaflet import Map, GeoJSON, projections, basemaps, GeoData, LayersControl, WidgetControl, GeoJSON
from ipywidgets import Text, HTML

from utilities import *

In [150]:
# Load the dissolved shape segments, keep only the rows flagged as high-quality
# transit corridors, and tag them with their HQTA category and route_type '3'.
bus_hqtc = gpd.read_parquet(f'{GCS_FILE_PATH}intermediate/shape_dissolve.parquet')
bus_hqtc = bus_hqtc.loc[bus_hqtc['hq_transit_corr']].assign(
    hqta_type='hq_transit_corr',
    route_type='3',
)

In [151]:
# Every stop in the rail/BRT/ferry file is labelled as a major transit stop.
rail_ferry_brt_stops = gpd.read_parquet(
    f'{GCS_FILE_PATH}rail_brt_ferry.parquet'
).assign(hqta_type='major_transit_stop')

In [152]:
rail_ferry_brt_stops.columns

Index(['calitp_itp_id', 'stop_id', 'stop_lat', 'stop_lon', 'stop_name',
       'route_type', 'geometry', 'hqta_type'],
      dtype='object')

In [6]:
# geoparquet_gcs_export(bus_hqtc, 'bus_hqtc')

In [7]:
# geoparquet_gcs_export(rail_ferry_brt_stops, 'rail_ferry_brt_stops')

### High Quality Transit Areas Relevant Statutes

[PRC 21155](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21155.&lawCode=PRC)
* _(3) be within one-half mile of a major transit stop or high-quality transit corridor included in a regional transportation plan._
* Major transit stop definition: _A major transit stop is as defined in Section 21064.3, except that, for purposes of this section, it also includes major transit stops that are included in the applicable regional transportation plan_
* High-quality transit corridor definition: _For purposes of this section, a high-quality transit corridor means a corridor with fixed route bus service with service intervals no longer than 15 minutes during peak commute hours._
    * Unable to locate definition of "peak commute hours"

[PRC 21064.3](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?sectionNum=21064.3.&lawCode=PRC)
* _Major transit stop means a site containing any of the following:
(a) An existing rail or bus rapid transit station.
(b) A ferry terminal served by either a bus or rail transit service.
(c) The intersection of two or more major bus routes with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods._
    * "Intersection" may not be sufficiently well-defined for this analysis

[PRC 21060.2](https://leginfo.legislature.ca.gov/faces/codes_displaySection.xhtml?lawCode=PRC&sectionNum=21060.2.&highlight=true&keyword=bus%20rapid%20transit)
* _(a) “Bus rapid transit” means a public mass transit service provided by a public agency or by a public-private partnership that includes all of the following features:
(1) Full-time dedicated bus lanes or operation in a separate right-of-way dedicated for public transportation with a frequency of service interval of 15 minutes or less during the morning and afternoon peak commute periods.
(2) Transit signal priority.
(3) All-door boarding.
(4) Fare collection system that promotes efficiency.
(5) Defined stations._
    * Unlikely to determine if a service qualifies as BRT under this definition using GTFS alone

## Bus Major Stops

In [8]:
bus_hqtc.head(3)

Unnamed: 0,hq_transit_corr,shape_id,geometry,calitp_itp_id,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
2755,True,500,"MULTIPOLYGON (((212402.267 -486786.382, 212428...",142,2197551287,0,3977,10.0,9.0,,,hq_transit_corr,3
2756,True,843,"MULTIPOLYGON (((200815.727 -472198.441, 200811...",142,1555422069,0,6679,5.0,6.0,,,hq_transit_corr,3
2758,True,10,"MULTIPOLYGON (((170484.804 -424390.901, 170484...",243,3850303330,1,147,10.0,12.0,,,hq_transit_corr,3


In [9]:
gdf = bus_hqtc
output = gpd.GeoDataFrame()

def find_intersections(row):
    """Clip one corridor shape by every other shape and stash the result.

    Intended for ``DataFrame.apply(find_intersections, axis=1)``. The rows of
    the module-level ``gdf`` sharing ``row.shape_id`` are clipped to the rows
    with any other ``shape_id``; whatever survives the clip is appended to the
    module-level ``output`` frame.

    Returns None -- the only effect is that ``output`` grows.

    NOTE(review): rows are matched on ``shape_id`` alone, so every row that
    shares a shape_id presumably re-adds the same clipped rows -- confirm
    whether duplicates in ``output`` are expected.
    """
    global output
    same_shape = gdf >> filter(_.shape_id == row.shape_id)
    other_shapes = gdf >> filter(_.shape_id != row.shape_id)
    overlap = gpd.clip(same_shape, other_shapes)
    output = pd.concat([output, overlap])

In [10]:
from tqdm import tqdm

In [11]:
tqdm.pandas()

In [12]:
## ran 5/17
# _test = bus_hqtc.progress_apply(find_intersections, axis=1)

In [13]:
# output.geometry = output.geometry.buffer(50)

In [14]:
# shared_utils.utils.geoparquet_gcs_export(output, f'{GCS_FILE_PATH}', f'major_bus_stops_working')

In [15]:
major_bus = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops_working.parquet')

In [16]:
# map_hqta(major_bus)

In [17]:
def drop_big_areas(geometry, max_length=1000):
    """Keep only the small polygon parts of a geometry.

    Parameters
    ----------
    geometry : shapely geometry (or anything else)
        Value taken from a geometry column.
    max_length : number, default 1000
        Parts whose ``.length`` is greater than or equal to this are dropped.
        The value is in the units of the layer's CRS (presumably meters for
        the projected data used in this notebook -- confirm).

    Returns
    -------
    MultiPolygon, Polygon or None
        * MultiPolygon input: a new MultiPolygon made of the parts below
          ``max_length``, or None when no part qualifies.
        * Polygon input: the polygon itself when below ``max_length``,
          otherwise None.
        * Any other input: None.

    None is used as the single "nothing to keep" marker on every path, so the
    result can be cleaned with one ``dropna(subset=['geometry'])``.
    """
    if isinstance(geometry, shapely.geometry.MultiPolygon):
        small_parts = [part for part in geometry.geoms if part.length < max_length]
        return shapely.geometry.MultiPolygon(small_parts) if small_parts else None
    if isinstance(geometry, shapely.geometry.Polygon):
        return geometry if geometry.length < max_length else None
    # Points, lines, missing values, ...: nothing polygon-like to keep.
    return None

In [18]:
major_bus['geometry'] = major_bus.geometry.apply(drop_big_areas)

In [19]:
row_per_stop = gpd.GeoDataFrame()
def explode_geoms(row):
    """Append one centroid row per polygon part of ``row`` to ``row_per_stop``.

    Intended for ``DataFrame.apply(explode_geoms, axis=1)``.

    * MultiPolygon geometry: one output row per member polygon, each carrying
      that polygon's centroid as its geometry.
    * Polygon geometry: a single output row carrying the polygon's centroid.
    * Anything else: nothing is appended.

    The rows are accumulated in the module-level ``row_per_stop`` frame.

    Returns
    -------
    The (mutated) input row; its geometry is the last centroid that was
    written, or unchanged when nothing was appended.
    """
    global row_per_stop
    if isinstance(row.geometry, shapely.geometry.MultiPolygon):
        parts = list(row.geometry.geoms)
    elif isinstance(row.geometry, shapely.geometry.Polygon):
        parts = [row.geometry]
    else:
        parts = []

    for part in parts:
        row.geometry = part.centroid
        # Concatenating a bare Series row-wise onto a frame stacks its fields
        # into a single column instead of adding a row, so turn the Series
        # into a one-row frame (fields as columns) first.
        # Growing the frame per part is quadratic, but the global accumulator
        # is what the calling cells rely on.
        row_per_stop = pd.concat((row_per_stop, row.to_frame().T))

    return row

In [23]:
# major_bus = major_bus.dropna(subset=['geometry'])

# major_bus.apply(explode_geoms, axis=1)

# row_per_stop = row_per_stop.reset_index(drop=True)

# row_per_stop = row_per_stop[['calitp_itp_id', 'stop_id', 'geometry']]

# row_per_stop['hqta_type'] = 'major_transit_stop'

# row_per_stop = row_per_stop.set_crs(
#     shared_utils.geography_utils.CA_NAD83Albers)

In [33]:
# Re-wrap the accumulated stop rows as a GeoDataFrame tagged with the
# CA_NAD83Albers CRS.
# NOTE(review): the cell that fills `row_per_stop` is commented out in this
# notebook, so on a fresh kernel `row_per_stop` is presumably still the empty
# frame created next to `explode_geoms` -- confirm this cell still runs.
# NOTE(review): `shared_utils` is not imported explicitly in this notebook;
# presumably it arrives through `from utilities import *` -- confirm.
row_per_stop = gpd.GeoDataFrame(row_per_stop, geometry=row_per_stop.geometry,
                               crs = shared_utils.geography_utils.CA_NAD83Albers)

In [24]:
# shared_utils.utils.geoparquet_gcs_export(row_per_stop, f'{GCS_FILE_PATH}', f'major_bus_stops')

In [34]:
major_bus_stops = gpd.read_parquet(f'{GCS_FILE_PATH}major_bus_stops.parquet')

In [35]:
major_bus_stops.head(3)

Unnamed: 0,calitp_itp_id,stop_id,geometry,hqta_type
0,142,3977,POINT (212896.375 -487200.839),major_transit_stop
1,142,3977,POINT (212351.822 -486889.713),major_transit_stop
2,142,3977,POINT (198974.208 -471136.313),major_transit_stop


In [36]:
analysis_date = dt.date(2022, 5, 4) ## Wed, May 4

In [37]:
# Query the stops present in the daily feed on `analysis_date`, join them to
# the stop dimension table for ids and coordinates, and collect the result.
tbl_stops = (tbl.views.gtfs_schedule_fact_daily_feed_stops()
 >> filter(_.date == analysis_date)
 # NOTE(review): the strict < and > leave out records extracted or deleted
 # exactly on analysis_date -- confirm that is intended.
 >> filter(_.calitp_extracted_at < analysis_date)
 >> filter(_.calitp_deleted_at > analysis_date)
 >> select(_.stop_key)
 >> inner_join(_, tbl.views.gtfs_schedule_dim_stops(), on = 'stop_key')
 >> select(_.stop_id, _.stop_lat, _.stop_lon, _.calitp_itp_id)
 # NOTE(review): operator id 200 is excluded without an explanation in this
 # notebook -- document the reason.
 >> filter(_.calitp_itp_id != 200)
 >> collect()
)

In [38]:
tbl_stops = gpd.GeoDataFrame(tbl_stops,
                 geometry = gpd.points_from_xy(tbl_stops.stop_lon, tbl_stops.stop_lat),
                 crs = 'EPSG:4326').to_crs(shared_utils.geography_utils.CA_NAD83Albers)

In [39]:
tbl_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,53066,38.756509,-121.254002,271,POINT (-108857.993 82995.480)
1,ARN,38.903831,-121.082922,56,POINT (-93820.462 99186.374)
2,BKY,37.867439,-122.300908,56,POINT (-202122.815 -14102.903)


In [40]:
# Operator ids that have at least one major bus stop, as int64 so they match
# `tbl_stops.calitp_itp_id`. `major_bus_stops` has one row per stop, so the ids
# are de-duplicated here; otherwise the inner join below repeats every stop of
# an operator once per major stop before `distinct` collapses it again.
to_join = (
    major_bus_stops[['calitp_itp_id']]
    .astype({'calitp_itp_id': 'int64'})
    .drop_duplicates()
)

In [41]:
## all stops for an operator with at least 1 major stop
tbl_stops_major = (tbl_stops
         >> inner_join(_, to_join, on=['calitp_itp_id'])
         >> distinct(_.calitp_itp_id, _.stop_id, _keep_all=True)
            )


In [42]:
tbl_stops_major.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,7264,37.957347,-121.293516,284,POINT (-113499.931 -5786.611)
1,7009,37.996681,-121.314823,284,POINT (-115308.763 -1388.681)
2,4292,37.795916,-120.994762,284,POINT (-87475.072 -24047.759)


In [43]:
tbl_stops_major.geometry = tbl_stops_major.buffer(100)

In [44]:
tbl_stops_major.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,7264,37.957347,-121.293516,284,"POLYGON ((-113399.931 -5786.611, -113400.413 -..."
1,7009,37.996681,-121.314823,284,"POLYGON ((-115208.763 -1388.681, -115209.244 -..."
2,4292,37.795916,-120.994762,284,"POLYGON ((-87375.072 -24047.759, -87375.554 -2..."


In [45]:
drop_id = major_bus_stops.drop(columns=['stop_id'])

In [46]:
spatial_stops_major = tbl_stops_major.sjoin(drop_id, how='inner', predicate='contains')

In [47]:
# One row per (stop_id, left operator id, right operator id) combination.
# The explicit copy detaches the result from `spatial_stops_major`, so the
# geometry can be re-assigned afterwards without a SettingWithCopyWarning.
new_major_stops = spatial_stops_major.drop_duplicates(
    subset=['stop_id', 'calitp_itp_id_left', 'calitp_itp_id_right']
).copy()

In [48]:
new_major_stops.geometry = new_major_stops.centroid

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [49]:
# map_hqta(new_major_stops)

### Bus Corridors to Stops Along Corridor

In [50]:
tbl_stops_corridors = tbl_stops

In [51]:
tbl_stops_corridors.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
0,53066,38.756509,-121.254002,271,POINT (-108857.993 82995.480)
1,ARN,38.903831,-121.082922,56,POINT (-93820.462 99186.374)
2,BKY,37.867439,-122.300908,56,POINT (-202122.815 -14102.903)


In [52]:
bus_hqtc.head(3)

Unnamed: 0,hq_transit_corr,shape_id,geometry,calitp_itp_id,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
2755,True,500,"MULTIPOLYGON (((212402.267 -486786.382, 212428...",142,2197551287,0,3977,10.0,9.0,,,hq_transit_corr,3
2756,True,843,"MULTIPOLYGON (((200815.727 -472198.441, 200811...",142,1555422069,0,6679,5.0,6.0,,,hq_transit_corr,3
2758,True,10,"MULTIPOLYGON (((170484.804 -424390.901, 170484...",243,3850303330,1,147,10.0,12.0,,,hq_transit_corr,3


In [53]:
stops_in_corridor = tbl_stops_corridors.clip(bus_hqtc)

In [54]:
stops_in_corridor.drop_duplicates(subset = ['stop_id', 'calitp_itp_id']).info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 33188 entries, 44635 to 128254
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   stop_id        33188 non-null  object  
 1   stop_lat       33188 non-null  float64 
 2   stop_lon       33188 non-null  float64 
 3   calitp_itp_id  33188 non-null  int64   
 4   geometry       33188 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(1)
memory usage: 1.5+ MB


In [55]:
# map_hqta(bus_hqtc)

In [56]:
# stop_subset = stops_in_corridor >> head(2000)

In [57]:
# map_hqta(stop_subset)

#### Definitions and Output:

* hqta_type: major_transit_stop
* stop_id: one stop id... (not ideal, but oh well)
* geometry: .buffer(700)?
* _can always pull more info spatially_

## Unbuffered Export

Thank you for this data. It would be useful for us to get the HQTC stops as a point data file, not a polygon. Also, if you could differentiate between train, bus, BRT, and ferry stop that would be immensely helpful.

Let me know if it is possible to get the data in this format.  

#### Major Transit Stops (bus intersections)

In [58]:
new_major_stops.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id_left,geometry,index_right,calitp_itp_id_right,hqta_type
43,7001,37.958571,-121.273702,284,POINT (-111759.624 -5674.094),1390,284,major_transit_stop
738,3001,37.958339,-121.275023,284,POINT (-111875.873 -5698.324),1390,284,major_transit_stop
818,3000,37.958289,-121.275021,284,POINT (-111875.746 -5703.829),1390,284,major_transit_stop


In [59]:
# Keep only the export columns, renaming the two operator id columns produced
# by the spatial join: the left-hand id becomes `calitp_itp_id_primary` and the
# right-hand id becomes `calitp_itp_id_secondary`.
new_major_bus_extract = new_major_stops >> select(_.calitp_itp_id_primary == _.calitp_itp_id_left, _.stop_id,
                          _.calitp_itp_id_secondary == _.calitp_itp_id_right, _.hqta_type, _.geometry
                         )
# Replace the generic 'major_transit_stop' label with the bus-specific one.
new_major_bus_extract['hqta_type'] = 'major_stop_bus'
new_major_bus_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry
43,284,7001,284,major_stop_bus,POINT (-111759.624 -5674.094)
738,284,3001,284,major_stop_bus,POINT (-111875.873 -5698.324)
818,284,3000,284,major_stop_bus,POINT (-111875.746 -5703.829)


#### Stops Along HQ Transit Corridors (bus)

In [60]:
stops_in_corridor.head(3)

Unnamed: 0,stop_id,stop_lat,stop_lon,calitp_itp_id,geometry
44635,5943533,34.154372,-119.177909,231,POINT (75799.261 -428713.584)
45204,3290234,34.154601,-119.180248,231,POINT (75583.384 -428690.047)
12535,PVRCST:2,34.154601,-119.180248,123,POINT (75583.400 -428690.022)


In [61]:
stops_extract = stops_in_corridor >> select(_.calitp_itp_id_primary == _.calitp_itp_id,
                                            _.stop_id, _.geometry)

In [62]:
stops_extract['hqta_type'] = 'hq_corridor_bus'

In [63]:
stops_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,geometry,hqta_type
44635,231,5943533,POINT (75799.261 -428713.584),hq_corridor_bus
45204,231,3290234,POINT (75583.384 -428690.047),hq_corridor_bus
12535,123,PVRCST:2,POINT (75583.400 -428690.022),hq_corridor_bus


#### Major Transit Stops (rail/ferry/brt)

In [64]:
rail_ferry_brt_stops.head(3)

Unnamed: 0,calitp_itp_id,stop_id,stop_lat,stop_lon,stop_name,route_type,geometry,hqta_type
101,278,75005,32.569515,-117.066965,Iris Avenue Station,0,POINT (275828.261 -600432.937),major_transit_stop
35,278,75004,32.569762,-117.067176,Iris Avenue Station,0,POINT (275807.549 -600406.211),major_transit_stop
86,278,75007,32.584454,-117.08386,Palm Avenue Station,0,POINT (274189.052 -598828.705),major_transit_stop


In [65]:
rail_ferry_brt_extract = rail_ferry_brt_stops >> select(_.calitp_itp_id == _.calitp_itp_id, _.stop_id,
                                      _.hqta_type, _.route_type, _.geometry
                         )

In [66]:
# Derive a mode-specific hqta_type from route_type, then drop route_type and
# rename the operator id column to `calitp_itp_id_primary`.
# NOTE(review): route_type values other than '0'-'4' match no branch of the
# case_when and presumably end up without an hqta_type -- confirm the input
# file cannot contain other values.
rail_ferry_brt_extract = (rail_ferry_brt_extract
    >> mutate(
    hqta_type = case_when({
        _.route_type.isin(['0', '1', '2']): 'major_stop_rail',
        _.route_type == '3': 'major_stop_brt',
        _.route_type == '4': 'major_stop_ferry'
        })
    )
    >> select(-_.route_type, _.calitp_itp_id_primary == _.calitp_itp_id)
)

In [67]:
rail_ferry_brt_extract.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,hqta_type,geometry
101,278,75005,major_stop_rail,POINT (275828.261 -600432.937)
35,278,75004,major_stop_rail,POINT (275807.549 -600406.211)
86,278,75007,major_stop_rail,POINT (274189.052 -598828.705)


In [68]:
points_combined = pd.concat((new_major_bus_extract, stops_extract, rail_ferry_brt_extract))

In [69]:
points_combined.dtypes

calitp_itp_id_primary         int64
stop_id                      object
calitp_itp_id_secondary      object
hqta_type                    object
geometry                   geometry
dtype: object

In [70]:
points_combined

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry
43,284,7001,284,major_stop_bus,POINT (-111759.624 -5674.094)
738,284,3001,284,major_stop_bus,POINT (-111875.873 -5698.324)
818,284,3000,284,major_stop_bus,POINT (-111875.746 -5703.829)
83,284,7006,284,major_stop_bus,POINT (-112820.565 -6058.798)
272,284,7155,284,major_stop_bus,POINT (-112842.308 -6118.083)
...,...,...,...,...,...
13,127,43008,,major_stop_ferry,POINT (-210476.942 -21780.838)
14,127,43003,,major_stop_ferry,POINT (-217668.884 -14964.633)
16,127,43007,,major_stop_ferry,POINT (-215721.361 -13173.100)
9,280,890004,,major_stop_ferry,POINT (-207130.442 -9301.491)


In [71]:
# map_hqta(points_combined)

In [182]:
# Operator id -> agency name lookup for feeds that were live around
# `analysis_date`; the columns are renamed to match the export schema.
# NOTE(review): nothing here enforces one row per operator id. If an id can
# appear more than once, the inner joins on `calitp_itp_id_primary` further
# down would repeat rows -- confirm uniqueness or de-duplicate.
names = (tbl.views.gtfs_schedule_dim_feeds()
    >> filter(_.calitp_extracted_at < analysis_date, _.calitp_deleted_at > analysis_date)
    >> select(_.calitp_itp_id_primary == _.calitp_itp_id, _.agency_name_primary == _.calitp_agency_name)
    >> collect()
)

In [183]:
# Plain dict of operator id -> agency name, used for the secondary-name lookups.
name_dict = (
    names
    .set_index('calitp_itp_id_primary')['agency_name_primary']
    .to_dict()
)

In [193]:
with_names = points_combined >> inner_join(_, names, on = 'calitp_itp_id_primary')

In [200]:
# with_names.calitp_itp_id_secondary = with_names.calitp_itp_id_secondary.astype('float64')
# Look up the agency name of the secondary operator.
# The ids live in an object column (strings for bus intersections, missing for
# everything else). Coerce them to numbers first so that string, integer and
# float ids are all matched against the integer keys of `name_dict`; missing
# or non-numeric ids, and ids without a known name, give NaN.
with_names['agency_name_secondary'] = (
    pd.to_numeric(with_names['calitp_itp_id_secondary'], errors='coerce')
    .map(name_dict)
)

In [207]:
with_names = with_names.drop_duplicates(subset=['calitp_itp_id_primary', 'hqta_type', 'stop_id'])

In [208]:
with_names.head(3)

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry,agency_name_primary,agency_name_secondary
0,284,7001,284,major_stop_bus,POINT (-111759.624 -5674.094),San Joaquin Regional Transit District,San Joaquin Regional Transit District
1,284,3001,284,major_stop_bus,POINT (-111875.873 -5698.324),San Joaquin Regional Transit District,San Joaquin Regional Transit District
2,284,3000,284,major_stop_bus,POINT (-111875.746 -5703.829),San Joaquin Regional Transit District,San Joaquin Regional Transit District


In [209]:
with_names >> count(_.hqta_type)

Unnamed: 0,hqta_type,n
0,hq_corridor_bus,33188
1,major_stop_brt,207
2,major_stop_bus,5431
3,major_stop_ferry,18
4,major_stop_rail,1439


In [210]:
analysis_date

datetime.date(2022, 5, 4)

In [227]:
with_names = with_names >> select(_.calitp_itp_id_primary, _.agency_name_primary, _.stop_id, _.hqta_type,
                              _.calitp_itp_id_secondary, _.agency_name_secondary, _.geometry)

In [228]:
## TODO add check that folder exists/mkdir...
shared_utils.utils.geoparquet_gcs_export(with_names, f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/', f'ca_hq_transit_stops')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [229]:
# Also write a local GeoJSON copy of the stop points.
# NOTE(review): earlier cells project the data to CA_NAD83Albers and nothing
# visible converts it back; GeoJSON consumers commonly expect WGS84 lon/lat --
# confirm whether a `to_crs` is wanted before writing.
with_names.to_file('./ca_hq_transit_stops.geojson', driver='GeoJSON')



### Combining and Buffering

* General buffer distance: 1/2mi ~= 805 meters
* Bus corridors are already buffered 50 meters, so they are buffered a further 755 meters (50 + 755 = 805)

In [153]:
# Every stop type except the stops that merely lie along a bus corridor.
# The explicit copy detaches the result from `with_names`, so the geometry can
# be re-assigned afterwards without a SettingWithCopyWarning.
major_stops_named = (with_names >> filter(_.hqta_type != 'hq_corridor_bus')).copy()

In [154]:
# Grow both layers to the half-mile (~805 m) distance: corridors get 755 since
# they already carry a 50 buffer, stops get the full 805.
# NOTE(review): both frames are overwritten in place, so running this cell a
# second time buffers the already-buffered geometry again -- re-run the cells
# that create `bus_hqtc` and `major_stops_named` first.
bus_hqtc.geometry = bus_hqtc.geometry.buffer(755)
major_stops_named.geometry = major_stops_named.buffer(805)
# row_per_stop.geometry = row_per_stop.geometry.buffer(805)
# rail_ferry_brt_stops.geometry = rail_ferry_brt_stops.geometry.buffer(805)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [155]:
major_stops_named >> head(2)

Unnamed: 0,calitp_itp_id_primary,stop_id,calitp_itp_id_secondary,hqta_type,geometry,agency_name_primary
0,284,7001,284,major_stop_bus,"POLYGON ((-110954.624 -5674.094, -110958.500 -...",San Joaquin Regional Transit District
1,284,3001,284,major_stop_bus,"POLYGON ((-111070.873 -5698.324, -111074.750 -...",San Joaquin Regional Transit District


In [156]:
bus_hqtc >> head(2)

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3293,True,500,142,"MULTIPOLYGON (((211656.381 -486108.779, 211648...",0,2197551287,0,3977,10.0,9.0,,,hq_transit_corr,3
3294,True,500,235,"MULTIPOLYGON (((211656.381 -486108.779, 211648...",0,3570856290,0,3977,10.0,9.0,,,hq_transit_corr,3


In [157]:
# Reduce the corridor layer to the export columns (operator id renamed to
# `calitp_itp_id_primary`), relabel it, and attach the agency names.
bus_hqtc_formatted = (
    bus_hqtc
    >> select(
        _.calitp_itp_id_primary == _.calitp_itp_id,
        _.stop_id,
        _.hqta_segment_id,
        _.am_max_trips,
        _.pm_max_trips,
        _.hqta_type,
        _.geometry,
    )
)
bus_hqtc_formatted['hqta_type'] = 'hq_corridor_bus'
# The join key has to be int64 to line up with `names`.
bus_hqtc_formatted['calitp_itp_id_primary'] = (
    bus_hqtc_formatted['calitp_itp_id_primary'].astype('int64')
)
bus_hqtc_formatted = bus_hqtc_formatted >> inner_join(_, names, on='calitp_itp_id_primary')

In [158]:
all_hqta = pd.concat([major_stops_named, bus_hqtc_formatted])

In [159]:
# tbl.gtfs_schedule.stops() >> filter(_.calitp_itp_id == 300, _.stop_id == '689')

In [160]:
# tbl.gtfs_schedule.stops() >> filter(_.stop_id == '315608')

In [161]:
bus_hqtc >> filter(_.stop_id == '689')

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3710,True,25889,300,"MULTIPOLYGON (((146051.357 -439049.193, 146045...",0,582087555,6,689,8.0,8.0,,,hq_transit_corr,3


In [162]:
bus_hqtc >> filter(_.stop_id == '315608')

Unnamed: 0,hq_transit_corr,shape_id,calitp_itp_id,geometry,calitp_url_number,hqta_segment_id,segment_sequence,stop_id,am_max_trips,pm_max_trips,departure_hour,n,hqta_type,route_type
3489,True,170521,290,"MULTIPOLYGON (((-215212.280 -62248.311, -21521...",0,1040658561,0,315608,10.0,6.0,,,hq_transit_corr,3


In [163]:
## drop incorrect HMB data, TODO investigate
## drop incorrect Cheviot data, TODO investigate refactor (run shapes in frequency order...)
# stop_id is only unique within an operator, so match on (operator, stop_id)
# rather than stop_id alone; otherwise any other agency that happens to use the
# same stop_id loses its rows too. The operator ids (290 and 300) are the ones
# shown by the `bus_hqtc` lookups for these two stop_ids.
all_hqta = all_hqta >> filter(
    ~((_.calitp_itp_id_primary == 290) & (_.stop_id == '315608')),
    ~((_.calitp_itp_id_primary == 300) & (_.stop_id == '689')),
)

In [164]:
all_hqta = all_hqta >> select(_.calitp_itp_id_primary, _.calitp_itp_id_secondary, _.agency_name_primary, _.hqta_type, _.geometry)
all_hqta = all_hqta.reset_index(drop=True)
# all_hqta['calitp_itp_id'] = all_hqta['calitp_itp_id'].astype('int64')

In [165]:
all_hqta

Unnamed: 0,calitp_itp_id_primary,calitp_itp_id_secondary,agency_name_primary,hqta_type,geometry
0,284,284,San Joaquin Regional Transit District,major_stop_bus,"POLYGON ((-110954.624 -5674.094, -110958.500 -..."
1,284,284,San Joaquin Regional Transit District,major_stop_bus,"POLYGON ((-111070.873 -5698.324, -111074.750 -..."
2,284,284,San Joaquin Regional Transit District,major_stop_bus,"POLYGON ((-111070.746 -5703.829, -111074.622 -..."
3,284,284,San Joaquin Regional Transit District,major_stop_bus,"POLYGON ((-112015.565 -6058.798, -112019.441 -..."
4,284,284,San Joaquin Regional Transit District,major_stop_bus,"POLYGON ((-112037.308 -6118.083, -112041.184 -..."
...,...,...,...,...,...
7955,4,,AC Transit,hq_corridor_bus,"MULTIPOLYGON (((-201047.727 -20653.568, -20107..."
7956,4,,AC Transit,hq_corridor_bus,"MULTIPOLYGON (((-201047.727 -20653.568, -20107..."
7957,293,,Santa Barbara Metropolitan Transit District,hq_corridor_bus,"MULTIPOLYGON (((18757.773 -397024.907, 18777.7..."
7958,293,,Santa Barbara Metropolitan Transit District,hq_corridor_bus,"POLYGON ((26470.781 -399176.692, 26461.762 -39..."


In [213]:
# Look up the agency name of the secondary operator.
# The ids live in an object column (strings for bus intersections, missing for
# everything else). Coerce them to numbers first so that string, integer and
# float ids are all matched against the integer keys of `name_dict`; missing
# or non-numeric ids, and ids without a known name, give NaN.
all_hqta['agency_name_secondary'] = (
    pd.to_numeric(all_hqta['calitp_itp_id_secondary'], errors='coerce')
    .map(name_dict)
)

In [224]:
all_hqta = all_hqta >> select(_.calitp_itp_id_primary, _.agency_name_primary, _.hqta_type,
                              _.calitp_itp_id_secondary, _.agency_name_secondary, _.geometry)

In [225]:
## TODO add check that folder exists/mkdir...
shared_utils.utils.geoparquet_gcs_export(all_hqta, f'{GCS_FILE_PATH}export/{analysis_date.isoformat()}/', f'ca_hq_transit_areas')


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



In [226]:
# Also write a local GeoJSON copy of the buffered areas.
# NOTE(review): earlier cells project the data to CA_NAD83Albers and nothing
# visible converts it back; GeoJSON consumers commonly expect WGS84 lon/lat --
# confirm whether a `to_crs` is wanted before writing.
all_hqta.to_file('./ca_hq_transit_areas.geojson', driver='GeoJSON')



### Filling Out Documentation

In [230]:
all_hqta >> head(3)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,hqta_type,calitp_itp_id_secondary,agency_name_secondary,geometry
0,284,San Joaquin Regional Transit District,major_stop_bus,284,San Joaquin Regional Transit District,"POLYGON ((-110954.624 -5674.094, -110958.500 -..."
1,284,San Joaquin Regional Transit District,major_stop_bus,284,San Joaquin Regional Transit District,"POLYGON ((-111070.873 -5698.324, -111074.750 -..."
2,284,San Joaquin Regional Transit District,major_stop_bus,284,San Joaquin Regional Transit District,"POLYGON ((-111070.746 -5703.829, -111074.622 -..."


In [231]:
all_hqta.dtypes

calitp_itp_id_primary         int64
agency_name_primary          object
hqta_type                    object
calitp_itp_id_secondary      object
agency_name_secondary        object
geometry                   geometry
dtype: object

In [232]:
with_names >> head(3)

Unnamed: 0,calitp_itp_id_primary,agency_name_primary,stop_id,hqta_type,calitp_itp_id_secondary,agency_name_secondary,geometry
0,284,San Joaquin Regional Transit District,7001,major_stop_bus,284,San Joaquin Regional Transit District,POINT (-111759.624 -5674.094)
1,284,San Joaquin Regional Transit District,3001,major_stop_bus,284,San Joaquin Regional Transit District,POINT (-111875.873 -5698.324)
2,284,San Joaquin Regional Transit District,3000,major_stop_bus,284,San Joaquin Regional Transit District,POINT (-111875.746 -5703.829)


In [233]:
with_names.dtypes

calitp_itp_id_primary         int64
agency_name_primary          object
stop_id                      object
hqta_type                    object
calitp_itp_id_secondary      object
agency_name_secondary        object
geometry                   geometry
dtype: object

In [234]:
with_names.hqta_type.unique()

array(['major_stop_bus', 'hq_corridor_bus', 'major_stop_rail',
       'major_stop_ferry', 'major_stop_brt'], dtype=object)

In [219]:
all_hqta.hqta_type.unique()

array(['major_stop_bus', 'major_stop_rail', 'major_stop_ferry',
       'major_stop_brt', 'hq_corridor_bus'], dtype=object)

In [237]:
all_hqta.hqta_type.str.len().max()

16

In [238]:
all_hqta.agency_name_primary.str.len().max()

57