In [1]:
import sys

In [2]:
import intake
import pandas as pd
import geopandas as gpd
from calitp_data_analysis import geography_utils
from siuba import *
import shapely

In [3]:
from shared_utils import catalog_utils, rt_dates
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [4]:
from segment_speed_utils import helpers

# Exploratory Analysis

Start working on how to identify and count "border zones" where transit runs along Census Tract boundaries and VRH/VRM should be split accordingly (avoiding double-counting or arbitrary allocation to only one border tract)

In [5]:
catalog = intake.open_catalog("*.yml")

In [6]:
def read_census_tracts(
    crs: str = geography_utils.CA_NAD83Albers_m,
    cols: list = ["Tract", "pop_sq_mi", "geometry"]
) -> gpd.GeoDataFrame:
    census_tracts = (
        catalog.calenviroscreen_lehd_by_tract.read()
        .to_crs(crs)
        [cols]
    ).rename(columns={'Tract':'tract'})
    return census_tracts

In [7]:
tracts = read_census_tracts(cols=['Tract', 'geometry'])

In [8]:
analysis_date = rt_dates.DATES['feb2025']

In [9]:
shapes = helpers.import_scheduled_shapes(analysis_date)

In [10]:
# shapes.explore()

In [11]:
BORDER_BUFFER_METERS = 35

In [15]:
def intersection_hash(row):
    '''
    Get unique hash of intersection zones.
    No need to keep both t1 x t2 and t2 x t1
    '''
    t1 = int(row.tract_1[2:]) #  drop state code
    t2 = int(row.tract_2[2:])
    row_tracts = [t1, t2]
    row_tracts.sort() #  modifies inplace
    return hash(tuple(row_tracts))

In [57]:
def find_borders(tracts_gdf: gpd.GeoDataFrame,
                border_buffer: int = BORDER_BUFFER_METERS
) -> gpd.GeoDataFrame:
    '''
    '''
    tracts_gdf = tracts_gdf.copy()
    tracts_gdf.geometry = tracts_gdf.buffer(border_buffer)
    borders = gpd.overlay(tracts_gdf, tracts_gdf)
    borders = borders[borders['tract_1'] != borders['tract_2']]
    borders['intersection_hash'] = borders.apply(intersection_hash, axis=1)
    borders = borders.drop_duplicates(subset=['intersection_hash'])
    return borders

In [58]:
borders = find_borders(tracts)

In [59]:
borders.head(3)

Unnamed: 0,tract_1,tract_2,geometry,intersection_hash
1,6001404300,6001400100,"POLYGON ((-196294.855 -15866.615, -196291.388 ...",810165257910103043
2,6001404400,6001400100,"POLYGON ((-196332.808 -15913.026, -196332.893 ...",-6603333591490277215
3,6001421600,6001400100,"POLYGON ((-197285.488 -12214.433, -197283.307 ...",3286629633234426559


## interpolation points

In [63]:
shapes.head(3)

Unnamed: 0,shape_array_key,geometry
0,7d199439c94815cbf76cc002feb4de7d,"LINESTRING (330989.175 -426478.193, 330965.137..."
1,a3f097f45165a591d54d808ed870d871,"LINESTRING (89405.704 -40294.539, 89409.725 -4..."
2,987fd928878a31c5fa38c91903cd81ed,"LINESTRING (180091.818 -446409.639, 180061.887..."


In [62]:
trip_cols = ['gtfs_dataset_key', 'name', 'trip_id',
        'shape_id', 'shape_array_key', 'route_id',
        'route_key', 'direction_id', 'route_short_name',
        'trip_instance_key', 'feed_key']

trips = (helpers.import_scheduled_trips(analysis_date, columns=trip_cols)
        .dropna(subset=['shape_id'])
        )

In [35]:
act_6 = trips.query('name.str.contains("AC Transit") & shape_id == "shp-6-03"')

In [36]:
act_6_shape = shapes.query('shape_array_key.isin(@act_6.shape_array_key)')

In [39]:
def overlay_to_borders(
    shape_gdf: gpd.GeoDataFrame,
    border_gdf: gpd.GeoDataFrame,
    sensitivity_dist: int = BORDER_BUFFER_METERS * 4
                 ):
    '''
    
    '''
    overlaid = shape_gdf.overlay(border_gdf, how='intersection')
    overlaid = overlaid.query('geometry.length > @sensitivity_dist')
    return overlaid

In [45]:
def overlay_to_tracts(
    shape_gdf_no_border: gpd.GeoDataFrame,
    tract_gdf: gpd.GeoDataFrame,
                 ):
    '''
    
    '''
    tract_gdf = tract_gdf[['tract', 'geometry']]
    return shape_gdf_no_border.overlay(tract_gdf, how='intersection')

In [82]:
def overlay_tracts_borders(
    shape_gdf: gpd.GeoDataFrame,
    tract_gdf: gpd.GeoDataFrame,
    border_gdf: gpd.GeoDataFrame,
    sensitivity_dist: int = BORDER_BUFFER_METERS * 4
):
    '''
    '''
    try:
        border_overlaid = overlay_to_borders(shape_gdf, borders)
        not_border = shape_gdf.overlay(border_overlaid, how='difference')
        tract_overlaid = overlay_to_tracts(not_border, tracts)
        tracts_and_borders = (pd.concat([tract_overlaid, border_overlaid])
                              .reset_index(drop=True)
                             )
        tracts_and_borders = tracts_and_borders.assign(
            border = ~tracts_and_borders.tract_2.isna(),
            start = tracts_and_borders.geometry.apply(lambda x: shapely.Point(x.coords[0])),
            end = tracts_and_borders.geometry.apply(lambda x: shapely.Point(x.coords[-1])),
            tsi_segment_id = tracts_and_borders.tract.combine_first(tracts_and_borders.intersection_hash)
        )
        return tracts_and_borders
    except Exception as e:
        print(f'{shape_gdf}, {e}')

In [86]:
overlay_tracts_borders(shapes.loc[1:1], tracts, borders)

Unnamed: 0,shape_array_key,tract,geometry,tract_1,tract_2,intersection_hash,border,start,end,tsi_segment_id
0,a3f097f45165a591d54d808ed870d871,6051000200,"LINESTRING (89405.704 -40294.539, 89409.725 -4...",,,,False,POINT (89405.704 -40294.539),POINT (91775.627 -41430.005),6051000200


In [95]:
shape_gdf = shapes.loc[2:2]

### Whittier example 

987fd928878a31c5fa38c91903cd81ed

In [119]:
shape_gdf

Unnamed: 0,shape_array_key,geometry
2,987fd928878a31c5fa38c91903cd81ed,"LINESTRING (180091.818 -446409.639, 180061.887..."


In [96]:
border_overlaid = overlay_to_borders(shape_gdf, borders)
not_border = shape_gdf.overlay(border_overlaid, how='difference')
tract_overlaid = overlay_to_tracts(not_border, tracts)
tracts_and_borders = pd.concat([tract_overlaid, border_overlaid]).reset_index(drop=True)

In [105]:
borders

Unnamed: 0,tract_1,tract_2,geometry,intersection_hash
1,06001404300,06001400100,"POLYGON ((-196294.855 -15866.615, -196291.388 ...",810165257910103043
2,06001404400,06001400100,"POLYGON ((-196332.808 -15913.026, -196332.893 ...",-6603333591490277215
3,06001421600,06001400100,"POLYGON ((-197285.488 -12214.433, -197283.307 ...",3286629633234426559
4,06001422600,06001400100,"POLYGON ((-197290.037 -12678.788, -197286.751 ...",2982289320613360605
5,06001422700,06001400100,"POLYGON ((-197215.664 -13930.646, -197212.889 ...",-4431209528787019653
...,...,...,...,...
59708,06115040303,06115040302,"POLYGON ((-134551.498 122994.216, -134555.044 ...",390392221421106355
59710,06115040500,06115040302,"POLYGON ((-135094.847 123008.145, -135096.165 ...",-4112068886901403029
59714,06115040303,06115040301,"POLYGON ((-134384.030 124648.695, -134381.528 ...",8986987643166269919
59719,06115040500,06115040303,"POLYGON ((-134552.206 123064.212, -134548.648 ...",-2150769350657280594


In [117]:
m = borders.clip(tracts_and_borders.buffer(3000)).explore()

shapes can leave and re-enter tracts, need to handle.

* some short segments can be dropped 
* longer ones should be preserved and given unique ids...

In [None]:
tracts_and_borders.explode().reset_index().explore(column='level_1', m=m)

### continue

In [94]:
overlay_tracts_borders(act_6_shape, tracts, borders)

Unnamed: 0,shape_array_key,tract,geometry,tract_1,tract_2,intersection_hash,border,start,end,tsi_segment_id
0,3caab5c44277cbdc8fbc755bc0ea7633,6001400300.0,"LINESTRING (-198798.506 -17437.270, -198790.77...",,,,False,POINT (-198798.506 -17437.270),POINT (-198749.617 -17116.295),6001400300.0
1,3caab5c44277cbdc8fbc755bc0ea7633,6001401100.0,"LINESTRING (-199161.264 -19021.055, -199159.31...",,,,False,POINT (-199161.264 -19021.055),POINT (-198815.272 -17561.412),6001401100.0
2,3caab5c44277cbdc8fbc755bc0ea7633,6001401300.0,"LINESTRING (-199465.932 -20338.104, -199465.71...",,,,False,POINT (-199465.932 -20338.104),POINT (-199162.056 -19024.732),6001401300.0
3,3caab5c44277cbdc8fbc755bc0ea7633,6001402800.0,"LINESTRING (-199434.055 -20645.285, -199451.77...",,,,False,POINT (-199434.055 -20645.285),POINT (-199465.897 -20337.863),6001402800.0
4,3caab5c44277cbdc8fbc755bc0ea7633,6001403100.0,"LINESTRING (-199973.418 -21403.301, -199992.82...",,,,False,POINT (-199973.418 -21403.301),POINT (-199827.576 -21391.787),6001403100.0
5,3caab5c44277cbdc8fbc755bc0ea7633,6001422800.0,"LINESTRING (-198397.616 -14446.363, -198397.81...",,,,False,POINT (-198397.616 -14446.363),POINT (-198452.089 -14071.435),6001422800.0
6,3caab5c44277cbdc8fbc755bc0ea7633,6001423601.0,"LINESTRING (-198524.572 -15434.597, -198524.42...",,,,False,POINT (-198524.572 -15434.597),POINT (-198445.990 -14857.530),6001423601.0
7,3caab5c44277cbdc8fbc755bc0ea7633,6001423602.0,"LINESTRING (-198445.990 -14857.530, -198443.90...",,,,False,POINT (-198445.990 -14857.530),POINT (-198397.616 -14446.363),6001423602.0
8,3caab5c44277cbdc8fbc755bc0ea7633,6001423902.0,"LINESTRING (-198575.033 -15796.529, -198572.36...",,,,False,POINT (-198575.033 -15796.529),POINT (-198524.622 -15437.878),6001423902.0
9,3caab5c44277cbdc8fbc755bc0ea7633,,"LINESTRING (-198450.383 -14083.180, -198452.08...",6001422800.0,6001422600.0,2.133764e+18,True,POINT (-198450.383 -14083.180),POINT (-199046.252 -14096.308),2.1337641381851428e+18


In [80]:
shapes.head(2).groupby('shape_array_key').agg(overlay_tracts_borders, tract_gdf=tracts, border_gdf=borders)

                                            geometry
0  LINESTRING (330989.175 -426478.193, 330965.137..., Sub-geometries may have coordinate sequences, but multi-part geometries do not


ValueError: Buffer has wrong number of dimensions (expected 1, got 2)

In [74]:
overlay_to_borders(act_6_shape, borders)

Unnamed: 0,shape_array_key,tract_1,tract_2,intersection_hash,geometry
0,3caab5c44277cbdc8fbc755bc0ea7633,6001422800,6001422600,2133764138185142816,"LINESTRING (-198450.383 -14083.180, -198452.08..."
1,3caab5c44277cbdc8fbc755bc0ea7633,6001422900,6001422600,-5279734711215237442,"LINESTRING (-199019.378 -14123.518, -199019.52..."
4,3caab5c44277cbdc8fbc755bc0ea7633,6001401100,6001400300,-8684109331624108527,"LINESTRING (-198816.499 -17568.317, -198815.27..."
5,3caab5c44277cbdc8fbc755bc0ea7633,6001400500,6001400400,9114761875024974634,"LINESTRING (-198702.090 -16712.584, -198700.48..."
6,3caab5c44277cbdc8fbc755bc0ea7633,6001400600,6001400400,1701263025624594376,"LINESTRING (-198749.754 -17117.124, -198749.61..."
13,3caab5c44277cbdc8fbc755bc0ea7633,6001402900,6001402800,8444362949197194273,"LINESTRING (-199696.961 -21209.665, -199691.76..."
18,3caab5c44277cbdc8fbc755bc0ea7633,6001403000,6001403100,-3476482707806839053,"LINESTRING (-199838.298 -21386.147, -199827.57..."


In [41]:
border_overlaid = overlay_to_borders(act_6_shape, borders)

In [42]:
border_overlaid.explore(column='intersection_hash', tiles='CartoDB Positron', categorical=True)

In [44]:
not_border

Unnamed: 0,shape_array_key,geometry
0,3caab5c44277cbdc8fbc755bc0ea7633,"MULTILINESTRING ((-199973.418 -21403.301, -199..."


In [47]:
tract_overlaid.explore(column='tract', tiles='CartoDB Positron', categorical=True)

In [52]:
pd.concat([tracts_and_borders.loc[:1], tracts_and_borders.loc[4:4]]).explore(column='tract')

In [53]:
tracts_and_borders.reset_index().explore(column='border', tiles='CartoDB Positron', categorical=True)

In [54]:
tracts_and_borders

Unnamed: 0,shape_array_key,tract,geometry,tract_1,tract_2,intersection_hash,border,start,end
0,3caab5c44277cbdc8fbc755bc0ea7633,6001400300.0,"LINESTRING (-198798.506 -17437.270, -198790.77...",,,,False,POINT (-198798.506 -17437.270),POINT (-198749.617 -17116.295)
1,3caab5c44277cbdc8fbc755bc0ea7633,6001401100.0,"LINESTRING (-199161.264 -19021.055, -199159.31...",,,,False,POINT (-199161.264 -19021.055),POINT (-198815.272 -17561.412)
2,3caab5c44277cbdc8fbc755bc0ea7633,6001401300.0,"LINESTRING (-199465.932 -20338.104, -199465.71...",,,,False,POINT (-199465.932 -20338.104),POINT (-199162.056 -19024.732)
3,3caab5c44277cbdc8fbc755bc0ea7633,6001402800.0,"LINESTRING (-199434.055 -20645.285, -199451.77...",,,,False,POINT (-199434.055 -20645.285),POINT (-199465.897 -20337.863)
4,3caab5c44277cbdc8fbc755bc0ea7633,6001403100.0,"LINESTRING (-199973.418 -21403.301, -199992.82...",,,,False,POINT (-199973.418 -21403.301),POINT (-199827.576 -21391.787)
5,3caab5c44277cbdc8fbc755bc0ea7633,6001422800.0,"LINESTRING (-198397.616 -14446.363, -198397.81...",,,,False,POINT (-198397.616 -14446.363),POINT (-198452.089 -14071.435)
6,3caab5c44277cbdc8fbc755bc0ea7633,6001423601.0,"LINESTRING (-198524.572 -15434.597, -198524.42...",,,,False,POINT (-198524.572 -15434.597),POINT (-198445.990 -14857.530)
7,3caab5c44277cbdc8fbc755bc0ea7633,6001423602.0,"LINESTRING (-198445.990 -14857.530, -198443.90...",,,,False,POINT (-198445.990 -14857.530),POINT (-198397.616 -14446.363)
8,3caab5c44277cbdc8fbc755bc0ea7633,6001423902.0,"LINESTRING (-198575.033 -15796.529, -198572.36...",,,,False,POINT (-198575.033 -15796.529),POINT (-198524.622 -15437.878)
9,3caab5c44277cbdc8fbc755bc0ea7633,,"LINESTRING (-198450.383 -14083.180, -198452.08...",6001422800.0,6001422600.0,2.133764e+18,True,POINT (-198450.383 -14083.180),POINT (-199046.252 -14096.308)


In [55]:
tracts_and_borders.to_parquet(f'test_tracts_borders_{analysis_date}.parquet')