In [1]:
import dask
import dask.dataframe as dd
import dask_geopandas as dg
import datetime as dt
import geopandas as gpd
import glob
import os
import pandas as pd
import sys

#from shared_utils import utils
#from utilities import catalog_filepath, GCS_FILE_PATH
#from update_vars import analysis_date



In [None]:
# NOTE(review): DASK_GCS is assigned in a later cell (the one that also
# defines PAIRWISE_FILE), so running the notebook top to bottom on a fresh
# kernel raises NameError here.
ALL_BUS = f"{DASK_GCS}all_bus.parquet"

# Read the all_bus parquet into a GeoDataFrame.
bus_hqtc = gpd.read_parquet(ALL_BUS)

In [None]:
bus_hqtc.head()

In [2]:
# Base GCS folders: DASK_GCS holds the files read by this notebook's
# pairwise/corridor cells; HQTA_GCS is used when reading all_clipped.parquet.
DASK_GCS = "gs://calitp-analytics-data/data-analyses/dask_test/"
HQTA_GCS = "gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/"

# Intermediate inputs: the pairwise table (read with pandas) and the
# subset of corridors (read with geopandas).
PAIRWISE_FILE = f"{DASK_GCS}intermediate/pairwise.parquet"
SUBSET_CORRIDORS = f"{DASK_GCS}intermediate/subset_corridors.parquet"

In [3]:
intersecting_pairs = pd.read_parquet(PAIRWISE_FILE)
corridors = gpd.read_parquet(SUBSET_CORRIDORS)

In [4]:
def attach_geometry_to_pairs(corridors: gpd.GeoDataFrame, 
                             intersecting_pairs: pd.DataFrame) -> gpd.GeoDataFrame:
    """
    Attach segment geometry to both sides of every intersecting pair.

    `corridors` supplies `hqta_segment_id` and `geometry`.
    `intersecting_pairs` is merged on `hqta_segment_id` and then on
    `intersect_hqta_segment_id`, so it must carry both columns.

    Both merges are inner merges, so a pair is dropped when either of its
    segment ids is missing from `corridors`.

    Returns a table with columns, in order:
    hqta_segment_id, geometry, intersect_hqta_segment_id, intersect_geometry,
    sorted by the two id columns and with a fresh 0..n-1 index.
    """
    segment_geom = corridors[["hqta_segment_id", "geometry"]]

    intersect_names = {
        "hqta_segment_id": "intersect_hqta_segment_id",
        "geometry": "intersect_geometry",
    }

    # First pass: geometry for the segment itself.
    with_segment_geom = pd.merge(
        segment_geom,
        intersecting_pairs,
        on="hqta_segment_id",
        how="inner",
    )

    # Second pass: the same corridor geometry, renamed so that it lands on
    # the intersecting segment's id. The renamed corridors stay on the left
    # side of the merge, as before.
    intersect_geom = segment_geom.rename(columns=intersect_names)
    with_both_geoms = pd.merge(
        intersect_geom,
        with_segment_geom,
        on="intersect_hqta_segment_id",
        how="inner",
    )

    ordered_cols = ["hqta_segment_id", "geometry", *intersect_names.values()]
    sort_keys = ["hqta_segment_id", "intersect_hqta_segment_id"]

    ordered = with_both_geoms.reindex(columns=ordered_cols)
    ordered = ordered.sort_values(sort_keys)

    return ordered.reset_index(drop=True)

In [5]:
pairs_table = attach_geometry_to_pairs(corridors, intersecting_pairs)

In [6]:
def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Intersect each segment's geometry with its paired segment's geometry.

    `pairs_table` already has geometry attached to both the
    hqta_segment_id (`geometry`) and the intersect_hqta_segment_id
    (`intersect_geometry`). The intersection is computed row by row
    (row i of `geometry` against row i of `intersect_geometry`) using
    geopandas' vectorized `GeoSeries.intersection` rather than a Python
    loop over `itertuples`.

    Returns a GeoDataFrame with columns `hqta_segment_id` and `geometry`
    (the intersection), one row per input row in the input order, with a
    fresh 0..n-1 index and the same CRS as `pairs_table`.
    """
    # Pass the CRS object straight through. Rebuilding it from
    # f"EPSG: {to_epsg()}" breaks when the CRS has no EPSG code
    # (to_epsg() returns None).
    crs = pairs_table.crs

    # Going through plain arrays drops the original index, so both series
    # share the same default index and are compared strictly by position.
    segment_geom = gpd.GeoSeries(
        pairs_table["geometry"].to_numpy(), crs=crs)
    intersect_geom = gpd.GeoSeries(
        pairs_table["intersect_geometry"].to_numpy(), crs=crs)

    intersect_results = gpd.GeoDataFrame(
        {"hqta_segment_id": pairs_table["hqta_segment_id"].to_numpy()},
        geometry=segment_geom.intersection(intersect_geom),
        crs=crs,
    )

    return intersect_results

In [7]:
pairs_table.shape

(181496, 4)

In [9]:
test_results = find_intersections(pairs_table[:1_000])

In [11]:
test_segment = test_results.hqta_segment_id.iloc[0]

In [14]:
corridors[corridors.hqta_segment_id == test_segment].explore(tiles="Carto DB Positron")

In [17]:
# Rows of pairs_table for test_segment, keeping only the id and geometry
# of the segments it is paired with.
these_are_intersections = pairs_table[
    pairs_table.hqta_segment_id==test_segment][
    ["intersect_hqta_segment_id", "intersect_geometry"]]

# The selection above has no column named "geometry", so make
# intersect_geometry the active geometry column.
these_are_intersections = these_are_intersections.set_geometry("intersect_geometry")

In [20]:
these_are_intersections.explore("intersect_hqta_segment_id", 
                                tiles="Carto DB Positron")

In [25]:
test = test_results[test_results.hqta_segment_id == test_segment]

test = test.assign(
    index = test.index
)



In [27]:
test.explore("index", tiles = "Carto DB Positron")

In [None]:
# Rebinds test_segment (earlier taken from test_results) to a hard-coded id.
test_segment = 429285384

# NOTE(review): the two commented-out lines below are the only visible
# definitions of `this_segment` and `pairs`, which later cells still use.
#this_segment = corridors[corridors.hqta_segment_id==test_segment]
#pairs = find_corresponding_pairs(corridors, intersecting_pairs, test_segment)

In [None]:
#intersect1 = pairs[:1]
#intersect2 = pairs[2:3]

In [None]:
# Clip the test segment with the first intersecting segment.
# NOTE(review): `this_segment` and `intersect1` are only assigned in
# commented-out lines in the visible cells, so this raises NameError on a
# fresh kernel.
r1 = gpd.clip(this_segment[["hqta_segment_id", "geometry"]], 
                 intersect1[["hqta_segment_id", "geometry"]],
                 #how = "intersection"
                )

In [None]:
r1.plot()

In [None]:
# NOTE(review): `pairs` is only assigned in a commented-out line in the
# visible cells. The loop also just rebinds two locals on every row and
# never uses them, so it has no effect beyond leaving the last row's values.
for row in pairs.itertuples():
    intersecting_geom = getattr(row, 'geometry')
    intersecting_segment = getattr(row, 'hqta_segment_id')


In [None]:
test_intersect_segment = 671013905

In [None]:
pair_segment = pairs[pairs.hqta_segment_id==test_intersect_segment]

In [None]:
left.intersection(pair_segment).plot()

In [None]:
pair_segment.plot()

In [None]:
# Overlay (intersection) of `left` with the geometry of the one test
# intersecting segment, shown on an interactive map.
# NOTE(review): `left` is not assigned in any visible cell, and `pairs` is
# only assigned in a commented-out line.
gpd.overlay(
    left[["hqta_segment_id", "geometry"]], 
    pairs[pairs.hqta_segment_id==test_intersect_segment][["geometry"]], 
    how = "intersection", 
).explore()

In [None]:
gpd.clip(
    left[["hqta_segment_id", "geometry"]], 
    pairs[pairs.hqta_segment_id==test_intersect_segment][["geometry"]], 
).explore()

In [None]:
o1.plot()

In [None]:
# Add the row index and rounded centroid coordinates as columns; the next
# cell de-duplicates on x/y.
# NOTE(review): `o1` is never created in the visible cells (it is first
# used in the preceding cell), so this depends on leftover kernel state.
o1 = o1.assign(
    index = o1.index,
    x = o1.geometry.centroid.x.round(2),
    y = o1.geometry.centroid.y.round(2)
)

In [None]:
o1.drop_duplicates(subset=["x", "y"])

In [None]:
for i in o1.index.tolist():
    display(o1[o1.index==i].explore())

In [None]:
o1.explore()

In [None]:
corridors[corridors.hqta_segment_id==test_segment].explore()

In [None]:
o1

In [None]:
o1.explore()

In [None]:
from update_vars import COMPILED_CACHED_VIEWS

In [None]:
intersecting_pairs.shape, corridors.shape

In [None]:
def clip(operator_df: dg.GeoDataFrame, 
         intersecting_pairs: gpd.GeoDataFrame, 
        ) -> dg.GeoDataFrame:
    """
    Clip each of an operator's routes by the segments of every other route.

    For every distinct `route_identifier` in `operator_df`, the rows of
    that route are clipped (dask_geopandas.clip, keep_geom_type=True) with
    the rows of `intersecting_pairs` whose `route_identifier` differs.
    The per-route results are concatenated and de-duplicated.

    Both inputs must have a `route_identifier` column.

    Returns a lazy dask GeoDataFrame. If `operator_df` has no routes it is
    returned unchanged, because there is nothing to clip.
    """
    # Materialize the distinct route ids once, so that the loop runs over
    # concrete values rather than a lazy dask Series.
    operator_routes = operator_df.route_identifier.unique().compute()

    results = [
        dg.clip(
            operator_df[operator_df.route_identifier == route],
            intersecting_pairs[intersecting_pairs.route_identifier != route],
            keep_geom_type = True
        )
        for route in operator_routes
    ]

    if not results:
        # concat cannot take an empty list
        return operator_df

    # dd.concat is the public entry point; dd.multi is an internal module.
    intersections = dd.concat(results, axis=0).drop_duplicates()

    return intersections

In [None]:
itp_id = 182

metro = corridors[corridors.calitp_itp_id==itp_id]

test_route = 3588563102 # 720
test_route

In [None]:
#test = corridors.set_index("calitp_itp_id").loc[182]

In [None]:
metro720_segments = metro[metro.route_identifier==test_route].hqta_segment_id.unique()

In [None]:
metro_pairs = intersecting_pairs[
    intersecting_pairs.hqta_segment_id.isin(metro720_segments)]

In [None]:
# Take a look at what routes intersect with Metro's 720
cols = ["calitp_itp_id", "route_id", "hqta_segment_id", 
        "route_identifier", "geometry"]

# Full info of metro_pairs displayed
corridors[corridors.hqta_segment_id.isin(metro_pairs.intersect_hqta_segment_id)].head()

In [None]:
# Pick one segment along 720 to look at
metro_segment = 49694670

In [None]:
metro[metro.hqta_segment_id==metro_segment].explore("route_id")

In [None]:
intersect_with_segment = metro_pairs[metro_pairs.hqta_segment_id==metro_segment]
intersect_with_segment.head()

In [None]:
corridors[corridors.hqta_segment_id.isin(
    intersect_with_segment.intersect_hqta_segment_id)].explore("route_id")

Why is clipping still needed?

Every pairwise combination that shows up already marks where an intersection occurs, so those segments can simply be selected, buffered, and used to find the stops nearby.

In [None]:
one_route = metro[metro.route_identifier==test_route]

In [None]:
intersecting_pairs_for_one_route = intersecting_pairs[
    intersecting_pairs.hqta_segment_id.isin(one_route.hqta_segment_id.unique())
]

In [None]:
these_segments_intersect_with_720 = corridors[
    corridors.hqta_segment_id.isin(
        intersecting_pairs_for_one_route.intersect_hqta_segment_id.unique())][
    ["hqta_segment_id", "route_direction", "geometry"]]

In [None]:
one_route.plot()

In [None]:
gpd.clip(one_route, 
         these_segments_intersect_with_720,
        keep_geom_type=True
        ).plot()

In [None]:

# Overlay the first segment of the route (the [:1] slice) with all the
# segments that intersect the route.
# NOTE(review): the overlay result is neither assigned nor displayed, so
# this loop produces no visible output.
for seg in one_route.hqta_segment_id.unique()[:1]:
    gpd.overlay(one_route[one_route.hqta_segment_id==seg],
                these_segments_intersect_with_720,
             how="intersection", 
                keep_geom_type=True
            )


In [None]:
unique_segments = corridors.hqta_segment_id.unique()

In [None]:
def find_corresponding_pairs(gdf: gpd.GeoDataFrame,
    pairs_table: pd.DataFrame, 
                             segment_id: int)-> gpd.GeoDataFrame: 
    """
    Return the rows of `gdf` for the segments that intersect `segment_id`.

    `pairs_table` is filtered to the rows whose `hqta_segment_id` equals
    `segment_id`; the `intersect_hqta_segment_id` values of those rows
    select the matching rows of `gdf` (matched on `gdf.hqta_segment_id`).
    The original index of `gdf` is kept.

    NOTE(review): the original cell contained only the signature, which is
    a SyntaxError. This body is reconstructed from the exploratory cells in
    this notebook that filter the pairs table by hqta_segment_id and then
    select corridors with `.isin(...intersect_hqta_segment_id)`. Confirm
    that this is the intended behavior.
    """
    intersecting_ids = pairs_table.loc[
        pairs_table.hqta_segment_id == segment_id,
        "intersect_hqta_segment_id"
    ]

    return gdf[gdf.hqta_segment_id.isin(intersecting_ids)]

In [None]:
import warnings
warnings.filterwarnings("ignore")

def intersections_for_operator(corridors, itp_id):
    """
    Clip one operator's corridor segments by their corresponding pairs.

    The rows of `corridors` whose `calitp_itp_id` equals `itp_id` are
    turned into a single-partition dask GeoDataFrame and passed to `clip`
    together with the output of `find_corresponding_pairs`.

    Reads the notebook-level `intersecting_pairs` table (it is not a
    parameter). Returns whatever `clip` returns; callers in this notebook
    call `.compute()` on the result.
    """
    is_operator = corridors.calitp_itp_id == itp_id
    operator_gdf = dg.from_geopandas(corridors[is_operator], npartitions=1)

    # NOTE(review): itp_id is passed in the position that
    # find_corresponding_pairs names `segment_id`. Confirm that an operator
    # id is really what that function expects.
    pairs = find_corresponding_pairs(corridors, intersecting_pairs, itp_id)

    return clip(operator_gdf, pairs)

In [None]:
# Load the existing all_clipped output from the HQTA folder so that it can
# be plotted next to the results computed in the following cells.
all_clipped = gpd.read_parquet(f"{HQTA_GCS}all_clipped.parquet")

# Subsets for operators 182 and 300, the same ids that are passed to
# intersections_for_operator below.
metro_old = all_clipped[all_clipped.calitp_itp_id==182]
bbb_old = all_clipped[all_clipped.calitp_itp_id==300]

In [None]:
# NOTE(review): this rebinds `metro`, which an earlier cell set to the rows
# of corridors for itp_id 182. Earlier cells that use `metro` will see the
# clipped result if they are re-run after this one.
metro = intersections_for_operator(corridors, 182).compute()

In [None]:
metro.plot("segment_sequence")

In [None]:
metro_old.plot("hqta_segment_id")

In [None]:
bbb = intersections_for_operator(corridors, 300).compute()

In [None]:
bbb.plot("segment_sequence")

In [None]:
bbb_old.plot("hqta_segment_id")