# Grab transit routes near the State Highway Network (SHN)

Find transit routes within 1 mile of the SHN.

Data Sources: 
* [SHN on Geoportal](https://opendata.arcgis.com/datasets/77f2d7ba94e040a78bfbe36feb6279da_0.geojson) > processed in `highway_transit_utils.py` > exported to GCS > saved in catalog.
* Transit routes: all transit routes, both those in `gtfs_schedule.shapes` and those missing from it but found in `stops`. Uses `routes_assembled.parquet`, created by `traffic_ops/export_shapefiles.py` in GCS > saved in catalog.

In [1]:
import geopandas as gpd
import intake
import matplotlib.pyplot as plt
import pandas as pd

import highway_transit_utils
from shared_utils import geography_utils

# Relative folders used by this notebook. Cached parquet files are read from
# DATA_PATH; IMG_PATH is not referenced in the cells visible here.
DATA_PATH = "./data/"
IMG_PATH = "./img/"

# Data catalog built from the files matching "*.yml".
catalog = intake.open_catalog("*.yml")



In [2]:
# One-off cache refresh, intentionally left commented out: the following cell
# reads these three parquet files from DATA_PATH instead of recomputing them.
# NOTE(review): only `overlay` is assigned in this cell; `highways` and
# `transit_routes` are not defined here. Confirm where they come from
# (presumably highway_transit_utils) before uncommenting.
#overlay = highway_transit_utils.overlay_transit_to_highways()

#highways.to_parquet(f"{DATA_PATH}highways.parquet")
#transit_routes.to_parquet(f"{DATA_PATH}transit_routes.parquet")
#overlay.to_parquet(f"{DATA_PATH}overlay.parquet")

In [3]:
# Read the three cached parquet files from DATA_PATH.
highways = gpd.read_parquet(f"{DATA_PATH}highways.parquet")
transit_routes = gpd.read_parquet(f"{DATA_PATH}transit_routes.parquet")
gdf = gpd.read_parquet(f"{DATA_PATH}overlay.parquet")

# Length of each row's geometry as a share of the stored route length and of
# the stored highway length.
gdf = gdf.assign(
    pct_route=lambda overlay: overlay.geometry.length / overlay.route_length,
    pct_highway=lambda overlay: overlay.geometry.length / overlay.highway_length,
)

gdf.head(2)

Unnamed: 0,itp_id,shape_id,route_id,route_length,Route,County,District,RouteType,NB,SB,EB,WB,highway_length,geometry,pct_route,pct_highway
0,4,shp-10-09,10,40538.084415,112,ALA,4,State,0,0,1,1,9439.06421,"LINESTRING (5361848.256 3200204.007, 5361809.6...",0.195033,0.837611
1,4,shp-10-10,10,38768.867482,112,ALA,4,State,0,0,1,1,9439.06421,"MULTILINESTRING ((5357130.794 3202353.915, 535...",0.210867,0.86609


In [4]:
# Operators to spot-check, keyed by ITP ID.
MAP_ME = {
    182: "LA Metro",
    294: "SJ Valley Transportation Authority",
    279: "BART",
    282: "SF Muni",
    278: "SD Metropolitan Transit System",
}

for i, name in MAP_ME.items():
    is_operator = gdf.itp_id == i
    subset_df = gdf[is_operator]

    # Route counts in the full transit_routes table vs. in the overlay.
    print(f"# routes originally for {i}: {transit_routes[transit_routes.itp_id==i].route_id.nunique()}")
    print(f"# routes for {i}: {subset_df.route_id.nunique()}")

    # One map per operator, colored by route_id. The figure is closed without
    # being shown unless display(fig) is re-enabled.
    fig, ax = plt.subplots(figsize=(12, 8))
    subset_df.plot(column="route_id", ax=ax)
    ax.set_axis_off()
    ax.set_title(f"{name} (ITP ID: {i})")
    #display(fig)
    plt.close(fig)

# routes originally for 182: 120
# routes for 182: 120
# routes originally for 294: 71
# routes for 294: 70
# routes originally for 279: 12
# routes for 279: 12
# routes originally for 282: 59
# routes for 282: 59
# routes originally for 278: 109
# routes for 278: 108


In [5]:
# Summary statistics for pct_highway across all overlay rows.
gdf["pct_highway"].describe()

count    47396.000000
mean         0.647700
std          3.915224
min          0.000017
25%          0.066727
50%          0.150303
75%          0.417973
max        530.880377
Name: pct_highway, dtype: float64

In [6]:
# Summary statistics for pct_route across all overlay rows.
gdf["pct_route"].describe()

count    47396.000000
mean         0.299967
std          0.285091
min          0.000005
25%          0.078179
50%          0.192113
75%          0.449435
max          1.000000
Name: pct_route, dtype: float64

In [7]:
# Summary statistics restricted to rows where pct_highway exceeds 1.
gdf.loc[gdf.pct_highway > 1, "pct_highway"].describe()

count    4976.000000
mean        4.353537
std        11.413728
min         1.000109
25%         1.217586
50%         1.646066
75%         3.279469
max       530.880377
Name: pct_highway, dtype: float64

In [8]:
# Count rows with pct_highway above 1 once, then report the count and share.
n_rows = len(gdf)
n_over_one = int((gdf.pct_highway > 1).sum())

print(f"# rows: {n_rows}")
print(f"# rows with pct_highway > 1 (wrong?!): {n_over_one}")
print(f"% rows with pct_highway > 1 (wrong?!): {n_over_one / n_rows}")

# rows: 47396
# rows with pct_highway > 1 (wrong?!): 4976
% rows with pct_highway > 1 (wrong?!): 0.10498776268039497


There are some rows that have `pct_highway` > 1. `pct_route` falls between 0-1, and that's correct. `pct_highway` > 1 means that the transit route runs along that highway, most likely in both directions. 

LA Metro Line 910 (Silver Line) runs on the 110 freeway in both directions.

This happens because highway length was calculated for only 1 direction (similar to a centerline). We can just set all `pct_highway` > 1 to 1: the route runs the entirety of the highway segment, and once it hits the max, we store that as 1. In processing, the length was calculated first, then the 1 mile buffer was drawn, then the dissolve was done (the dissolve is computationally expensive).

In [9]:
# Rows for ITP ID 182 in LA County where pct_highway exceeds 1.
la_metro_over_one = (
    (gdf.pct_highway > 1)
    & (gdf.itp_id == 182)
    & (gdf.County == "LA")
)
inspect_cols = [
    "route_id", "route_length", "Route", "highway_length",
    "geometry", "pct_route", "pct_highway",
]
gdf.loc[la_metro_over_one, inspect_cols]

Unnamed: 0,route_id,route_length,Route,highway_length,geometry,pct_route,pct_highway
16933,266-13153,120417.620068,164,18141.693847,"LINESTRING (6539404.063 1860630.634, 6539410.0...",0.254201,1.687286
16934,266-13153,126927.964445,164,18141.693847,"LINESTRING (6540315.331 1831291.390, 6540363.0...",0.241162,1.687286
18172,910-13153,203257.376049,110,125191.103899,"MULTILINESTRING ((6491357.376 1842079.806, 649...",0.691628,1.122911
18175,910-13153,206333.820355,110,125191.103899,"LINESTRING (6474114.722 1725182.990, 6474116.0...",0.682162,1.124305
18784,804,163016.234449,110,38787.994447,"LINESTRING (6490429.325 1842121.250, 6490433.4...",0.300734,1.263908
...,...,...,...,...,...,...,...
40738,617-13153,39778.887010,187,1169.731595,"MULTILINESTRING ((6444861.771 1840281.875, 644...",0.256532,8.723856
40739,806,80968.814981,187,1169.731595,"LINESTRING (6442837.739 1832875.522, 6442876.8...",0.106869,7.397452
40740,806,80971.748323,187,1169.731595,"LINESTRING (6451453.336 1831919.219, 6451239.8...",0.107278,7.426072
45922,577-13153,155026.635759,22,7716.304762,"MULTILINESTRING ((6534260.869 1745039.562, 653...",0.101056,2.030289


In [10]:
# Set pct_highway to have max of 1.
# Series.clip is vectorized, so it avoids building one row object per record
# the way a row-wise apply(axis=1) does. Missing values are left as missing,
# and re-running the cell gives the same result.
gdf = gdf.assign(
    pct_highway = gdf.pct_highway.clip(upper=1)
)

gdf.head(2)

Unnamed: 0,itp_id,shape_id,route_id,route_length,Route,County,District,RouteType,NB,SB,EB,WB,highway_length,geometry,pct_route,pct_highway
0,4,shp-10-09,10,40538.084415,112,ALA,4,State,0,0,1,1,9439.06421,"LINESTRING (5361848.256 3200204.007, 5361809.6...",0.195033,0.837611
1,4,shp-10-10,10,38768.867482,112,ALA,4,State,0,0,1,1,9439.06421,"MULTILINESTRING ((5357130.794 3202353.915, 535...",0.210867,0.86609


In [11]:
def parallel_or_intersecting(df, pct_route_threshold=0.5, pct_highway_threshold=0.1):
    """
    Flag each row as parallel (1) or not (0).

    A row gets `parallel = 1` when both
    `pct_route >= pct_route_threshold` and
    `pct_highway >= pct_highway_threshold`; otherwise it gets 0.
    A missing value in either column compares as False, so that row gets 0.

    Parameters
    ----------
    df : DataFrame with `pct_route` and `pct_highway` columns.
    pct_route_threshold : minimum `pct_route` for a row to be flagged.
    pct_highway_threshold : minimum `pct_highway` for a row to be flagged.

    Returns
    -------
    A new frame with an added integer `parallel` column; `df` itself is
    not modified.
    """
    # Vectorized comparison instead of a row-wise apply: faster on large
    # frames, and an empty frame still yields an integer column.
    is_parallel = (
        (df.pct_route >= pct_route_threshold) &
        (df.pct_highway >= pct_highway_threshold)
    )

    return df.assign(parallel = is_parallel.astype("int64"))

In [12]:
# State highway network from the catalog, reprojected to CA_StatePlane.
orig_highways = (catalog.state_highway_network.read()
                .to_crs(geography_utils.CA_StatePlane))


# Sweep the highway threshold while holding the route threshold at 0.4.
# The outer variable is deliberately not named `i`: the inner loop over MAP_ME
# binds `i` to the ITP ID, which would overwrite the threshold mid-iteration.
for highway_threshold in [0, 0.05, 0.1, 0.15, 0.2]:
    gdf2 = parallel_or_intersecting(gdf, pct_route_threshold=0.4, 
                                    pct_highway_threshold=highway_threshold)
    print(f"highway threshold: {highway_threshold}")
    print("------------------------------------")
    print(gdf2.parallel.value_counts())
    print(f"%: {len(gdf2[gdf2.parallel==1]) / len(gdf2)}")
    
    for i, name in MAP_ME.items():
        subset_df = gdf2[gdf2.itp_id==i]

        print(f"# routes for {i}: {subset_df.route_id.nunique()}")
        # The value printed here is the share of this operator's rows
        # flagged parallel (a fraction), not a count of routes.
        print(f"# routes parallel: {len(subset_df[subset_df.parallel==1]) / len(subset_df)}")
    
        fig, ax = plt.subplots(figsize  = (12, 8))
        # Gray background: highways whose Route and County values each appear
        # in the operator's rows, one record per Route/County pair.
        orig_highways[
            (orig_highways.Route.isin(subset_df.Route)) & 
            (orig_highways.County.isin(subset_df.County))
        ].drop_duplicates(subset=["Route", "County"]).plot(ax=ax, color="gray")
        
        subset_df.plot(column="parallel", ax = ax, 
                       categorical=True, legend=True)
        
        ax.set_axis_off()
        
        ax.set_title(f"{name} (ITP ID: {i}, parallel vs intersecting)")
        #display(fig)
        plt.close(fig)

highway threshold: 0
------------------------------------
0    33885
1    13511
Name: parallel, dtype: int64
%: 0.2850662503164824
# routes for 182: 120
# routes parallel: 0.14680153089119738
# routes for 294: 70
# routes parallel: 0.2605042016806723
# routes for 279: 12
# routes parallel: 0.024179620034542316
# routes for 282: 59
# routes parallel: 0.4165745856353591
# routes for 278: 108
# routes parallel: 0.33962264150943394
highway threshold: 0.05
------------------------------------
0    34877
1    12519
Name: parallel, dtype: int64
%: 0.2641362140265001
# routes for 182: 120
# routes parallel: 0.1394204483324221
# routes for 294: 70
# routes parallel: 0.25133689839572193
# routes for 279: 12
# routes parallel: 0.024179620034542316
# routes for 282: 59
# routes parallel: 0.40994475138121544
# routes for 278: 108
# routes parallel: 0.2715633423180593
highway threshold: 0.1
------------------------------------
0    36944
1    10452
Name: parallel, dtype: int64
%: 0.220524938813402
#

In [13]:
# Flag rows using pct_route >= 0.4 and pct_highway >= 0.1.
gdf3 = gdf.pipe(
    parallel_or_intersecting,
    pct_route_threshold=0.4,
    pct_highway_threshold=0.1,
)

In [14]:
# For each highway, calculate the share of rows flagged parallel
def aggregate(df):
    """
    Summarize `df` per highway, display a trimmed view, and return the summary.

    Rows are grouped on the highway attribute columns and passed to
    `geography_utils.aggregate_by_geography` with `parallel` as a sum column,
    `shape_id` as a count column, and `itp_id` / `route_id` as nunique
    columns (presumably sum, row count and distinct count -- confirm against
    the helper). The results are renamed to `num_routes`, `num_operators`
    and `unique_route_ids`, and `pct_parallel` is `parallel / num_routes`.

    The displayed table drops County, District, SB, WB and highway_length
    and is sorted by `pct_parallel`, highest first. The returned frame keeps
    every column.
    """
    highway_group_cols = ["Route", "County", "District", "RouteType", 
                     "NB", "SB", "EB", "WB", "highway_length"]
    renamed_cols = {
        "shape_id": "num_routes",
        "itp_id": "num_operators",
        "route_id": "unique_route_ids", 
    }
    hidden_cols = ["County", "District", "SB", "WB", "highway_length"]

    by_highway = geography_utils.aggregate_by_geography(
        df, 
        group_cols = highway_group_cols,
        sum_cols = ["parallel"],
        count_cols = ["shape_id"],
        nunique_cols = ["itp_id", "route_id"]
    )
    by_highway = by_highway.rename(columns = renamed_cols)
    by_highway = by_highway.assign(
        pct_parallel = by_highway.parallel / by_highway.num_routes
    )

    # Show a narrower table; the caller still receives all columns.
    trimmed = by_highway.drop(columns = hidden_cols)
    display(trimmed.sort_values("pct_parallel", ascending=False))

    return by_highway

In [17]:
# Per-highway summary for each listed county (currently only LA).
counties = ["LA"]
for c in counties:
    in_county = gdf3.County == c
    la = aggregate(gdf3[in_county])

Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
20,14,State,1,1,111,253,17,45,0.438735
3,60,State,0,1,157,380,21,73,0.413158
4,72,State,1,0,36,109,17,18,0.330275
34,213,State,1,0,29,93,7,26,0.311828
30,126,State,0,1,10,34,3,13,0.294118
12,27,State,1,0,36,123,18,33,0.292683
17,57,State,1,0,14,50,3,24,0.28
29,107,State,1,0,22,87,9,26,0.252874
8,110,Interstate,1,0,163,679,24,149,0.240059
28,105,Interstate,0,1,79,332,22,83,0.237952


In [21]:
# Per-highway summary for each spot-check operator, stored by ITP ID.
operators_df = {}
for i, name in MAP_ME.items():
    # The old label read "ITP ID:" but printed the operator name. Print both,
    # in the same "name (ITP ID: id)" form used for the plot titles.
    print(f"{name} (ITP ID: {i})")
    operators_df[i] = aggregate(gdf3[gdf3.itp_id==i])

ITP ID: LA Metro


Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
14,27,State,1,0,17,63,1,14,0.269841
8,110,Interstate,1,0,87,422,1,58,0.206161
0,10,Interstate,0,1,62,401,1,65,0.154613
34,213,State,1,0,2,15,1,5,0.133333
3,60,State,0,1,15,114,1,18,0.131579
33,210,Interstate,0,1,12,94,1,23,0.12766
7,101,US,1,0,51,433,1,75,0.117783
18,170,State,1,0,10,85,1,22,0.117647
17,134,State,0,1,11,119,1,29,0.092437
2,5,Interstate,1,0,29,330,1,61,0.087879


ITP ID: SJ Valley Transportation Authority


Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
13,152,State,0,1,6,15,1,5,0.4
5,82,State,1,0,51,137,1,29,0.372263
3,680,Interstate,1,0,28,85,1,24,0.329412
2,237,State,0,1,41,141,1,34,0.29078
9,87,State,1,0,34,127,1,30,0.267717
12,85,State,1,0,34,146,1,35,0.232877
10,280,Interstate,1,0,35,153,1,36,0.228758
4,880,Interstate,1,0,23,119,1,31,0.193277
7,101,US,1,0,29,225,1,53,0.128889
15,17,State,1,0,7,68,1,15,0.102941


ITP ID: BART


Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
15,61,State,1,0,2,2,1,2,1.0
3,580,Interstate,0,1,4,29,1,10,0.137931
4,880,Interstate,1,0,4,33,1,12,0.121212
0,112,State,0,1,0,13,1,8,0.0
28,82,State,1,0,0,22,1,8,0.0
22,4,State,0,1,0,10,1,2,0.0
23,80,Interstate,0,1,0,23,1,8,0.0
24,242,State,1,0,0,12,1,2,0.0
25,280,Interstate,1,0,0,23,1,8,0.0
26,680,Interstate,1,0,0,12,1,2,0.0


ITP ID: SF Muni


Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
3,101,US,1,0,125,207,1,53,0.603865
2,280,Interstate,1,0,94,197,1,53,0.477157
1,80,Interstate,0,1,66,149,1,40,0.442953
10,35,State,1,0,15,44,1,15,0.340909
8,1,State,1,0,37,115,1,34,0.321739
7,82,State,1,0,8,58,1,16,0.137931
0,80,Interstate,0,1,0,2,1,1,0.0
4,82,State,1,0,0,36,1,9,0.0
5,101,US,1,0,0,29,1,9,0.0
6,280,Interstate,1,0,0,33,1,10,0.0


ITP ID: SD Metropolitan Transit System


Unnamed: 0,Route,RouteType,NB,EB,parallel,num_routes,num_operators,unique_route_ids,pct_parallel
8,163,State,1,0,63,133,1,35,0.473684
11,15,State,1,0,12,32,1,11,0.375
19,282,State,0,1,2,6,1,3,0.333333
13,125,State,1,0,17,53,1,21,0.320755
17,905,State,0,1,12,41,1,12,0.292683
6,78,State,0,1,2,8,1,4,0.25
16,11,State,1,0,2,8,1,4,0.25
9,805,Interstate,1,0,53,222,1,65,0.238739
4,67,State,1,0,10,42,1,15,0.238095
14,75,State,1,0,14,67,1,17,0.208955
