# Transit Routes on SHN

Draw a very small buffer around state highways (the State Highway Network, SHN) and keep only the transit routes that run within it.

In [1]:
import branca
import datetime as dt
import geopandas as gpd
import intake
import os
import pandas as pd

# Set CALITP_BQ_MAX_BYTES to 100 billion (environment values must be strings).
# NOTE(review): presumably read by the calitp imports below as a BigQuery
# bytes-scanned cap, which is why it is set before them — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
from siuba import *

import create_parallel_corridors
import utils
from shared_utils import map_utils, geography_utils
from shared_utils import calitp_color_palette as cp

# Reuse the IMG_PATH / DATA_PATH constants defined in create_parallel_corridors.
IMG_PATH = create_parallel_corridors.IMG_PATH
DATA_PATH = create_parallel_corridors.DATA_PATH

# Open the intake catalog(s) matching "*.yml".
# NOTE(review): the pattern has no directory part, so it presumably resolves
# against the current working directory — confirm.
catalog = intake.open_catalog("*.yml")



In [2]:
# NOTE: this whole cell is one bare triple-quoted string, so none of the query
# code inside it runs; the cell only echoes the string back as its output.
# It records how route_id was attached to shape_id to build `shapes`.
'''
# Need route_ids for parallel corridors
# Add this info on and use alternate_df in make_analysis_data()
SELECTED_DATE = dt.date(2022, 1, 6)

trips = (tbl.views.gtfs_schedule_fact_daily_trips()
         >> filter(_.service_date == SELECTED_DATE, _.is_in_service == True)
         >> select(_.calitp_itp_id, _.service_date, 
                   _.route_id, _.trip_key)
)

dim_trips = (tbl.views.gtfs_schedule_dim_trips()
             >> select(_.calitp_itp_id, _.route_id, 
                       _.shape_id, _.trip_key)
)

shapes = (trips 
          >> inner_join(_, dim_trips, 
                        on = ["calitp_itp_id", "trip_key", "route_id"]
                       )
          >> select(_.calitp_itp_id, _.route_id, _.shape_id)
          >> distinct()
          >> collect()
         )
'''

'\n# Need route_ids for parallel corridors\n# Add this info on and use alternate_df in make_analysis_data()\nSELECTED_DATE = dt.date(2022, 1, 6)\n\ntrips = (tbl.views.gtfs_schedule_fact_daily_trips()\n         >> filter(_.service_date == SELECTED_DATE, _.is_in_service == True)\n         >> select(_.calitp_itp_id, _.service_date, \n                   _.route_id, _.trip_key)\n)\n\ndim_trips = (tbl.views.gtfs_schedule_dim_trips()\n             >> select(_.calitp_itp_id, _.route_id, \n                       _.shape_id, _.trip_key)\n)\n\nshapes = (trips \n          >> inner_join(_, dim_trips, \n                        on = ["calitp_itp_id", "trip_key", "route_id"]\n                       )\n          >> select(_.calitp_itp_id, _.route_id, _.shape_id)\n          >> distinct()\n          >> collect()\n         )\n'

In [3]:
# NOTE: disabled — the cell is a bare triple-quoted string, so nothing in it runs.
# It records how transit_routes was inner-joined to `shapes` to build a `df`
# that carries route_id.
'''
transit_routes = catalog.transit_routes.read()

df = pd.merge(transit_routes,
              shapes,
              on = ["calitp_itp_id", "shape_id"],
              # Outer join shows there are left_only and right_only obs
              # But, can only do stuff with full info
              how = "inner",
              # There are some obs where same shape_id is linked to multiple route_id
              # Allow for 1:m
              validate = "1:m",
).rename(columns = {"calitp_itp_id": "itp_id"})
'''

'\ntransit_routes = catalog.transit_routes.read()\n\ndf = pd.merge(transit_routes,\n              shapes,\n              on = ["calitp_itp_id", "shape_id"],\n              # Outer join shows there are left_only and right_only obs\n              # But, can only do stuff with full info\n              how = "inner",\n              # There are some obs where same shape_id is linked to multiple route_id\n              # Allow for 1:m\n              validate = "1:m",\n).rename(columns = {"calitp_itp_id": "itp_id"})\n'

In [4]:
# NOTE: disabled — the cell is a bare triple-quoted string, so nothing in it runs.
# It records the make_analysis_data() call (50 ft highway buffer) written with
# FILE_NAME "routes_on_shn"; presumably this produced the routes_on_shn.parquet
# file that a later cell reads from DATA_PATH — confirm.
'''
create_parallel_corridors.make_analysis_data(
    hwy_buffer_feet=50, alternate_df = df,
    pct_route_threshold = 0.3, pct_highway_threshold = 0.1,
    DATA_PATH = DATA_PATH, FILE_NAME = "routes_on_shn"
)
'''

'\ncreate_parallel_corridors.make_analysis_data(\n    hwy_buffer_feet=50, alternate_df = df,\n    pct_route_threshold = 0.3, pct_highway_threshold = 0.1,\n    DATA_PATH = DATA_PATH, FILE_NAME = "routes_on_shn"\n)\n'

In [5]:
# Read the transit_routes entry from the intake catalog.
# NOTE(review): transit_routes is not referenced again in the visible cells —
# confirm this read is still needed.
transit_routes = catalog.transit_routes.read()

In [6]:
# Read the local transit_routes parquet file.
# NOTE(review): gdf is not referenced again in the visible cells, and the path
# is hard-coded relative to the working directory — confirm this read is still needed.
gdf = gpd.read_parquet("./data/transit_routes.parquet")

In [7]:
# Routes-on-SHN analysis output, restricted to rows flagged as parallel
df = (
    gpd.read_parquet(f"{DATA_PATH}routes_on_shn.parquet")
    .loc[lambda routes: routes.parallel == 1]
    .reset_index(drop=True)
)

# Highways, read as stored.
# NOTE(review): an earlier comment here said "Draw highways with 250 ft buffer",
# but no buffering happens in this cell — presumably any buffer is already
# baked into the parquet file; confirm.
highways = gpd.read_parquet(f"{DATA_PATH}highways.parquet")

# Service hours increase data, with the operator id renamed to match df
service = pd.read_parquet(
    f"{utils.GCS_FILE_PATH}2022_Jan/service_increase.parquet"
).rename(columns={"calitp_itp_id": "itp_id"})

Address duplicates.

Duplicates here mean the same `shape_id` appears with multiple `route_id` values.

This has been allowed up to now, but should it be allowed for aggregation?
Will it double-count? Leaving it for now, but the duplicates might need to be removed.

In [8]:
# Spot-check one operator/shape pair that shows up with more than one route_id
check_ids = [194]
check_shapes = ["107"]

df.loc[df.itp_id.isin(check_ids) & df.shape_id.isin(check_shapes)]

Unnamed: 0,itp_id,shape_id,route_id,route_length,total_routes,Route,County,District,RouteType,NB,SB,EB,WB,highway_length,geometry,pct_route,pct_highway,parallel
293,194,107,1237,132766.125267,52,1,MRN,4,State,1,1,0,0,263553.675909,"MULTILINESTRING ((5256730.147 3264676.514, 525...",0.319,0.161,1
294,194,107,61,132766.125267,52,1,MRN,4,State,1,1,0,0,263553.675909,"MULTILINESTRING ((5256730.147 3264676.514, 525...",0.319,0.161,1


In [9]:
# Same operator/shape pair, looked up in the service hours table
service.loc[service.itp_id.isin(check_ids) & service.shape_id.isin(check_shapes)]

Unnamed: 0,itp_id,shape_id,day_name,departure_hour,trips_per_hour,mean_runtime_min,tract_type,min_runtime_min,runtime,additional_trips,service_hrs,addl_service_hrs,service_hours_annual,addl_service_hrs_annual
174304,194,107,Saturday,5,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174305,194,107,Saturday,6,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174306,194,107,Saturday,7,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174307,194,107,Saturday,8,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174308,194,107,Saturday,9,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174309,194,107,Saturday,10,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174310,194,107,Saturday,11,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174311,194,107,Saturday,12,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174312,194,107,Saturday,13,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667
174313,194,107,Saturday,14,0.0,,rural,85.0,85,1.0,0.0,1.416667,0.0,73.666667


In [10]:
# Diagnostic only: outer-merge service hours with the parallel routes to see
# how many rows match on (itp_id, shape_id).
# Stored under its own name so this check never stands in for the inner-merged
# `service_increase` table that the analysis cells use; previously both cells
# assigned to the same variable, so skipping or re-running a cell could leave
# the outer-merged frame (with `_merge` and unmatched rows) feeding the analysis.
merge_check = pd.merge(
    service,
    df,
    on=["itp_id", "shape_id"],
    how="outer",
    # "m:m" is accepted by pandas but performs no uniqueness check
    validate="m:m",
    indicator=True,
)

merge_check._merge.value_counts()

left_only     256720
both           15472
right_only        34
Name: _merge, dtype: int64

In [11]:
# Keep only service rows whose (itp_id, shape_id) matches a parallel route.
# Many-to-many: a shape can appear on several rows of df.
service_increase = service.merge(
    df,
    how="inner",
    on=["itp_id", "shape_id"],
    validate="m:m",
)

In [12]:
# Compare route coverage before and after attaching service hours
print("# unique route ids originally:", df.route_id.nunique())
print("# unique route ids with service hrs info:", service_increase.route_id.nunique())

# unique route ids originally: 295
# unique route ids with service hrs info: 273


In [13]:
# List the merged columns: service-hours fields followed by the route/highway fields from df
service_increase.columns

Index(['itp_id', 'shape_id', 'day_name', 'departure_hour', 'trips_per_hour',
       'mean_runtime_min', 'tract_type', 'min_runtime_min', 'runtime',
       'additional_trips', 'service_hrs', 'addl_service_hrs',
       'service_hours_annual', 'addl_service_hrs_annual', 'route_id',
       'route_length', 'total_routes', 'Route', 'County', 'District',
       'RouteType', 'NB', 'SB', 'EB', 'WB', 'highway_length', 'geometry',
       'pct_route', 'pct_highway', 'parallel'],
      dtype='object')

In [14]:
sum_cols = [
    "additional_trips", "service_hrs",
    "addl_service_hrs", "service_hours_annual",
    "addl_service_hrs_annual",
]

# service_increase repeats every service row once per matching row in df, and
# df can hold several rows for the same (itp_id, shape_id) — e.g. one shape
# linked to multiple route_ids. Summing service_increase by operator would
# therefore count that shape's service hours more than once.
# Aggregate instead over each service row matched at most once, by joining
# against the unique (itp_id, shape_id) keys present in df.
matched_shapes = df[["itp_id", "shape_id"]].drop_duplicates()

service_on_shn = pd.merge(
    service,
    matched_shapes,
    on=["itp_id", "shape_id"],
    how="inner",
    # m:1 is actually enforced: fails loudly if the keys are not unique
    validate="m:1",
)

a1 = geography_utils.aggregate_by_geography(
    service_on_shn,
    group_cols=["itp_id", "day_name", "tract_type"],
    sum_cols=sum_cols,
)

In [15]:
# How many operators, and operator-route pairs, have service hours attached
print("# operators:", service_increase.itp_id.nunique())
print(
    "# operators-routes:",
    service_increase.drop_duplicates(subset=["itp_id", "route_id"]).shape[0],
)

# operators: 91
# operators-routes: 324


In [16]:
# Can't find FootHill Transit (itp_id 112)

In [17]:
# Rows per route_id for operator 182
service_increase.loc[service_increase.itp_id == 182, "route_id"].value_counts()

150-13153    48
232-13153    48
487-13153    48
501-13153    48
534-13153    48
550-13153    48
577-13153    48
910-13153    48
Name: route_id, dtype: int64

In [18]:
# Hourly Thursday service for operator 182, route ids containing "910"
service_increase.loc[
    (service_increase.itp_id == 182)
    & (service_increase.route_id.str.contains("910"))
    & (service_increase.day_name == "Thursday"),
    ["itp_id", "day_name", "tract_type", "departure_hour"] + sum_cols,
]

Unnamed: 0,itp_id,day_name,tract_type,departure_hour,additional_trips,service_hrs,addl_service_hrs,service_hours_annual,addl_service_hrs_annual
4192,182,Thursday,urban,5,2.0,2.7,2.7,702.0,702.0
4193,182,Thursday,urban,6,1.0,4.25,1.416667,1105.0,368.333333
4194,182,Thursday,urban,7,1.0,4.5,1.5,1170.0,390.0
4195,182,Thursday,urban,8,1.0,4.7,1.566667,1222.0,407.333333
4196,182,Thursday,urban,9,2.0,3.033333,3.033333,788.666667,788.666667
4197,182,Thursday,urban,10,2.0,3.066667,3.066667,797.333333,797.333333
4198,182,Thursday,urban,11,2.0,3.133333,3.133333,814.666667,814.666667
4199,182,Thursday,urban,12,2.0,3.2,3.2,832.0,832.0
4200,182,Thursday,urban,13,2.0,3.233333,3.233333,840.666667,840.666667
4201,182,Thursday,urban,14,2.0,3.233333,3.233333,840.666667,840.666667


In [19]:
# Operators to spot-check, keyed by ITP ID
MAP_ME = {
    182: "LA Metro",
    294: "SJ Valley Transportation Authority",
    279: "BART",
    282: "SF Muni",
    278: "SD Metropolitan Transit System",
    112: "Foothill Transit",
}

# Columns shown for every operator (the same for each pass of the loop)
cols = [
    "route_id", "total_routes", "Route", "RouteType",
    "County", "District",
    "pct_route", "pct_highway",
]

# Print a header per operator, then show its parallel routes (may be empty)
for itp_id, operator in MAP_ME.items():
    subset = df[df.itp_id == itp_id]
    print(
        f"{itp_id}: {operator}",
        "**************************************************",
        sep="\n",
    )
    display(subset[cols])

182: LA Metro
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
98,150-13153,119,27,State,LA,7,0.423,0.293
168,501-13153,119,134,State,LA,7,0.517,0.69
188,487-13153,119,10,Interstate,LA,7,0.429,0.142
208,577-13153,119,605,Interstate,LA,7,0.639,0.722
227,550-13153,119,110,Interstate,LA,7,0.356,0.364
228,910-13153,119,110,Interstate,LA,7,0.42,0.693
269,232-13153,119,1,State,LA,7,0.639,0.259
270,534-13153,119,1,State,LA,7,0.877,0.335


294: SJ Valley Transportation Authority
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
13,104,92,237,State,SCL,4,0.328,0.659
14,Express 104,92,237,State,SCL,4,0.328,0.659
44,22,92,82,State,SCL,4,0.637,0.963
45,522,92,82,State,SCL,4,0.632,0.966
46,Rapid 522,92,82,State,SCL,4,0.632,0.966
113,102,92,85,State,SCL,4,0.437,0.576
114,Express 102,92,85,State,SCL,4,0.437,0.576
178,101,92,280,Interstate,SCL,4,0.379,0.454
179,103,92,280,Interstate,SCL,4,0.631,0.862
180,Express 101,92,280,Interstate,SCL,4,0.379,0.454


279: BART
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


282: SF Muni
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


278: SD Metropolitan Transit System
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
214,225,101,805,Interstate,SD,11,0.325,0.279
344,235,101,15,Interstate,SD,11,0.536,0.379
345,235,101,15,State,SD,11,0.449,0.915
346,280,101,15,State,SD,11,0.552,0.983
347,290,101,15,State,SD,11,0.439,0.583
351,110,101,163,State,SD,11,0.538,0.966
352,20,101,163,State,SD,11,0.325,0.856
353,280,101,163,State,SD,11,0.317,0.964
354,290,101,163,State,SD,11,0.425,0.964
355,901,101,75,State,SD,11,0.54,0.892


112: Foothill Transit
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


## Make map of these parallel routes for CA

Double-check that these are as expected before calculating additional trips, buses, capital expenditures, etc.

In [20]:
def data_to_plot(df, buffer_distance=200, simplify_tolerance=100):
    """Trim a routes table to the columns needed for mapping and thicken the geometry.

    Parameters
    ----------
    df : GeoDataFrame
        Must contain the columns listed in ``keep_cols`` below, including
        ``geometry``. The input is not modified.
    buffer_distance : number, default 200
        Distance passed to ``geometry.buffer`` (in the units of the frame's
        CRS) so that line geometries become visible polygons on the map.
    simplify_tolerance : number, default 100
        Tolerance passed to ``geometry.simplify`` after buffering.

    Returns
    -------
    A new frame with only ``keep_cols``, a fresh 0..n-1 index, and the
    buffered-then-simplified geometry.
    """
    keep_cols = [
        "itp_id", "route_id",
        "Route", "County", "District", "RouteType",
        "pct_route", "pct_highway", "parallel",
        "geometry",
    ]
    plot_df = df[keep_cols].reset_index(drop=True)

    # Buffer first, then simplify the resulting shapes
    plot_geometry = plot_df.geometry.buffer(buffer_distance).simplify(
        tolerance=simplify_tolerance
    )

    return plot_df.assign(geometry=plot_geometry)

# Layer inputs: highways as loaded, transit routes prepared for plotting
hwy_df = highways
to_map = data_to_plot(df)

# Colour scales: black/gray for highways, blue/orange for transit routes
hwys_color = branca.colormap.StepColormap(colors=["black", "gray"])

colorscale = branca.colormap.StepColormap(
    colors=[
        cp.CALITP_CATEGORY_BRIGHT_COLORS[0],  # blue
        cp.CALITP_CATEGORY_BRIGHT_COLORS[1],  # orange
    ],
)

# Popup / tooltip labels, keyed by column name
hwys_popup_dict = {
    "Route": "Highway Route",
    "RouteType": "Route Type",
    "County": "County",
}

transit_popup_dict = {
    "itp_id": "Operator ITP ID",
    "route_id": "Route ID",
    "pct_route": "% overlapping route",
    "pct_highway": "% overlapping highway",
}

# Legend image and where to place it on the map
LEGEND_URL = (
    "https://raw.githubusercontent.com/cal-itp/data-analyses/main/"
    "bus_service_increase/img/legend_intersecting_parallel.png"
)

LEGEND_DICT = {
    "legend_url": LEGEND_URL,
    "legend_bottom": 85,
    "legend_left": 5,
}

# One entry per map layer, in drawing order
LAYERS_DICT = {
    "Highways": {
        "df": hwy_df,
        "plot_col": "Route",
        "popup_dict": hwys_popup_dict,
        "tooltip_dict": hwys_popup_dict,
        "colorscale": hwys_color,
    },
    "Transit Routes": {
        "df": to_map,
        "plot_col": "parallel",
        "popup_dict": transit_popup_dict,
        "tooltip_dict": transit_popup_dict,
        "colorscale": colorscale,
    },
}

# Statewide map, centred and zoomed using the "CA" entry of REGION_CENTROIDS
fig = map_utils.make_folium_multiple_layers_map(
    LAYERS_DICT,
    fig_width=700,
    fig_height=700,
    zoom=map_utils.REGION_CENTROIDS["CA"]["zoom"],
    centroid=map_utils.REGION_CENTROIDS["CA"]["centroid"],
    title="Parallel Transit Routes to SHN",
    legend_dict=LEGEND_DICT,
)
    
#display(fig)
#fig.save(f"{IMG_PATH}parallel_{operator_name}.html")
#print(f"{operator_name} map saved")