# Transit Routes on SHN

Draw a very small buffer around highways and capture only the transit routes that run along them.

In [1]:
import datetime as dt
import os

import branca
import geopandas as gpd
import intake
import pandas as pd

os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp_data_analysis.tables import tbls
from calitp_data_analysis.sql import query_sql
from siuba import *

import create_parallel_corridors
from bus_service_utils import utils
from shared_utils import calitp_color_palette as cp
from shared_utils import geography_utils
from shared_utils import map_utils

# Reuse the image and data locations defined in create_parallel_corridors
# so this notebook reads the same files that module works with.
IMG_PATH = create_parallel_corridors.IMG_PATH
DATA_PATH = create_parallel_corridors.DATA_PATH

# Open the intake catalog(s) matching "*.yml" (a relative glob pattern).
catalog = intake.open_catalog("*.yml")



In [2]:
# Disabled step, kept as a bare string so it is not executed. The quoted code
# builds `shapes`: distinct (calitp_itp_id, route_id, shape_id) rows for trips
# in service on SELECTED_DATE.
# NOTE(review): the quoted code calls `tbl.views...`, but the import cell brings
# in `tbls` -- confirm the name before re-enabling.
'''
# Need route_ids for parallel corridors
# Add this info on and use alternate_df in make_analysis_data()
SELECTED_DATE = dt.date(2022, 1, 6)

trips = (tbl.views.gtfs_schedule_fact_daily_trips()
         >> filter(_.service_date == SELECTED_DATE, _.is_in_service == True)
         >> select(_.calitp_itp_id, _.service_date, 
                   _.route_id, _.trip_key)
)

dim_trips = (tbl.views.gtfs_schedule_dim_trips()
             >> select(_.calitp_itp_id, _.route_id, 
                       _.shape_id, _.trip_key)
)

shapes = (trips 
          >> inner_join(_, dim_trips, 
                        on = ["calitp_itp_id", "trip_key", "route_id"]
                       )
          >> select(_.calitp_itp_id, _.route_id, _.shape_id)
          >> distinct()
          >> collect()
         )
'''

'\n# Need route_ids for parallel corridors\n# Add this info on and use alternate_df in make_analysis_data()\nSELECTED_DATE = dt.date(2022, 1, 6)\n\ntrips = (tbl.views.gtfs_schedule_fact_daily_trips()\n         >> filter(_.service_date == SELECTED_DATE, _.is_in_service == True)\n         >> select(_.calitp_itp_id, _.service_date, \n                   _.route_id, _.trip_key)\n)\n\ndim_trips = (tbl.views.gtfs_schedule_dim_trips()\n             >> select(_.calitp_itp_id, _.route_id, \n                       _.shape_id, _.trip_key)\n)\n\nshapes = (trips \n          >> inner_join(_, dim_trips, \n                        on = ["calitp_itp_id", "trip_key", "route_id"]\n                       )\n          >> select(_.calitp_itp_id, _.route_id, _.shape_id)\n          >> distinct()\n          >> collect()\n         )\n'

In [3]:
# Disabled step, kept as a bare string so it is not executed. The quoted code
# inner-joins the catalog's transit_routes to `shapes` on
# (calitp_itp_id, shape_id) and renames calitp_itp_id to itp_id, producing `df`.
'''
transit_routes = catalog.transit_routes.read()

df = pd.merge(transit_routes,
              shapes,
              on = ["calitp_itp_id", "shape_id"],
              # Outer join shows there are left_only and right_only obs
              # But, can only do stuff with full info
              how = "inner",
              # There are some obs where same shape_id is linked to multiple route_id
              # Allow for 1:m
              validate = "1:m",
).rename(columns = {"calitp_itp_id": "itp_id"})
'''

'\ntransit_routes = catalog.transit_routes.read()\n\ndf = pd.merge(transit_routes,\n              shapes,\n              on = ["calitp_itp_id", "shape_id"],\n              # Outer join shows there are left_only and right_only obs\n              # But, can only do stuff with full info\n              how = "inner",\n              # There are some obs where same shape_id is linked to multiple route_id\n              # Allow for 1:m\n              validate = "1:m",\n).rename(columns = {"calitp_itp_id": "itp_id"})\n'

In [4]:
# Disabled step, kept as a bare string so it is not executed. The quoted call
# runs create_parallel_corridors.make_analysis_data with a 50 ft highway buffer
# and FILE_NAME "routes_on_shn".
# NOTE(review): presumably this is what produces routes_on_shn.parquet, which a
# later cell reads -- confirm against make_analysis_data.
'''
create_parallel_corridors.make_analysis_data(
    hwy_buffer_feet=50, alternate_df = df,
    pct_route_threshold = 0.3, pct_highway_threshold = 0.1,
    DATA_PATH = DATA_PATH, FILE_NAME = "routes_on_shn"
)
'''

'\ncreate_parallel_corridors.make_analysis_data(\n    hwy_buffer_feet=50, alternate_df = df,\n    pct_route_threshold = 0.3, pct_highway_threshold = 0.1,\n    DATA_PATH = DATA_PATH, FILE_NAME = "routes_on_shn"\n)\n'

In [5]:
# Transit routes from the saved analysis file, restricted to rows flagged
# parallel == 1, with a fresh 0..n-1 index.
df = (
    gpd.read_parquet(f"{DATA_PATH}routes_on_shn.parquet")
    .loc[lambda routes: routes.parallel == 1]
    .reset_index(drop=True)
)

# Highway geometries, read as saved (no buffering is applied in this cell).
highways = gpd.read_parquet(f"{DATA_PATH}highways.parquet")

# Service-hours increase data; rename the operator key so it matches `df`.
service = pd.read_parquet(
    f"{utils.GCS_FILE_PATH}2022_Jan/service_increase.parquet"
).rename(columns={"calitp_itp_id": "itp_id"})

Address duplicates.

Duplicates here mean the same `shape_id` is linked to multiple `route_id` values.

**TODO:** This has been allowed up to this point, but should it be allowed for aggregation?
It may double-count service. Leaving as-is for now, but the duplicates might need to be dropped.

In [6]:
# Diagnostic outer join: count how many (itp_id, shape_id) rows are found on
# only one side versus both. validate="m:m" performs no uniqueness check.
service_increase = service.merge(
    df,
    how="outer",
    on=["itp_id", "shape_id"],
    indicator=True,
    validate="m:m",
)

service_increase["_merge"].value_counts()

left_only     256720
both           15472
right_only        34
Name: _merge, dtype: int64

In [7]:
# Keep only (itp_id, shape_id) pairs present in both the service data and the
# on-highway routes. validate="m:m" performs no uniqueness check.
service_increase = service.merge(
    df,
    how="inner",
    on=["itp_id", "shape_id"],
    validate="m:m",
)

In [8]:
# Compare distinct route_id counts before and after joining to service data.
print(
    f"# unique route ids originally: {df.route_id.nunique()}",
    f"# unique route ids with service hrs info: {service_increase.route_id.nunique()}",
    sep="\n",
)

# unique route ids originally: 295
# unique route ids with service hrs info: 273


In [9]:
# Columns summed within each operator / day / tract-type group.
sum_cols = [
    "additional_trips",
    "service_hrs",
    "addl_service_hrs",
    "service_hours_annual",
    "addl_service_hrs_annual",
]

# Candidate final output (still to be confirmed); the charts further down are
# a sanity check before relying on it.
a1 = geography_utils.aggregate_by_geography(
    service_increase,
    group_cols=["itp_id", "day_name", "tract_type"],
    sum_cols=sum_cols,
)

In [10]:
# Distinct operators, and distinct (operator, route) pairs, with service data.
print(
    f"# operators: {service_increase.itp_id.nunique()}",
    f"# operators-routes: {service_increase[['itp_id', 'route_id']].drop_duplicates().shape[0]}",
    sep="\n",
)

# operators: 91
# operators-routes: 324


### Missing Foothill Transit (itp_id 112)

Known issue: the URL changed, and we are waiting for the data to be back-filled.


### Charts by Service Type and Departure Hour
* Look at route 910, which runs on the 110 freeway
* It seems our estimator simply adds trips until the route reaches 4 trips per hour
* But the route runs at most 3 times an hour during peak hours
* Chart the additional trips and additional service hours by departure hour
* We may want to subset to a narrower window of departure hours

In [11]:
import altair as alt

from shared_utils import styleguide
from shared_utils import calitp_color_palette as cp
from IPython.display import Markdown

# Set charting style guide
# Register the style guide's theme under the name "calitp_theme", then make it
# the active Altair theme.
alt.themes.register("calitp_theme", styleguide.calitp_theme)
alt.themes.enable("calitp_theme")
# Use the default data transformer with max_rows set to 10,000.
alt.data_transformers.enable('default', max_rows=10_000)

DataTransformerRegistry.enable('default')

In [12]:
def additional_service_by_hour(df, itp_id = 182, route = "910-13153"):
    """Display hourly additional-trip and additional-service-hour bar charts
    for one operator's route.

    Parameters
    ----------
    df : DataFrame with at least the columns itp_id, route_id, Route, geometry,
        departure_hour, day_name, additional_trips and addl_service_hrs.
    itp_id : operator ITP ID to select.
    route : route_id to select.

    Returns
    -------
    None. A Markdown heading and two Altair charts are shown with `display`.
    If no rows match (itp_id, route), a message is printed and nothing is
    charted, so a loop over many routes is not aborted by one missing route.
    """
    subset = (df[(df.itp_id == itp_id) & (df.route_id == route)]
              .drop(columns = "geometry")
             )

    # Guard: without this, `.iloc[0]` below raises IndexError on an empty subset.
    if subset.empty:
        print(f"No rows for ITP ID: {itp_id}   Route: {route}; skipping charts.")
        return

    # Only the first matching row's highway is reported in the heading.
    hwy_route = subset.Route.iloc[0]
    subtitle = f"ID: {itp_id}   Route: {route}"

    # Shared x-axis and color encoding; each chart adds its own y and tooltip.
    base = (alt.Chart(subset)
             .mark_bar()
             .encode(
                 x=alt.X("departure_hour:O", title="departure hour"),
                 color=alt.Color("day_name:N", title="Service Type",
                                 scale = alt.Scale(
                                     domain = ["Thursday", "Saturday", "Sunday"],
                                     range = cp.CALITP_CATEGORY_BRIGHT_COLORS
                                 )
                                )
             )
    )

    def _styled_chart(y_col, y_title, chart_title):
        # Build one interactive bar chart on `y_col` and apply the style guide.
        chart = (base
                 .encode(
                     y=alt.Y(f"{y_col}:Q", title=y_title),
                     tooltip = ["departure_hour", y_col, "day_name"]
                 ).interactive()
        )
        return (styleguide.preset_chart_config(chart)
                .properties(title={
                    "text": chart_title,
                    "subtitle": subtitle
                })
               )

    trips = _styled_chart("additional_trips", "Additional Trips",
                          "Additional Trips by Service Type")
    hours = _styled_chart("addl_service_hrs", "Additional Service Hrs",
                          "Additional Service Hours by Service Type")

    display(
        Markdown(
            f"#### ITP ID: {itp_id}   Route: {route} runs along the {hwy_route} freeway"
        )
    )
    display(trips, hours)

In [13]:
# Sample routes to chart, keyed by operator ITP ID; each value is a list of
# route_id strings passed to additional_service_by_hour.
route_pairs = {
    # LA Metro
    182: ["910-13153", "501-13153", "487-13153"],
    # San Jose (SJ Valley Transportation Authority)
    294: ["Express 104", "Rapid 522", "Express 102"],
    # SD (SD Metropolitan Transit System)
    278: ["280", "950", "894"],
}

In [14]:
# Show the pair of hourly charts for every (operator, route) in route_pairs.
for itp_id in route_pairs:
    for r in route_pairs[itp_id]:
        additional_service_by_hour(service_increase, itp_id=itp_id, route=r)

# Single-route example:
#additional_service_by_hour(service_increase, 
#                           itp_id = 182, route = "910-13153")

#### ITP ID: 182   Route: 910-13153 runs along the 110 freeway

#### ITP ID: 182   Route: 501-13153 runs along the 134 freeway

#### ITP ID: 182   Route: 487-13153 runs along the 10 freeway

#### ITP ID: 294   Route: Express 104 runs along the 237 freeway

#### ITP ID: 294   Route: Rapid 522 runs along the 82 freeway

#### ITP ID: 294   Route: Express 102 runs along the 85 freeway

#### ITP ID: 278   Route: 280 runs along the 15 freeway

#### ITP ID: 278   Route: 950 runs along the 905 freeway

#### ITP ID: 278   Route: 894 runs along the 94 freeway

In [15]:
# Operators to spot-check, keyed by ITP ID.
MAP_ME = {
    182: "LA Metro",
    294: "SJ Valley Transportation Authority",
    279: "BART",
    282: "SF Muni",
    278: "SD Metropolitan Transit System",
    112: "Foothill Transit",
}

# Columns shown for each operator's routes (same list for every operator).
cols = [
    "route_id", "total_routes", "Route", "RouteType",
    "County", "District",
    "pct_route", "pct_highway",
]

for itp_id, operator in MAP_ME.items():
    print(
        f"{itp_id}: {operator}",
        "**************************************************",
        sep="\n",
    )
    display(df.loc[df.itp_id == itp_id, cols])

182: LA Metro
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
98,150-13153,119,27,State,LA,7,0.423,0.293
168,501-13153,119,134,State,LA,7,0.517,0.69
188,487-13153,119,10,Interstate,LA,7,0.429,0.142
208,577-13153,119,605,Interstate,LA,7,0.639,0.722
227,550-13153,119,110,Interstate,LA,7,0.356,0.364
228,910-13153,119,110,Interstate,LA,7,0.42,0.693
269,232-13153,119,1,State,LA,7,0.639,0.259
270,534-13153,119,1,State,LA,7,0.877,0.335


294: SJ Valley Transportation Authority
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
13,104,92,237,State,SCL,4,0.328,0.659
14,Express 104,92,237,State,SCL,4,0.328,0.659
44,22,92,82,State,SCL,4,0.637,0.963
45,522,92,82,State,SCL,4,0.632,0.966
46,Rapid 522,92,82,State,SCL,4,0.632,0.966
113,102,92,85,State,SCL,4,0.437,0.576
114,Express 102,92,85,State,SCL,4,0.437,0.576
178,101,92,280,Interstate,SCL,4,0.379,0.454
179,103,92,280,Interstate,SCL,4,0.631,0.862
180,Express 101,92,280,Interstate,SCL,4,0.379,0.454


279: BART
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


282: SF Muni
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


278: SD Metropolitan Transit System
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway
214,225,101,805,Interstate,SD,11,0.325,0.279
344,235,101,15,Interstate,SD,11,0.536,0.379
345,235,101,15,State,SD,11,0.449,0.915
346,280,101,15,State,SD,11,0.552,0.983
347,290,101,15,State,SD,11,0.439,0.583
351,110,101,163,State,SD,11,0.538,0.966
352,20,101,163,State,SD,11,0.325,0.856
353,280,101,163,State,SD,11,0.317,0.964
354,290,101,163,State,SD,11,0.425,0.964
355,901,101,75,State,SD,11,0.54,0.892


112: Foothill Transit
**************************************************


Unnamed: 0,route_id,total_routes,Route,RouteType,County,District,pct_route,pct_highway


## Make map of these parallel routes for CA

Double-check that these routes are as expected before calculating additional trips, buses, capital expenditures, etc.

In [16]:
def data_to_plot(df, buffer_distance=200, simplify_tolerance=100):
    """Prepare route geometries for mapping.

    Keeps only the columns used by the map, resets the index, and replaces
    each geometry with a buffered, simplified version. The input frame is not
    modified.

    Parameters
    ----------
    df : frame with the columns listed in `keep_cols`; `geometry` must support
        `.buffer(...)` and `.simplify(tolerance=...)` (e.g. a GeoDataFrame).
    buffer_distance : distance passed to `buffer`; defaults to 200, the value
        previously hard-coded. Units follow the data's CRS (assumed to be a
        projected CRS -- TODO confirm).
    simplify_tolerance : tolerance passed to `simplify`; defaults to 100, the
        value previously hard-coded.

    Returns
    -------
    A new frame with the kept columns and the transformed `geometry`.
    """
    keep_cols = ["itp_id", "route_id",
                 "Route", "County", "District", "RouteType",
                 "pct_route", "pct_highway", "parallel",
                 "geometry"
                ]
    plot_df = df[keep_cols].reset_index(drop=True)

    # Buffer first so line geometries become polygons, then simplify to
    # reduce the number of vertices drawn.
    return plot_df.assign(
        geometry = (plot_df.geometry
                    .buffer(buffer_distance)
                    .simplify(tolerance=simplify_tolerance)),
    )

# Layer data: buffered/simplified transit routes and the highway geometries.
to_map = data_to_plot(df)
hwy_df = highways

# Set various components for map
# Each dict maps a column name to the label shown in popups/tooltips.
hwys_popup_dict = {
    "Route": "Highway Route",
    "RouteType": "Route Type",
    "County": "County"   
}

transit_popup_dict = {
    "itp_id": "Operator ITP ID",
    "route_id": "Route ID",
    "pct_route": "% overlapping route",
    "pct_highway": "% overlapping highway",
}

# Step colormaps: grayscale for highways, Cal-ITP brights for transit routes.
hwys_color = branca.colormap.StepColormap(
    colors=["black", "gray"],
)

colorscale = branca.colormap.StepColormap(
    colors=[
        cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue
        cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange
    ],
)


# One entry per map layer: the data, the column driving the color, the
# popup/tooltip labels, and the colormap.
LAYERS_DICT = {
    "Highways": {"df": hwy_df,
        "plot_col": "Route",
        "popup_dict": hwys_popup_dict, 
        "tooltip_dict": hwys_popup_dict,
        "colorscale": hwys_color,
    },
    "Transit Routes": {"df": to_map,
        "plot_col": "parallel",
        "popup_dict": transit_popup_dict, 
        "tooltip_dict": transit_popup_dict,
        "colorscale": colorscale,
    },
}

# Legend image location and its placement values for the map.
LEGEND_URL = (
    "https://raw.githubusercontent.com/cal-itp/data-analyses/"
    "main/bus_service_increase/"
    "img/legend_intersecting_parallel.png"
)

LEGEND_DICT = {
    "legend_url": LEGEND_URL,
    "legend_bottom": 85,
    "legend_left": 5,
}


# `map_utils` comes from shared_utils (imported in the first cell); statewide
# zoom and centroid are taken from its REGION_CENTROIDS["CA"] entry.
fig = map_utils.make_folium_multiple_layers_map(
    LAYERS_DICT,
    fig_width = 700, fig_height = 700, 
    zoom = map_utils.REGION_CENTROIDS["CA"]["zoom"], 
    centroid = map_utils.REGION_CENTROIDS["CA"]["centroid"], 
    title="Parallel Transit Routes to SHN",
    legend_dict = LEGEND_DICT
)

# Rendering and saving are left disabled.
# NOTE(review): `operator_name` is not defined anywhere in this notebook, so
# the save/print lines below need a real name before being re-enabled.
#display(fig)
#fig.save(f"{IMG_PATH}parallel_{operator_name}.html")
#print(f"{operator_name} map saved")