# Route identification (time-series)

Over time, even `route_ids` change. Pick out a couple of examples of this.

In [1]:
import pandas as pd
import yaml
from segment_speed_utils import time_series_utils
from shared_utils import portfolio_utils
from update_vars import GTFS_DATA_DICT, SCHED_GCS

with open("../_shared_utils/shared_utils/portfolio_organization_name.yml", "r") as f:
    PORTFOLIO_ORGANIZATIONS_DICT = yaml.safe_load(f)

In [None]:
import geopandas as gpd
from update_vars import GTFS_DATA_DICT, RT_SCHED_GCS, SCHED_GCS, SEGMENT_GCS
import google.auth

credentials, project = google.auth.default()

import gcsfs
from shared_utils import (
    portfolio_utils,
    publish_utils,
)
from calitp_data_analysis import geography_utils, utils

In [2]:
CLEANED_ROUTE_NAMING = GTFS_DATA_DICT.schedule_tables.route_identification

df = pd.read_parquet(f"{SCHED_GCS}{CLEANED_ROUTE_NAMING}.parquet").pipe(
    portfolio_utils.standardize_portfolio_organization_names,
    PORTFOLIO_ORGANIZATIONS_DICT,
)

In [3]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

## Find other routes with this issue
* [Link](https://github.com/cal-itp/data-analyses/issues/1527#issuecomment-2982251267)
* It's ok for recent_combined_name to group multiple route_ids only when route_short_name and route_long_name are the same, see the LA Metro route_id example.
* It is not ok to group them when route_short_name and route_long_name are different, and the time_series_utils operators that get extra parsing only had that to handle cases like:
* https://github.com/cal-itp/data-analyses/blob/9505ad4445de05119b78d4ad35ac89c23ed8a7dc/rt_segment_speeds/segment_speed_utils/time_series_utils.py#L84-L105

In [4]:
df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,name,route_id,route_long_name,route_short_name,route_desc,service_date,combined_name,route_id2,recent_combined_name,recent_route_id2,portfolio_organization_name
0,7e015887964432c82ce7e735c2753f86,VCTC GMV Schedule,3402,Route 11,Route 11,PACIFIC VIEW MALL via TELEPHONE RD,2023-03-15,Route 11__Route 11,3402,Route 11__Route 11,3402,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)"
1,7e015887964432c82ce7e735c2753f86,VCTC GMV Schedule,3407,Route 18,Route 18,OHS TRIPPER - C ST - DORIS ST,2023-03-15,Route 18__Route 18,3407,Route 18__Route 18,3407,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)"


In [5]:
df.loc[df.recent_combined_name.str.contains("Coastal")][[
            "service_date",
            "recent_combined_name",
            "portfolio_organization_name",
            "route_long_name",
            "route_short_name",
        ]].tail(20)

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name
119289,2025-04-16,81B__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,81B
119290,2025-04-16,84U__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,84U
119291,2025-04-16,85C__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,85C
119633,2025-04-16,__Coastal Express,Curry Public Transit,Coastal Express,
121367,2025-05-14,__Long Coastal,City of Laguna Beach,Long Coastal,
121368,2025-05-14,__Short Coastal,City of Laguna Beach,Short Coastal,
121369,2025-05-14,__Coastal Trolley,City of Laguna Beach,Coastal Trolley,
121686,2025-05-14,80__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,80
121687,2025-05-14,86__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,86
121693,2025-05-14,85__80-89 Coastal Express,"Ventura County (VCTC, Gold Coast, Cities of Camarillo, Moorpark, Ojai, Simi Valley, Thousand Oaks)",80-89 Coastal Express,85


In [6]:
m_route_ids_1_recent_combined_name = (
    df.groupby(
        [
            "service_date",
            "recent_combined_name",
            "portfolio_organization_name",
            "route_long_name",
            "route_short_name",
        ]
    )
    .agg({"route_id": "nunique"})
    .reset_index()
)

In [7]:
m_route_ids_1_recent_combined_name["same_name"] = (
    m_route_ids_1_recent_combined_name.route_long_name
    == m_route_ids_1_recent_combined_name.route_short_name
)

In [8]:
m_route_ids_1_recent_combined_name2 = m_route_ids_1_recent_combined_name.loc[
    (m_route_ids_1_recent_combined_name.route_id > 1)
    & (m_route_ids_1_recent_combined_name.same_name == False)
]

In [9]:
len(m_route_ids_1_recent_combined_name2)

341

### Amtrak doesn't have any `route_short_name` values.

In [10]:
def inspect_recent_rows(df: pd.DataFrame, route_long_name: str):
    df2 = (
        df.loc[df.route_long_name == route_long_name][
            [
                "service_date",
                "recent_combined_name",
                "portfolio_organization_name",
                "route_long_name",
                "route_short_name",
                "route_id",
            ]
        ]
        .sort_values(by=["service_date"])
        .drop_duplicates()
        .tail(10)
    )
    display(df2)
    return

In [11]:
m_route_ids_1_recent_combined_name2.loc[
    m_route_ids_1_recent_combined_name2.portfolio_organization_name == "Amtrak"
].route_short_name.unique()

array(['', 'THRU', 'CMTR'], dtype=object)

In [12]:
inspect_recent_rows(df, "Amtrak Thruway Connecting Service")

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id
121162,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42933
121159,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42920
121158,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,18650
121152,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,43
121155,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42896
121150,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42891
121148,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42954
121166,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42958
121157,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42912
121169,2025-05-14,__Amtrak Thruway Connecting Service,Amtrak,Amtrak Thruway Connecting Service,,42935


In [13]:
inspect_recent_rows(df, "Commuter Rail")

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id
110507,2025-01-15,__Commuter Rail,Amtrak,Commuter Rail,,42948
110511,2025-01-15,__Commuter Rail,Amtrak,Commuter Rail,,42985
113338,2025-02-12,__Commuter Rail,Amtrak,Commuter Rail,,42948
113341,2025-02-12,__Commuter Rail,Amtrak,Commuter Rail,,42985
115952,2025-03-12,__Commuter Rail,Amtrak,Commuter Rail,,42985
115960,2025-03-12,__Commuter Rail,Amtrak,Commuter Rail,,42948
118601,2025-04-16,__Commuter Rail,Amtrak,Commuter Rail,,42985
118603,2025-04-16,__Commuter Rail,Amtrak,Commuter Rail,,42948
121149,2025-05-14,__Commuter Rail,Amtrak,Commuter Rail,,42985
121151,2025-05-14,__Commuter Rail,Amtrak,Commuter Rail,,42948


### Check routes that aren't run by Amtrak

In [14]:
m_route_ids_1_recent_combined_name2.portfolio_organization_name.unique()

array(['City of Los Angeles', 'Amtrak',
       'Mission Bay Transportation Management Agency',
       'Palo Verde Valley Transit Agency',
       'San Luis Obispo Regional Transit Authority',
       'Los Angeles World Airports', 'City of Santa Monica'], dtype=object)

In [15]:
m_route_ids_1_recent_combined_name2.route_long_name.unique()

array(['CE438B', 'Amtrak Thruway Connecting Service', 'Commuter Rail', '',
       'Transbay-Caltrain', 'Blue Route', 'Gold Route', 'Red Route',
       'Green Route', 'Wellness Express', 'Silver Route',
       'Laguna Tripper', 'Maple Leaf', 'FlyAway - LAX to Union Station',
       'FlyAway - LAX to Van Nuys', 'FlyAway - Union Station to LAX',
       'FlyAway - Van Nuys to LAX', 'UCLA - Marina del Rey',
       '26th Street', 'Pacific Palisades', 'Downtown LA Freeway Express'],
      dtype=object)

In [16]:
m_route_ids_1_recent_combined_name2.loc[m_route_ids_1_recent_combined_name2.portfolio_organization_name != "Amtrak"].sort_values(
    by=["service_date", "route_id"], ascending=[False, False]
)

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id,same_name
115399,2025-05-14,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,2,False
115437,2025-05-14,1__Blue Route,Palo Verde Valley Transit Agency,Blue Route,1,2,False
115763,2025-05-14,2__Gold Route,Palo Verde Valley Transit Agency,Gold Route,2,2,False
115974,2025-05-14,3__Red Route,Palo Verde Valley Transit Agency,Red Route,3,2,False
116041,2025-05-14,438B__CE438B,City of Los Angeles,CE438B,438B,2,False
116045,2025-05-14,43__26th Street,City of Santa Monica,26th Street,43,2,False
116140,2025-05-14,4__Green Route,Palo Verde Valley Transit Agency,Green Route,4,2,False
116498,2025-05-14,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,2,False
116804,2025-05-14,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,2,False
117034,2025-05-14,LAX to US__FlyAway - LAX to Union Station,Los Angeles World Airports,FlyAway - LAX to Union Station,LAX to US,2,False


In [34]:
def process_transit_routes() -> gpd.GeoDataFrame:
    """
    Select the most recent transit route.
    Also count how many routes there are for each operator.
    """
    OPERATOR_ROUTE = GTFS_DATA_DICT.digest_tables.operator_routes_map

    
    op_geography_df = gpd.read_parquet(
        f"{RT_SCHED_GCS}{OPERATOR_ROUTE}.parquet",
        storage_options={"token": credentials.token},
    )

    most_recent_routes = publish_utils.filter_to_recent_date(
        df=op_geography_df,
        group_cols=[
            "portfolio_organization_name",
            "route_id"
        ],
    )

    # Calc length of route
    most_recent_routes = most_recent_routes.assign(
        route_length_feet=most_recent_routes.geometry.to_crs(
            geography_utils.CA_NAD83Albers_ft
        ).length
    )

    # Drop duplicates
    most_recent_routes = most_recent_routes.drop_duplicates(
        subset=["portfolio_organization_name", "recent_combined_name", "service_date", "route_id"]
    )
    return most_recent_routes

In [35]:
routes = process_transit_routes()

In [36]:
def display_map(gdf:gpd.GeoDataFrame, route:str):
    gdf = gdf.loc[gdf.recent_combined_name == route][["geometry","route_id"]]
    display(gdf.explore("route_id"))
    return

In [37]:
routes.loc[routes.recent_combined_name == "6 Wellness Express"].shape

(2, 27)

In [38]:
display_map(routes, "6 Wellness Express")

In [18]:
inspect_recent_rows(df, "Wellness Express")

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id
111081,2025-01-15,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,6467
111678,2025-01-15,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,5970
113628,2025-02-12,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,6467
114290,2025-02-12,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,5970
116224,2025-03-12,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,6467
116949,2025-03-12,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,5970
118880,2025-04-16,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,6467
119828,2025-04-16,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,5970
121814,2025-05-14,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,6467
122250,2025-05-14,6__Wellness Express,Palo Verde Valley Transit Agency,Wellness Express,6,5970


In [39]:
display_map(routes, "18 UCLA - Marina del Rey")

In [17]:
inspect_recent_rows(df, "UCLA - Marina del Rey")

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id
103670,2024-11-13,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3770
106458,2024-12-11,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3770
111244,2025-01-15,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3820
111737,2025-02-12,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3820
114892,2025-03-12,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,18
116532,2025-03-12,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3820
117548,2025-04-16,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,18
119308,2025-04-16,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3919
120126,2025-05-14,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,18
121832,2025-05-14,18__UCLA - Marina del Rey,City of Santa Monica,UCLA - Marina del Rey,18,3919


In [41]:
inspect_recent_rows(df, "Pacific Palisades")

Unnamed: 0,service_date,recent_combined_name,portfolio_organization_name,route_long_name,route_short_name,route_id
103663,2024-11-13,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3763
106451,2024-12-11,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3763
111237,2025-01-15,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3813
111730,2025-02-12,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3813
114885,2025-03-12,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,9
116525,2025-03-12,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3813
117541,2025-04-16,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,9
119301,2025-04-16,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3912
120119,2025-05-14,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,9
121825,2025-05-14,9__Pacific Palisades,City of Santa Monica,Pacific Palisades,9,3912


In [43]:
display_map(routes, "9 Pacific Palisades")