# Identify GTFS shape overlap issues

Known issue from RT speedmaps: vehicle positions are "snapped" to shapes in wrong order when a route starts and ends in the same place. Calculated speeds are implausible and trips are dropped.

Goal: Identify how many shapes have this issue.

In [1]:
import calitp
import geopandas as gpd
import numpy as np
import pandas as pd
import shared_utils
import datetime as dt
from siuba import * 

import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) ## 800GB?

pd.set_option("display.max_columns", None)



In [2]:
# set testing parameters - test on known operator w/ issue (KART)
analysis_operator=148
analysis_dt= dt.date(2022, 6, 1)

In [3]:
help(shared_utils.gtfs_utils.get_route_shapes)

Help on function get_route_shapes in module shared_utils.gtfs_utils:

get_route_shapes(selected_date: str | datetime.date, itp_id_list: list[int] = None, get_df: bool = True, crs: str = 'EPSG:4326', trip_df: siuba.sql.verbs.LazyTbl | pandas.core.frame.DataFrame = None, custom_filtering: dict = None) -> geopandas.geodataframe.GeoDataFrame
    Return a subset of geography_utils.make_routes_gdf()
    to only have the `shape_id` values present on a selected day.
    
    geography_utils.make_routes_gdf() only selects based on calitp_extracted_at
    and calitp_deleted_at date range.
    
    Allow a pre-existing trips table to be supplied.
    If not, run a fresh trips query.
    
    Custom_filtering doesn't filter in the query (which relies on trips query),
    but can filter out after it's a gpd.GeoDataFrame



In [4]:
gdf = (shared_utils.gtfs_utils.get_route_shapes(selected_date = analysis_dt,itp_id_list = [analysis_operator]))

DatabaseError: (google.cloud.bigquery.dbapi.exceptions.DatabaseError) 500 Query exceeded limit for bytes billed: 5000000000. 16087252992 or higher required.

Location: us-west2
Job ID: d1305c0e-c36b-4ece-9307-8f0b3fbf9b32

[SQL: SELECT DISTINCT `anon_1`.`calitp_itp_id`, `anon_1`.`calitp_url_number`, `anon_1`.`shape_id` 
FROM (SELECT `anon_2`.`feed_key` AS `feed_key`, `anon_2`.`trip_key` AS `trip_key`, `anon_2`.`trip_id` AS `trip_id`, `anon_2`.`route_id` AS `route_id`, `anon_2`.`calitp_itp_id` AS `calitp_itp_id`, `anon_2`.`calitp_url_number` AS `calitp_url_number`, `anon_2`.`service_id` AS `service_id`, `anon_2`.`service_date` AS `service_date`, `anon_2`.`service_indicator` AS `service_indicator`, `anon_2`.`service_start_date` AS `service_start_date`, `anon_2`.`service_end_date` AS `service_end_date`, `anon_2`.`service_inclusion` AS `service_inclusion`, `anon_2`.`service_exclusion` AS `service_exclusion`, `anon_2`.`is_in_service` AS `is_in_service`, `anon_2`.`n_stops` AS `n_stops`, `anon_2`.`n_stop_times` AS `n_stop_times`, `anon_2`.`trip_first_departure_ts` AS `trip_first_departure_ts`, `anon_2`.`trip_last_arrival_ts` AS `trip_last_arrival_ts`, `anon_2`.`service_hours` AS `service_hours`, `anon_3`.`wheelchair_accessible` AS `wheelchair_accessible`, `anon_3`.`shape_id` AS `shape_id`, `anon_3`.`calitp_hash` AS `calitp_hash`, `anon_3`.`trip_headsign` AS `trip_headsign`, `anon_3`.`calitp_deleted_at` AS `calitp_deleted_at`, `anon_3`.`calitp_extracted_at` AS `calitp_extracted_at`, `anon_3`.`bikes_allowed` AS `bikes_allowed`, `anon_3`.`trip_short_name` AS `trip_short_name`, `anon_3`.`block_id` AS `block_id`, `anon_3`.`direction_id` AS `direction_id` 
FROM (SELECT `anon_4`.`feed_key` AS `feed_key`, `anon_4`.`trip_key` AS `trip_key`, `anon_4`.`trip_id` AS `trip_id`, `anon_4`.`route_id` AS `route_id`, `anon_4`.`calitp_itp_id` AS `calitp_itp_id`, `anon_4`.`calitp_url_number` AS `calitp_url_number`, `anon_4`.`service_id` AS `service_id`, `anon_4`.`service_date` AS `service_date`, `anon_4`.`service_indicator` AS `service_indicator`, `anon_4`.`service_start_date` AS `service_start_date`, `anon_4`.`service_end_date` AS `service_end_date`, `anon_4`.`service_inclusion` AS `service_inclusion`, `anon_4`.`service_exclusion` AS `service_exclusion`, `anon_4`.`is_in_service` AS `is_in_service`, `anon_4`.`n_stops` AS `n_stops`, `anon_4`.`n_stop_times` AS `n_stop_times`, `anon_4`.`trip_first_departure_ts` AS `trip_first_departure_ts`, `anon_4`.`trip_last_arrival_ts` AS `trip_last_arrival_ts`, `anon_4`.`service_hours` AS `service_hours` 
FROM (SELECT `anon_5`.`feed_key` AS `feed_key`, `anon_5`.`trip_key` AS `trip_key`, `anon_5`.`trip_id` AS `trip_id`, `anon_5`.`route_id` AS `route_id`, `anon_5`.`calitp_itp_id` AS `calitp_itp_id`, `anon_5`.`calitp_url_number` AS `calitp_url_number`, `anon_5`.`service_id` AS `service_id`, `anon_5`.`service_date` AS `service_date`, `anon_5`.`service_indicator` AS `service_indicator`, `anon_5`.`service_start_date` AS `service_start_date`, `anon_5`.`service_end_date` AS `service_end_date`, `anon_5`.`service_inclusion` AS `service_inclusion`, `anon_5`.`service_exclusion` AS `service_exclusion`, `anon_5`.`is_in_service` AS `is_in_service`, `anon_5`.`calitp_extracted_at` AS `calitp_extracted_at`, `anon_5`.`calitp_deleted_at` AS `calitp_deleted_at`, `anon_5`.`n_stops` AS `n_stops`, `anon_5`.`n_stop_times` AS `n_stop_times`, `anon_5`.`trip_first_departure_ts` AS `trip_first_departure_ts`, `anon_5`.`trip_last_arrival_ts` AS `trip_last_arrival_ts`, `anon_5`.`service_hours` AS `service_hours` 
FROM (SELECT `views.gtfs_schedule_fact_daily_trips`.`feed_key` AS `feed_key`, `views.gtfs_schedule_fact_daily_trips`.`trip_key` AS `trip_key`, `views.gtfs_schedule_fact_daily_trips`.`trip_id` AS `trip_id`, `views.gtfs_schedule_fact_daily_trips`.`route_id` AS `route_id`, `views.gtfs_schedule_fact_daily_trips`.`calitp_itp_id` AS `calitp_itp_id`, `views.gtfs_schedule_fact_daily_trips`.`calitp_url_number` AS `calitp_url_number`, `views.gtfs_schedule_fact_daily_trips`.`service_id` AS `service_id`, `views.gtfs_schedule_fact_daily_trips`.`service_date` AS `service_date`, `views.gtfs_schedule_fact_daily_trips`.`service_indicator` AS `service_indicator`, `views.gtfs_schedule_fact_daily_trips`.`service_start_date` AS `service_start_date`, `views.gtfs_schedule_fact_daily_trips`.`service_end_date` AS `service_end_date`, `views.gtfs_schedule_fact_daily_trips`.`service_inclusion` AS `service_inclusion`, `views.gtfs_schedule_fact_daily_trips`.`service_exclusion` AS `service_exclusion`, `views.gtfs_schedule_fact_daily_trips`.`is_in_service` AS `is_in_service`, `views.gtfs_schedule_fact_daily_trips`.`calitp_extracted_at` AS `calitp_extracted_at`, `views.gtfs_schedule_fact_daily_trips`.`calitp_deleted_at` AS `calitp_deleted_at`, `views.gtfs_schedule_fact_daily_trips`.`n_stops` AS `n_stops`, `views.gtfs_schedule_fact_daily_trips`.`n_stop_times` AS `n_stop_times`, `views.gtfs_schedule_fact_daily_trips`.`trip_first_departure_ts` AS `trip_first_departure_ts`, `views.gtfs_schedule_fact_daily_trips`.`trip_last_arrival_ts` AS `trip_last_arrival_ts`, `views.gtfs_schedule_fact_daily_trips`.`service_hours` AS `service_hours` 
FROM `views.gtfs_schedule_fact_daily_trips`) AS `anon_5` 
WHERE `anon_5`.`service_date` = %(service_date_1:DATE)s AND `anon_5`.`calitp_extracted_at` <= %(calitp_extracted_at_1:DATE)s AND `anon_5`.`calitp_deleted_at` >= %(calitp_deleted_at_1:DATE)s AND `anon_5`.`is_in_service` = true) AS `anon_4` 
WHERE `anon_4`.`calitp_itp_id` IN UNNEST(%(calitp_itp_id_1:INT64)s)) AS `anon_2` JOIN (SELECT DISTINCT `anon_6`.`calitp_itp_id` AS `calitp_itp_id`, `anon_6`.`calitp_url_number` AS `calitp_url_number`, `anon_6`.`route_id` AS `route_id`, `anon_6`.`service_id` AS `service_id`, `anon_6`.`trip_id` AS `trip_id`, `anon_6`.`shape_id` AS `shape_id`, `anon_6`.`trip_headsign` AS `trip_headsign`, `anon_6`.`trip_short_name` AS `trip_short_name`, `anon_6`.`direction_id` AS `direction_id`, `anon_6`.`block_id` AS `block_id`, `anon_6`.`wheelchair_accessible` AS `wheelchair_accessible`, `anon_6`.`bikes_allowed` AS `bikes_allowed`, `anon_6`.`calitp_extracted_at` AS `calitp_extracted_at`, `anon_6`.`calitp_hash` AS `calitp_hash`, `anon_6`.`trip_key` AS `trip_key`, `anon_6`.`calitp_deleted_at` AS `calitp_deleted_at` 
FROM (SELECT `views.gtfs_schedule_dim_trips`.`calitp_itp_id` AS `calitp_itp_id`, `views.gtfs_schedule_dim_trips`.`calitp_url_number` AS `calitp_url_number`, `views.gtfs_schedule_dim_trips`.`route_id` AS `route_id`, `views.gtfs_schedule_dim_trips`.`service_id` AS `service_id`, `views.gtfs_schedule_dim_trips`.`trip_id` AS `trip_id`, `views.gtfs_schedule_dim_trips`.`shape_id` AS `shape_id`, `views.gtfs_schedule_dim_trips`.`trip_headsign` AS `trip_headsign`, `views.gtfs_schedule_dim_trips`.`trip_short_name` AS `trip_short_name`, `views.gtfs_schedule_dim_trips`.`direction_id` AS `direction_id`, `views.gtfs_schedule_dim_trips`.`block_id` AS `block_id`, `views.gtfs_schedule_dim_trips`.`wheelchair_accessible` AS `wheelchair_accessible`, `views.gtfs_schedule_dim_trips`.`bikes_allowed` AS `bikes_allowed`, `views.gtfs_schedule_dim_trips`.`calitp_extracted_at` AS `calitp_extracted_at`, `views.gtfs_schedule_dim_trips`.`calitp_hash` AS `calitp_hash`, `views.gtfs_schedule_dim_trips`.`trip_key` AS `trip_key`, `views.gtfs_schedule_dim_trips`.`calitp_deleted_at` AS `calitp_deleted_at` 
FROM `views.gtfs_schedule_dim_trips`) AS `anon_6` 
WHERE `anon_6`.`calitp_itp_id` IN UNNEST(%(calitp_itp_id_2:INT64)s)) AS `anon_3` ON `anon_2`.`trip_key` = `anon_3`.`trip_key` AND `anon_2`.`trip_id` = `anon_3`.`trip_id` AND `anon_2`.`route_id` = `anon_3`.`route_id` AND `anon_2`.`service_id` = `anon_3`.`service_id` AND `anon_2`.`calitp_itp_id` = `anon_3`.`calitp_itp_id` AND `anon_2`.`calitp_url_number` = `anon_3`.`calitp_url_number`) AS `anon_1`]
[parameters: {'service_date_1': datetime.date(2022, 6, 1), 'calitp_extracted_at_1': datetime.date(2022, 6, 1), 'calitp_deleted_at_1': datetime.date(2022, 6, 1), 'calitp_itp_id_1': [148], 'calitp_itp_id_2': [148]}]
(Background on this error at: https://sqlalche.me/e/14/4xp6)