In [1]:
import datetime
import _speed_utils as speed_utils
import dask.dataframe as dd
import numpy as np
import geopandas as gpd
import pandas as pd
from segment_speed_utils import gtfs_schedule_wrangling, helpers, segment_calcs,sched_rt_utils
from segment_speed_utils.project_vars import (
    COMPILED_CACHED_VIEWS,
    PROJECT_CRS,
    SEGMENT_GCS,
    analysis_date,
)
from scripts import (A1_sjoin_vp_segments, A2_valid_vehicle_positions)
from shared_utils import calitp_color_palette as cp


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Notebook-wide pandas display settings: show up to 100 columns, every row,
# untruncated cell text, and floats rounded to two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:.2f}".format)

In [3]:
# Pin the analysis to a single service date; this rebinds the `analysis_date`
# name imported from segment_speed_utils.project_vars above.
analysis_date = '2023-07-12'

In [4]:
# flagging_stage returns four frames for the analysis date:
#   flagged                 - every row in the df, with its flag
#   divide_by_zero          - only the rows that have 0 for meters and sec elapsed
#   trips_count             - per route, % of trips with 1+ division-by-0 row
#   route_most_populated_df - per route, the trip with the smallest % of
#                             division-by-0 rows
(
    flagged,
    divide_by_zero,
    trips_count,
    route_most_populated_df,
) = speed_utils.flagging_stage(analysis_date)

2023-07-20 15:47:17.974564
There are 2498742 rows in the original dataframe
sec_cat      meters_cat    
sec is avg   meters is avg     1886119
             meters is high      79418
             meters is low      131688
sec is high  meters is avg       61187
             meters is high      45520
             meters is low       18238
sec is low   meters is low      276572
dtype: int64
ok                 78.66
division by 0      11.07
meters too low      6.00
seconds too high    4.27
Name: flag, dtype: float64
23779 unique trips flagged.
2494 routes flagged out of 4087.
61.02275507707364 routes have 1+ row that has zeroes for meters/sec elapsed
14 operators are not flagged.
38.97724492292635% of routes have 1+ division by 0 row
Took 0:04:13.320070


In [5]:
# all_ops = list(flagged._gtfs_dataset_name.sort_values().unique())

### Filter to Big Blue Bus only

In [6]:
# Operator name to match against the `_gtfs_dataset_name` column.
big_blue_bus = "Big Blue Bus VehiclePositions"

In [28]:
# Every flagged row that belongs to Big Blue Bus, re-indexed from 0.
bbb_only = (
    flagged
    .loc[flagged._gtfs_dataset_name == big_blue_bus]
    .reset_index(drop=True)
)

In [8]:
# Shapes served by Big Blue Bus only. Filter `flagged` by operator here:
# `flagged` itself holds every operator, so taking its unique shapes directly
# would make the `isin(bbb_routes)` filters further down keep every row.
bbb_routes = list(
    flagged.loc[flagged._gtfs_dataset_name == big_blue_bus, "shape_array_key"].unique()
)

In [9]:
# Dimensions of the full flagged frame (all operators, not only Big Blue Bus).
flagged.shape

(2498742, 22)

In [10]:
# Number of distinct shapes (shape_array_key) across the full flagged frame.
flagged.shape_array_key.nunique()

4087

In [11]:
# divide_by_zero = divide_by_zero[divide_by_zero._gtfs_dataset_name == big_blue_bus].reset_index(drop = True)

In [12]:
# Share of all flagged rows that are division-by-0 rows, shown as a rounded
# percentage so the figure is unambiguous.
f"{len(divide_by_zero)} rows ({len(divide_by_zero) / len(flagged):.2%}) are divided by 0."

'276572 or 11.068449643860792 are divided by 0.'

In [13]:
# Number of distinct shapes with at least one division-by-0 row.
divide_by_zero.shape_array_key.nunique()

2494

In [14]:
# Restrict the per-route trip counts to the shapes listed in `bbb_routes`.
trips_count = (
    trips_count
    .loc[trips_count.shape_array_key.isin(bbb_routes)]
    .reset_index(drop=True)
)

In [15]:
# Restrict the most-populated-trip frame to the shapes listed in `bbb_routes`.
route_most_populated_df = (
    route_most_populated_df
    .loc[route_most_populated_df.shape_array_key.isin(bbb_routes)]
    .reset_index(drop=True)
)

### Stage 3 

In [16]:
# Stage 3: run the stage-3 flagging over the division-by-0 rows; the printed
# summary below breaks the rows down by the resulting `stage3_flag` value.
stage3 = speed_utils.flag_stage3(divide_by_zero, analysis_date)

2023-07-20 15:51:32.300176
check in stage 2                   253015
repeated timestamps & locations     23545
repeated locations                     12
Name: stage3_flag, dtype: int64
Have to check 91.48250726754696 % of rows in stage 2
Took 0:00:37.640121


In [17]:
# Sort order used when inspecting rows: operator, then shape, trip and stop sequence.
sort_by_cols = [
    "_gtfs_dataset_name",
    "shape_array_key",
    "trip_id",
    "stop_sequence",
]

In [19]:
# Columns shown when previewing rows: the sort keys followed by the stop,
# feed, timestamp, location-pair and stage-3 flag fields.
preview_cols = [
    *sort_by_cols,
    "stop_id",
    "gtfs_dataset_key",
    "location_timestamp_local",
    "pair",
    "stage3_flag",
]

In [20]:
# Order rows by operator, shape, trip and stop sequence, with a fresh 0..n-1 index.
stage3 = stage3.sort_values(by=sort_by_cols, ignore_index=True)

In [22]:
# Pull one row to spot-check. A fixed seed keeps the displayed row the same
# across re-runs, so follow-up cells that inspect it stay meaningful.
stage3.sample(random_state=0)

Unnamed: 0,shape_array_key,stop_sequence,gtfs_dataset_key,stop_id,loop_or_inlining,p50_mph,n_trips,p20_mph,p80_mph,time_of_day,_gtfs_dataset_name,trip_id,min_time,min_dist,max_time,max_dist,meters_elapsed,sec_elapsed,speed_mph,meters_cat,sec_cat,flag,schedule_gtfs_dataset_key,trip_instance_key,location_timestamp,location_timestamp_local,pair,number_of_repeated_timestamps,number_of_repeated_locs,stage3_flag
213625,b7ed7b6ed70a7553118c8e065b79cb8d,49,db56b50ab86b5f7a4ae2fc2dd9889bbe,331,0,3.14,21,0.46,6.86,all_day,Sacramento Vehicle Positions,1101557,2023-07-12 13:21:11,0.0,2023-07-12 13:21:11,0.0,0.0,0.0,,meters is low,sec is low,division by 0,43a1e46d592a1ee647bce8422c68460c,25313a7cd0553eece507e829b9264037,2023-07-12 20:21:11+00:00,2023-07-12 13:21:11,-121.495766/38.58214,2.0,2.0,repeated timestamps & locations


In [23]:
# Preview the stage-3 rows for a single shape.
stage3.loc[
    stage3.shape_array_key == "b7ed7b6ed70a7553118c8e065b79cb8d",
    preview_cols,
]

Unnamed: 0,_gtfs_dataset_name,shape_array_key,trip_id,stop_sequence,stop_id,gtfs_dataset_key,location_timestamp_local,pair,stage3_flag
213438,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,16,1901,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:36:29,-121.451164/38.54202,check in stage 2
213439,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,19,1905,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:38:48,-121.45553/38.546833,check in stage 2
213440,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,21,1907,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:39:33,-121.462166/38.546894,check in stage 2
213441,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,25,1914,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:43:40,-121.47143/38.554996,check in stage 2
213442,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,34,1923,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:50:10,-121.49152/38.56141,check in stage 2
213443,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,38,1940,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:52:38,-121.50116/38.564003,check in stage 2
213444,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,44,326,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:55:50,-121.49831/38.57585,check in stage 2
213445,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101539,47,329,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:57:33,-121.49721/38.578247,check in stage 2
213446,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101540,13,1896,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:13:12,-121.44682/38.534096,check in stage 2
213447,Sacramento Vehicle Positions,b7ed7b6ed70a7553118c8e065b79cb8d,1101540,17,1902,db56b50ab86b5f7a4ae2fc2dd9889bbe,2023-07-12 19:15:49,-121.45217/38.543858,check in stage 2


In [33]:
# Preview the stage-3 rows for a single shape.
# NOTE(review): the saved output for this cell has row labels 0-9 and is not in
# `sort_by_cols` order, so `stage3` may have been rebound in a since-removed
# cell — re-run from a fresh kernel to confirm.
stage3.loc[
    stage3.shape_array_key == "01355ed168ae609409fdb997c0b65f48",
    preview_cols,
]

Unnamed: 0,_gtfs_dataset_name,shape_array_key,trip_id,stop_sequence,stop_id,gtfs_dataset_key,location_timestamp_local,pair,stage3_flag
0,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904997,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 19:32:46,-118.46386/34.056263,repeated timestamps & locations
1,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904997,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 19:44:39,-118.46386/34.056263,repeated locations
2,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:47:37,-118.46409/34.056393,check in stage 2
3,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:24:26,-118.46376/34.056126,repeated timestamps & locations
4,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904966,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 20:06:47,-118.46409/34.05641,check in stage 2
5,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904966,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 19:54:45,-118.46364/34.056072,repeated timestamps & locations
6,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904986,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 16:56:17,-118.46373/34.056126,repeated timestamps & locations
7,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904986,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 17:22:55,-118.46385/34.05627,check in stage 2
8,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904966,2,1589,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 19:54:45,-118.46364/34.056072,repeated timestamps & locations
9,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904966,2,1589,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 20:07:40,-118.46711/34.055077,check in stage 2


In [30]:
# Run the stage-3 flagging on the Big Blue Bus rows only.
# NOTE(review): this passes `bbb_only` (all flagged BBB rows), whereas the
# all-operator run earlier passes `divide_by_zero` — confirm that is intended.
bbb_stage3 = speed_utils.flag_stage3(bbb_only, analysis_date)

2023-07-20 15:58:24.461833
repeated timestamps & locations    27579
check in stage 2                   26732
repeated locations                   990
Name: stage3_flag, dtype: int64
Have to check 48.33908970904685 % of rows in stage 2
Took 0:00:06.943896


In [35]:
# First row per shape (in `sort_by_cols` order), capped at 100 shapes.
(
    bbb_stage3
    .sort_values(by=sort_by_cols)
    .loc[:, preview_cols]
    .drop_duplicates(subset=["shape_array_key"])
    .head(100)
)

Unnamed: 0,_gtfs_dataset_name,shape_array_key,trip_id,stop_sequence,stop_id,gtfs_dataset_key,location_timestamp_local,pair,stage3_flag
4,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904966,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 20:06:47,-118.46409/34.05641,check in stage 2
154,Big Blue Bus VehiclePositions,080f585295228f8c8f52cb373b1685cc,903018,1,1512,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 05:58:02,-118.49252/34.015926,repeated timestamps & locations
173,Big Blue Bus VehiclePositions,0c4b7d79c8b43bf40199a622514941dd,905550,2,673,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 17:49:13,-118.50293/34.0404,check in stage 2
523,Big Blue Bus VehiclePositions,0dd7a7c593c82cd0141235fe6a930324,904317,1,1328,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 20:00:39,-118.44026/34.068424,repeated timestamps & locations
712,Big Blue Bus VehiclePositions,165ee71bd909bcac24ed525133beac46,904116,2,1313,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 07:50:54,-118.31632/34.061733,check in stage 2
928,Big Blue Bus VehiclePositions,1a93c5913a225e29e1532307aa2fc7f1,905460,2,1313,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:21:56,-118.318855/34.06175,check in stage 2
2032,Big Blue Bus VehiclePositions,1b7456f404325a39e5efef88fbf9dd56,904073,1,1302,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 06:50:11,-118.30965/34.0618,repeated timestamps & locations
2317,Big Blue Bus VehiclePositions,1b7d4195f80f1bec36c7eaff178aefc9,903736,2,1361,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 07:13:52,-118.40946/34.029255,check in stage 2
2374,Big Blue Bus VehiclePositions,21a802700dadbc2e249b595773a50363,905392,1,1184,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:13:56,-118.44488/34.069515,check in stage 2
4085,Big Blue Bus VehiclePositions,26cf9105aaf5efa345fe565befc7b67d,905235,11,629,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 06:40:41,-118.45221/34.053787,check in stage 2


In [36]:
# Stage-3 rows for one trip on one Big Blue Bus shape.
bbb_stage3.loc[
    (bbb_stage3.shape_array_key == "01355ed168ae609409fdb997c0b65f48")
    & (bbb_stage3.trip_id == "904999"),
    preview_cols,
]

Unnamed: 0,_gtfs_dataset_name,shape_array_key,trip_id,stop_sequence,stop_id,gtfs_dataset_key,location_timestamp_local,pair,stage3_flag
2,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:47:37,-118.46409/34.056393,check in stage 2
3,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,1,1588,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:24:26,-118.46376/34.056126,repeated timestamps & locations
12,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,2,1589,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:24:26,-118.46376/34.056126,repeated timestamps & locations
13,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,2,1589,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:48:29,-118.467026/34.055122,check in stage 2
21,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,4,576,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:50:43,-118.47218/34.05034,repeated timestamps & locations
22,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,4,576,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:49:58,-118.47093/34.051964,check in stage 2
23,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,5,945,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:50:43,-118.47218/34.05034,repeated timestamps & locations
32,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,6,241,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:52:56,-118.467705/34.044216,repeated timestamps & locations
33,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,6,241,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:52:11,-118.467705/34.044216,repeated timestamps & locations
37,Big Blue Bus VehiclePositions,01355ed168ae609409fdb997c0b65f48,904999,7,942,6c2d7daaf979779fa2089c6395baf98b,2023-07-12 18:52:56,-118.467705/34.044216,repeated timestamps & locations
