In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB?

from siuba import *
import pandas as pd
import geopandas as gpd
import datetime as dt

import rt_analysis as rt
import rt_filter_map_plot



# Checking if intermediate data is available

   * First, decide on a date of interest. Let's use October 8, 2022
   * The rt.get_operators function takes the analysis date as a datetime.date object, which we'll construct below
   * It also takes a list of operators (currently itp_ids) of interest
   * Let's start with Big Blue Bus (300)

In [2]:
rt.get_operators?

[0;31mSignature:[0m
[0mrt[0m[0;34m.[0m[0mget_operators[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0manalysis_date[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moperator_list[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgenerate_new[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpbar[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Function for checking the existence of rt_trips and stop_delay_views in GCS for operators on a given day.

analysis_date: datetime.date
operator_list: list of itp_id's
generate_new: 'True' to generate OperatorDayAnalysis and export to GCS, 'False' to not generate
pbar: tqdm.notebook.tqdm(), optional progress bar for generation
[0;31mFile:[0m      ~/data-analyses/rt_delay/rt_analysis.py
[0;31mType:[0m      function


In [3]:
analysis_date = dt.date(2022, 10, 8)

In [4]:
rt.get_operators(analysis_date, [300])

not yet run: 300


{300: 'not_yet_run'}

Function returns a dictionary of the status of each itp_id passed (in addition to printing it out). We can see that 300 is not yet run on our date of interest.

# Generating intermediate data

Next, let's call the function again with `generate_new=True`, which will process the raw data using `rt.OperatorDayAnalysis`. Let's also give the function a tqdm progress bar using the `pbar` attribute.

_if you're running this notebook yourself, this date+operator might be already ran! feel free to try another if you're curious about the generation process, or skip to the next section._

In [5]:
from tqdm.notebook import tqdm

In [6]:
pbar = tqdm()

0it [00:00, ?it/s]

In [7]:
rt.get_operators(analysis_date, [300], generate_new=True, pbar=pbar)

calculating for agency: 300...
getting trips...
complete for agency: 300


{300: 'newly_run'}

Notice how the status returned by `get_operators` has changed to 'newly_run'. This means that this day and operator now has intermediate data available for the next steps.

# Generate a speedmap with RtFilterMapper

   * First, we'll construct an RtFilterMapper object from intermediate data using `rt_filter_map_plot.from_gcs()`
   * Be sure to check first with `rt.get_operators` that data is available. Otherwise, you'll get an error

In [9]:
rt_filter_map_plot.from_gcs?

[0;31mSignature:[0m [0mrt_filter_map_plot[0m[0;34m.[0m[0mfrom_gcs[0m[0;34m([0m[0mitp_id[0m[0;34m,[0m [0manalysis_date[0m[0;34m,[0m [0mpbar[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Generates RtFilterMapper from cached artifacts in GCS. Generate using rt_analysis.OperatorDayAnalysis.export_views_gcs()
    
[0;31mFile:[0m      ~/data-analyses/rt_delay/rt_filter_map_plot.py
[0;31mType:[0m      function


In [10]:
rt_day = rt_filter_map_plot.from_gcs(300, analysis_date)

found parquet


In [14]:
no_data = rt_filter_map_plot.from_gcs(300, dt.date(2022, 12, 20)) #we haven't ran this date, so this will give an error

FileNotFoundError: calitp-analytics-data/data-analyses/rt_delay/rt_trips/300_12_20.parquet

## Key Attributes: information and dataframes

RtFilterMapper

In [21]:
rt_day.calitp_agency_name

'Big Blue Bus'

In [22]:
rt_day.calitp_itp_id

300

In [15]:
rt_day.rt_trips >> head(3)

Unnamed: 0,calitp_itp_id,calitp_url_number,service_date,trip_key,trip_id,route_id,direction_id,shape_id,calitp_extracted_at,calitp_deleted_at,route_type,route_short_name,route_long_name,route_desc,median_time,direction,mean_speed_mph,calitp_agency_name
0,300,0,2022-10-08,-4256251627035452947,886062,3510,1,25990,2022-09-01,2099-01-01,3,7,Pico Blvd,,11:02:28.500000,Westbound,12.950541,Big Blue Bus
1,300,0,2022-10-08,-1298289206159871761,886106,3510,1,25990,2022-09-01,2099-01-01,3,7,Pico Blvd,,22:59:29.500000,Westbound,8.583352,Big Blue Bus
2,300,0,2022-10-08,-7326266918698775824,886095,3510,1,25990,2022-09-01,2099-01-01,3,7,Pico Blvd,,19:04:53,Westbound,12.143031,Big Blue Bus


In [16]:
rt_day.stop_delay_view >> head(3)

Unnamed: 0,shape_meters,stop_id,stop_name,geometry,shape_id,trip_key,trip_id,stop_sequence,arrival_time,route_id,route_short_name,direction_id,actual_time,delay_seconds
0,341.090795,1302,WILSHIRE WB & WESTERN FS (Wilshire/Western Sta),POINT (156061.835 -437929.534),25990,-3.648052e+18,886057,1.0,2022-10-08 09:14:00,3510,7,1,2022-10-08 09:12:55.136675,0.0
1,1270.261846,1313,CRENSHAW SB & WILSHIRE FS,POINT (155142.387 -437962.859),25990,-3.648052e+18,886057,2.0,2022-10-08 09:15:57,3510,7,1,2022-10-08 09:15:47.746704,0.0
2,2129.546177,1314,CRENSHAW SB & OLYMPIC FS,POINT (154801.889 -438751.286),25990,-3.648052e+18,886057,3.0,2022-10-08 09:17:47,3510,7,1,2022-10-08 09:17:44.967692,0.0


In [19]:
rt_day.endpoint_delay_summary >> head(3)

Unnamed: 0,direction_id,route_id,arrival_hour,n_trips,mean_end_delay_seconds
0,0,3504,8.0,2,139.0
1,0,3504,9.0,3,97.0
2,0,3504,10.0,3,46.333333


In [18]:
rt_day.endpoint_delay_view >> head(3)

Unnamed: 0,shape_meters,stop_id,stop_name,geometry,shape_id,trip_key,trip_id,stop_sequence,arrival_time,route_id,route_short_name,direction_id,actual_time,delay_seconds,arrival_hour,mean_speed_mph
0,6302.085356,402,WESTWOOD NB & WEYBURN NS,POINT (143514.338 -438054.558),26007,4.14277e+18,886791,12.0,2022-10-08 09:42:28,3514,R12,0,2022-10-08 09:39:28.551401,0.0,9.0,15.940919
1,12421.457555,402,WESTWOOD NB & WEYBURN NS,POINT (143514.338 -438054.558),25968,8.314736e+18,884957,39.0,2022-10-08 09:20:06,3504,1,0,2022-10-08 09:19:38.814195,0.0,9.0,12.997392
2,6302.085356,402,WESTWOOD NB & WEYBURN NS,POINT (143514.338 -438054.558),26007,-4.031307e+18,886759,12.0,2022-10-08 19:52:28,3514,R12,0,2022-10-08 20:04:56.434874,748.0,19.0,5.248218


## Setting filter and generating a map

In [23]:
rt_day.set_filter?

[0;31mSignature:[0m
[0mrt_day[0m[0;34m.[0m[0mset_filter[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mstart_time[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mend_time[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mroute_names[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshape_ids[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdirection_id[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdirection[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtrip_ids[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mroute_types[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
start_time, end_time: string %H:%M, for example '11:00' and '14:00'
route_names: list or pd.Series of route_names (GTFS route_short_name)
direction_id: '0' or '1'
direc

In [24]:
rt_day.set_filter(start_time='06:00', end_time='09:00')

In [26]:
m = rt_day.segment_speed_map() # don't worry about the FutureWarning, it's upstream in Siuba. You may wish to hide this cell's output

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	

In [28]:
# m

## Advanced filtering

## Speed variability chart