In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics

import shared_utils
import branca
from utils import *

from siuba import *
import pandas as pd
import geopandas as gpd
import shapely

import datetime as dt
import time
from zoneinfo import ZoneInfo

import rt_analysis as rt
import importlib



In [2]:
%%sql -o rt_coverage

# get all vehicle positions on selected dates, for the feed with itp id 170, and url number 0
SELECT calitp_itp_id, calitp_url_number, MIN(vehicle_timestamp) as min_timestamp, MAX(vehicle_timestamp) as max_timestamp
FROM `cal-itp-data-infra.gtfs_rt.vehicle_positions`
GROUP BY calitp_itp_id, calitp_url_number
# WHERE _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2021-11-30_290_0.parquet' ##utc, must bracket 2 days for 1 day pacific
#     OR _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2021-12-01_290_0.parquet'
ORDER BY max_timestamp DESC



Unnamed: 0,calitp_itp_id,calitp_url_number,min_timestamp,max_timestamp
0,282,1,1640817253,1642192269
1,194,0,1420160146,1642192268
2,106,0,1625804856,1642192268
3,183,0,1420070460,1642192268
4,380,0,1638643253,1642192268
...,...,...,...,...
59,170,0,1625803139,1642192250
60,294,0,1625804796,1640817236
61,290,0,1625804799,1640817217
62,167,0,1625804880,1640723712


In [3]:
## set system time
os.environ['TZ'] = 'America/Los_Angeles'
time.tzset()
time.tzname

('PST', 'PDT')

In [4]:
rt_coverage.max_timestamp = rt_coverage.max_timestamp.apply(convert_ts)
rt_coverage.min_timestamp = rt_coverage.min_timestamp.apply(convert_ts)

In [5]:
rt_coverage

Unnamed: 0,calitp_itp_id,calitp_url_number,min_timestamp,max_timestamp
0,282,1,2021-12-29 14:34:13,2022-01-14 12:31:09
1,194,0,2015-01-01 16:55:46,2022-01-14 12:31:08
2,106,0,2021-07-08 21:27:36,2022-01-14 12:31:08
3,183,0,2014-12-31 16:01:00,2022-01-14 12:31:08
4,380,0,2021-12-04 10:40:53,2022-01-14 12:31:08
...,...,...,...,...
59,170,0,2021-07-08 20:58:59,2022-01-14 12:30:50
60,294,0,2021-07-08 21:26:36,2021-12-29 14:33:56
61,290,0,2021-07-08 21:26:39,2021-12-29 14:33:37
62,167,0,2021-07-08 21:28:00,2021-12-28 12:35:12


In [6]:
air_view = pd.read_csv('airtable_rt_status.csv').rename(columns={'ITP ID': 'calitp_itp_id', 'GTFS Realtime Status': 'rt_status'})
air_view.calitp_itp_id = air_view.calitp_itp_id.fillna(9999).astype(int)

In [7]:
air_view.columns

Index(['Name', 'calitp_itp_id', 'Reporting Category', 'Caltrans District',
       'GTFS Static Status', 'rt_status', 'Missing Static',
       'Services Needing Alerts',
       'Services Needing TripUpdates or VehiclePositions'],
      dtype='object')

In [8]:
air_joined = air_view >> inner_join(_, rt_coverage, on = 'calitp_itp_id') >> distinct(_.calitp_itp_id, _keep_all=True)

In [9]:
air_joined.to_parquet('airtable_joined.parquet')

In [10]:
air_joined.to_csv('airtable_joined.csv')

In [11]:
air_joined.shape

(48, 12)

In [12]:
air_joined

Unnamed: 0,Name,calitp_itp_id,Reporting Category,Caltrans District,GTFS Static Status,rt_status,Missing Static,Services Needing Alerts,Services Needing TripUpdates or VehiclePositions,calitp_url_number,min_timestamp,max_timestamp
0,Long Beach Transit,170,Core,07 - Los Angeles,Static Incomplete,RT Incomplete,AquaLink,"Galaxy Express, AquaLink, UCLA/Westwood Commut...","Galaxy Express, AquaLink, UCLA/Westwood Commut...",0,2021-07-08 20:58:59,2022-01-14 12:30:50
1,Peninsula Corridor Joint Powers Board,246,Core,04 - Oakland,Static Incomplete,RT Incomplete,"Caltrain Shuttles, Caltrain Millbrae Express","Caltrain Shuttles, Caltrain Millbrae Express","Caltrain Shuttles, Caltrain Millbrae Express",1,2021-12-29 14:34:19,2022-01-14 12:31:00
2,City of Visalia,361,Core,06 - Fresno,Static Incomplete,RT Incomplete,V-LINE,"V-LINE, Sequoia Shuttle, Visalia Dial-A-Ride","V-LINE, Sequoia Shuttle, Visalia Dial-A-Ride",0,2015-01-03 02:55:42,2022-01-14 12:31:07
3,San Mateo County Transit District,290,Core,04 - Oakland,Static Incomplete,RT OK,"Bayshore Brisbane Senior Shuttle, SamTrans Shu...","Redi-Wheels, RediCoast, Bayshore Brisbane Seni...","Redi-Wheels, RediCoast, Bayshore Brisbane Seni...",1,2021-12-29 14:34:05,2022-01-14 12:30:58
4,Marin County Transit District,194,Core,04 - Oakland,Static Incomplete,RT OK,Muir Woods Shuttle,"Marin Access Paratransit, Muir Woods Shuttle, ...","Marin Access Paratransit, Muir Woods Shuttle, ...",0,2015-01-01 16:55:46,2022-01-14 12:31:08
5,Monterey-Salinas Transit,208,Core,05 - San Luis Obispo,Static Incomplete,RT OK,MST Senior Shuttles,"MST Monterey Trolley, MST RIDES, MST Taxi Vouc...","MST Monterey Trolley, MST RIDES, MST Taxi Vouc...",0,2021-07-08 20:57:45,2022-01-14 12:31:02
6,Napa Valley Transportation Authority,218,Core,04 - Oakland,Static OK,RT Incomplete,,"Calistoga Shuttle, Taxi Scrip, Vine Go, Americ...","Calistoga Shuttle, Taxi Scrip, Vine Go, Americ...",1,2021-12-29 14:34:17,2022-01-14 12:31:07
7,"Golden Gate Bridge, Highway and Transportation...",127,Core,04 - Oakland,Static OK,RT Incomplete,,"Golden Gate Ferry, Golden Gate Transit Paratra...","Golden Gate Ferry, Golden Gate Transit Paratra...",1,2021-12-29 14:34:12,2022-01-14 12:30:59
8,Transit Joint Powers Authority for Merced County,343,Core,10 - Stockton,Static OK,RT Incomplete,,"Merced The Micro Bus, Merced The Bus Paratransit","Merced The Bus, Merced The Micro Bus, Merced T...",0,2021-12-21 13:50:26,2022-01-14 12:31:08
9,Sacramento Regional Transit District,273,Core,03 - Marysville,Static OK,RT Incomplete,,"SacRT Light Rail, e-van, SacRT GO, Elk Grove T...","e-van, SacRT GO, SacRT Neighborhood Ride",0,2021-12-21 14:04:00,2022-01-14 12:31:00


In [6]:
rt_coverage >> inner_join(_, tbl.gtfs_schedule.agency() >> collect(), on = ['calitp_itp_id']) >> filter(_.calitp_itp_id == 290)

  return self.connectable.execute(*args, **kwargs)


Unnamed: 0,calitp_itp_id,calitp_url_number_x,min_timestamp,max_timestamp,calitp_url_number_y,agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email,calitp_extracted_at
125,290,1,2021-12-29 14:34:05,2022-01-13 15:38:11,0,samtrans-ca-us,SamTrans,http://www.samtrans.com,America/Los_Angeles,en,800-660-4287,,,2022-01-11
126,290,1,2021-12-29 14:34:05,2022-01-13 15:38:11,1,SM,SamTrans,http://www.samtrans.com,America/Los_Angeles,en,800-660-4287,,,2022-01-05
127,290,0,2021-07-08 21:26:39,2021-12-29 14:33:37,0,samtrans-ca-us,SamTrans,http://www.samtrans.com,America/Los_Angeles,en,800-660-4287,,,2022-01-11
128,290,0,2021-07-08 21:26:39,2021-12-29 14:33:37,1,SM,SamTrans,http://www.samtrans.com,America/Los_Angeles,en,800-660-4287,,,2022-01-05


In [29]:
rt_coverage >> inner_join(_, tbl.gtfs_schedule.agency() >> collect(), on = ['calitp_itp_id']) >> filter(_.calitp_itp_id == 182)

Unnamed: 0,calitp_itp_id,calitp_url_number_x,min_timestamp,max_timestamp,calitp_url_number_y,agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email,calitp_extracted_at
90,182,1,2022-01-12 10:53:39,2022-01-13 15:38:27,1,LACMTA_Rail,Metro - Los Angeles,https://www.metro.net,America/Los_Angeles,en,(323) 466-3876,,,2022-01-13
91,182,1,2022-01-12 10:53:39,2022-01-13 15:38:27,0,LACMTA,Metro - Los Angeles,https://www.metro.net,America/Los_Angeles,en,(323)466-3876,,,2021-12-18
92,182,0,2022-01-12 10:45:32,2022-01-13 15:38:25,1,LACMTA_Rail,Metro - Los Angeles,https://www.metro.net,America/Los_Angeles,en,(323) 466-3876,,,2022-01-13
93,182,0,2022-01-12 10:45:32,2022-01-13 15:38:25,0,LACMTA,Metro - Los Angeles,https://www.metro.net,America/Los_Angeles,en,(323)466-3876,,,2021-12-18


In [8]:
%%sql
SELECT * FROM `cal-itp-data-infra.gtfs_rt.vehicle_positions`
LIMIT 5

Unnamed: 0,calitp_itp_id,calitp_url_number,entity_id,vehicle_id,vehicle_trip_id,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp
0,350,0,vehicle_4262,4262,40089,1625856793,37.604324,-122.06929,1625856808
1,350,0,vehicle_4263,4263,40090,1625856793,37.610188,-122.066795,1625856808
2,350,0,vehicle_4264,4264,10171,1625856793,37.591003,-122.01802,1625856808
3,350,0,vehicle_4266,4266,10167,1625856793,37.59078,-122.01786,1625856808
4,350,0,vehicle_4267,4267,50137,1625856793,37.59561,-122.051575,1625856808


In [9]:
%%sql -o metro0_positions

# get all vehicle positions on selected dates, for the feed with itp id 170, and url number 0
SELECT *
FROM `cal-itp-data-infra.gtfs_rt.vehicle_positions`
WHERE _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2022-01-12_182_0.parquet' ##utc, must bracket 2 days for 1 day pacific
    # OR _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2021-12-01_290_0.parquet'
ORDER BY header_timestamp

Unnamed: 0,calitp_itp_id,calitp_url_number,entity_id,vehicle_id,vehicle_trip_id,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp
0,182,0,5817,5817,10105002581045-DEC21,1642013714,34.080147,-118.376580,1642013729
1,182,0,5810,5810,10053002331050-DEC21,1642013671,34.054066,-118.250320,1642013729
2,182,0,5811,5811,10051003301041-DEC21,1642013569,34.047920,-118.257150,1642013729
3,182,0,5813,5813,,1642013715,34.028564,-118.252525,1642013729
4,182,0,5814,5814,10055002841003-DEC21,1642013716,34.033695,-118.262460,1642013729
...,...,...,...,...,...,...,...,...,...
514894,182,0,5800,5800,10014005021606-DEC21,1642031974,34.035442,-118.368860,1642031988
514895,182,0,5801,5801,10002011261537-DEC21,1642031979,34.090210,-118.392490,1642031988
514896,182,0,5802,5802,10060001601558-DEC21,1642031974,33.925970,-118.210090,1642031988
514897,182,0,5803,5803,10212002141540-DEC21,1642031976,33.994614,-118.355760,1642031988


In [10]:
%%sql -o metro1_positions

# get all vehicle positions on selected dates, for the feed with itp id 170, and url number 0
SELECT *
FROM `cal-itp-data-infra.gtfs_rt.vehicle_positions`
WHERE _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2022-01-12_182_1.parquet' ##utc, must bracket 2 days for 1 day pacific
    # OR _FILE_NAME='gs://gtfs-data/rt-processed/vehicle_positions/vp_2021-12-01_290_0.parquet'
ORDER BY header_timestamp

Unnamed: 0,calitp_itp_id,calitp_url_number,entity_id,vehicle_id,vehicle_trip_id,vehicle_timestamp,vehicle_position_latitude,vehicle_position_longitude,header_timestamp
0,182,1,1035-1047-1068,1035-1047-1068,54949456,1642013730,34.047234,-118.26005,1642013769
1,182,1,1057,1057,54922444,1642013762,34.047770,-118.22785,1642013769
2,182,1,1020-1028,1020-1028,54949453,1642013752,34.025080,-118.35799,1642013769
3,182,1,537-538-547-548-579-580,537-538-547-548-579-580,55035306,1642013745,34.164757,-118.37452,1642013769
4,182,1,1026-1064-1069,1026-1064-1069,54949454,1642013762,34.018290,-118.30410,1642013769
...,...,...,...,...,...,...,...,...,...
19744,182,1,549-550-563-564-601-602,549-550-563-564-601-602,55035174,1642031983,34.101720,-118.31097,1642031989
19745,182,1,1075-1088-1136,1075-1088-1136,54949672,1642031947,33.812210,-118.19219,1642031989
19746,182,1,1023-1054-1063,1023-1054-1063,54949768,1642031977,34.040997,-118.26587,1642031989
19747,182,1,567-568-583-584,567-568-583-584,55035251,1642031983,34.061680,-118.30957,1642031989


In [11]:
(metro1_positions
 >> distinct(_.vehicle_trip_id, _.vehicle_timestamp, _keep_all=True)
 >> group_by(_.vehicle_trip_id)
 >> summarize(n = _.vehicle_timestamp.size, trip_time = _.vehicle_timestamp.max() - _.vehicle_timestamp.min())
 >> mutate(update_interval = _.trip_time / _.n, trip_minutes = _.trip_time / 60)
 >> mutate(mean_update_interval = _.update_interval.mean())
)

Unnamed: 0,vehicle_trip_id,n,trip_time,update_interval,trip_minutes,mean_update_interval
0,54922026,41,2675,65.243902,44.583333,58.210082
1,54922031,31,1780,57.419355,29.666667,58.210082
2,54922032,55,3251,59.109091,54.183333,58.210082
3,54922033,51,2999,58.803922,49.983333,58.210082
4,54922035,42,2416,57.523810,40.266667,58.210082
...,...,...,...,...,...,...
422,55035406,18,979,54.388889,16.316667,58.210082
423,55035407,12,1028,85.666667,17.133333,58.210082
424,55035408,30,1782,59.400000,29.700000,58.210082
425,55035409,26,1433,55.115385,23.883333,58.210082


In [12]:
(metro0_positions
 >> distinct(_.vehicle_trip_id, _.vehicle_timestamp, _keep_all=True)
 >> group_by(_.vehicle_trip_id)
 >> summarize(n = _.vehicle_timestamp.size, trip_time = _.vehicle_timestamp.max() - _.vehicle_timestamp.min())
 >> mutate(update_interval = _.trip_time / _.n, trip_minutes = _.trip_time / 60)
 >> mutate(median_update_interval = _.update_interval.median())
)

Unnamed: 0,vehicle_trip_id,n,trip_time,update_interval,trip_minutes,median_update_interval
0,10002011231518-DEC21,50,2465,49.300000,41.083333,51.744673
1,10002011240911-DEC21,3,60,20.000000,1.000000,51.744673
2,10002011240930-DEC21,33,1800,54.545455,30.000000,51.744673
3,10002011240940-DEC21,36,1885,52.361111,31.416667,51.744673
4,10002011240949-DEC21,43,2620,60.930233,43.666667,51.744673
...,...,...,...,...,...,...
4583,10910002121411-DEC21,53,3333,62.886792,55.550000,51.744673
4584,10910002121431-DEC21,155,7401,47.748387,123.350000,51.744673
4585,10910002121451-DEC21,109,5260,48.256881,87.666667,51.744673
4586,10910002121511-DEC21,89,4220,47.415730,70.333333,51.744673


In [28]:
bool(pd.DataFrame().empty)

True