# Add Service Alerts Functions

In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) ## 800GB?

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
# import utils

from siuba import *
import pandas as pd

import datetime as dt
import time



## Quick example of finding cancelled service in service alerts

In [2]:
alert_query = '''SELECT * FROM `cal-itp-data-infra.staging.stg_rt__service_alerts`
WHERE date BETWEEN '2022-05-11' AND '2022-05-12' AND calitp_itp_id in (290, 300)'''

In [110]:
alert_twodate = query_sql(alert_query)

In [111]:
#finding the unique service messages
alert_twodate.effect.unique()

array([None, 'UNKNOWN_EFFECT', 'REDUCED_SERVICE', 'MODIFIED_SERVICE',
       'ACCESSIBILITY_ISSUE', 'NO_SERVICE', 'STOP_MOVED', 'NO_EFFECT',
       'OTHER_EFFECT', 'DETOUR', 'ADDITIONAL_SERVICE'], dtype=object)

In [112]:
#Checking how many calitp_urls there are
alert_twodate>>group_by(_.calitp_itp_id)>>summarize(n = _.calitp_url_number.unique())

Unnamed: 0,calitp_itp_id,n
0,290,1
1,300,0


In [113]:
#filter for no service
alert_twodate>>filter(_.effect=="NO_SERVICE")

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
6459,290,1,gtfs-data/rt/2022-05-12T21:43:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,18525cd36e0f75e935f9fb0d6fa9f52d
6461,290,1,gtfs-data/rt/2022-05-12T21:44:00/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,f1229356744e5fe5a595ad1b1956876c
6469,290,1,gtfs-data/rt/2022-05-12T21:44:20/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,98c58b892afd7e62364d368cbbd85457
6483,290,1,gtfs-data/rt/2022-05-12T21:44:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,b96fbc7b5606c13cf30a2c4fbb015c55
6485,290,1,gtfs-data/rt/2022-05-12T21:45:00/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,90c0d44ddc26d0e649ba4bf712956059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541383,300,0,gtfs-data/rt/2022-05-12T05:58:34/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,63a7cca902b7c52caa7dd52b3d161c7d
541384,300,0,gtfs-data/rt/2022-05-12T05:58:54/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,9d97bc3a05a8ddbb94e42bc5635e7590
541385,300,0,gtfs-data/rt/2022-05-12T05:59:14/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,ece7fee2cd16fff7c2fd685f0753ee07
541386,300,0,gtfs-data/rt/2022-05-12T05:59:34/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,273dd7a09ca5a0b4c917c7fc22e8c703


## Function for Service Alerts

In [114]:
def get_no_service_alerts(alerts_df,
                          analysis_date: list,
                          itp_id: list,
                          return_df):
    
    alerts_df = alerts_df>>filter(_.calitp_itp_id == itp_id) >> distinct(_.id, _keep_all=True)
    no_service = alerts_df>>filter(_.effect == 'NO_SERVICE')

    #get route_ids where if it appears in no_service
    no_service_rts = [entity[0]['routeId'] for entity in no_service.informed_entities.to_list()]
    
    routes = shared_utils.gtfs_utils.get_route_info(analysis_date, [itp_id])
    routes = routes >> filter(_.route_id.isin(no_service_rts))
    
    trips = shared_utils.gtfs_utils.get_trips(analysis_date, [itp_id])
    trips = trips >> filter(_.route_id.isin(no_service_rts))
    
    if return_df == 'trips':
        return trips
    elif return_df=='routes':
        return routes

In [115]:
no_service = get_no_service_alerts(alert_twodate,'2022-05-12', 300, 'trips')

In [116]:
no_service_agg = no_service>>count(_.route_id)
no_service_agg = no_service_agg.rename(columns={'n':'n_no_service'})

In [117]:
no_service_agg

Unnamed: 0,route_id,n_no_service
0,3501,104


### Read in existing `rt vs sched` table

In [118]:
import utils

In [119]:
rt_sched = utils.read_data()

In [120]:
rt_sched.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
2372,290,SamTrans,1,95,95,2022-05-12,2021-08-01,2022-06-24,2,2,1.0,Thursday,May,04 - Oakland


In [121]:
rt_sched_filtered = rt_sched>>filter(_.calitp_itp_id==300, _.service_date == '2022-05-12')

In [122]:
route = rt_sched_filtered>>filter(_.route_id == '3501')
route

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
1566,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.0,Thursday,May,07 - Los Angeles


#### Merge

In [123]:
pd.merge(rt_sched_filtered, no_service_agg, how='left', on= 'route_id')

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
0,300,Big Blue Bus,0,3479,1,2022-05-12,2022-03-08,2022-09-01,180,180,1.0,Thursday,May,07 - Los Angeles,
1,300,Big Blue Bus,0,3480,2,2022-05-12,2022-03-08,2022-09-01,82,79,0.96,Thursday,May,07 - Los Angeles,
2,300,Big Blue Bus,0,3481,3,2022-05-12,2022-03-08,2022-09-01,145,133,0.92,Thursday,May,07 - Los Angeles,
3,300,Big Blue Bus,0,3485,7,2022-05-12,2022-03-08,2022-09-01,153,147,0.96,Thursday,May,07 - Los Angeles,
4,300,Big Blue Bus,0,3486,8,2022-05-12,2022-03-08,2022-09-01,71,71,1.0,Thursday,May,07 - Los Angeles,
5,300,Big Blue Bus,0,3487,9,2022-05-12,2022-03-08,2022-09-01,68,67,0.99,Thursday,May,07 - Los Angeles,
6,300,Big Blue Bus,0,3489,R12,2022-05-12,2022-03-08,2022-09-01,153,153,1.0,Thursday,May,07 - Los Angeles,
7,300,Big Blue Bus,0,3490,14,2022-05-12,2022-03-08,2022-09-01,107,107,1.0,Thursday,May,07 - Los Angeles,
8,300,Big Blue Bus,0,3493,17,2022-05-12,2022-03-08,2022-09-01,88,81,0.92,Thursday,May,07 - Los Angeles,
9,300,Big Blue Bus,0,3494,18,2022-05-12,2022-03-08,2022-09-01,53,53,1.0,Thursday,May,07 - Los Angeles,


## For two agencies

In [176]:
def get_no_service_alerts2(df,
                          analysis_date: str,
                          itp_ids: list,
                          return_df):
    
    #good for one date as of now. removing calitp_itp_id filter for initial df
    df = df >> distinct(_.id, _keep_all=True)
    no_service = df>>filter(_.effect == 'NO_SERVICE')

    #get route_ids where if it appears in no_service
    no_service_rts = [entity[0]['routeId'] for entity in no_service.informed_entities.to_list()]
    print(no_service_rts)
    #remove None flag if comes up with routes
   # no_service_rts.remove(None)
    
    if return_df == 'trips':
        trips = shared_utils.gtfs_utils.get_trips(analysis_date, itp_ids)
        trips = trips >> filter(_.route_id.isin(no_service_rts))
        
        return trips
    
    elif return_df=='routes':
        routes = shared_utils.gtfs_utils.get_route_info(analysis_date, itp_ids)
        routes = routes >> filter(_.route_id.isin(no_service_rts))
        
        return routes

In [177]:
trip_alerts = get_no_service_alerts2(alert_twodate,
                          analysis_date= '2022-05-11',
                          itp_ids = [290, 300],
                          return_df = 'trips')

['2', None, '17', '3371', '3501']


In [151]:
trip_alerts.sample()

Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,direction_id,trip_headsign,calitp_deleted_at,trip_short_name,wheelchair_accessible,block_id,bikes_allowed,calitp_hash,shape_id,calitp_extracted_at
3116,4100255323842349957,3038183767997729681,883683,3501,300,0,77710,2022-05-11,0,2022-03-27,...,1,SMC Bundy Campus,2022-07-18,,0,,0,a0lpi+umSCjgqACpR8+JPA==,25956,2022-03-29


In [152]:
def agg_alerts(df):
    no_service = df >> group_by(_.calitp_itp_id) >>count(_.route_id)
    no_service = no_service.rename(columns={'n':'n_no_service'})
    
    return no_service

In [153]:
agg_alerts(trip_alerts)

Unnamed: 0,calitp_itp_id,route_id,n_no_service
0,290,17,33
1,300,3501,104


In [154]:
def add_to_rtsched_table(trips_alerts_df,
                         rt_sched_table,
                         analysis_date,
                        ):
    no_service = agg_alerts(trips_alerts_df)
    
    rt_sched_table = rt_sched_table>>filter(_.service_date==analysis_date)
    
    #merge trips with no service with rt_vs_sched table
    fulldf = (pd.merge(rt_sched_table, no_service, how='left', on= ['calitp_itp_id','route_id']))
    
    #fill NA with 0 to get the correct number of scheduled trips
    fulldf['n_no_service'] = fulldf['n_no_service'].fillna(value=0)
    fulldf['num_sched_true'] = fulldf['num_sched'] - fulldf['n_no_service']
    
    return fulldf

In [155]:
full_df = add_to_rtsched_table(trip_alerts,
                         rt_sched,
                         analysis_date = '2022-05-11'
                        )

In [156]:
full_df

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service,num_sched_true
0,290,SamTrans,1,110,110,2022-05-11,2021-04-16,2099-01-01,50,50,1.00,Wednesday,May,04 - Oakland,0.00,50.00
1,290,SamTrans,1,120,120,2022-05-11,2021-04-16,2099-01-01,139,138,0.99,Wednesday,May,04 - Oakland,0.00,139.00
2,290,SamTrans,1,121,121,2022-05-11,2021-04-16,2022-07-23,32,32,1.00,Wednesday,May,04 - Oakland,0.00,32.00
3,290,SamTrans,1,122,122,2022-05-11,2021-04-16,2099-01-01,74,73,0.99,Wednesday,May,04 - Oakland,0.00,74.00
4,290,SamTrans,1,130,130,2022-05-11,2021-04-16,2022-07-23,130,130,1.00,Wednesday,May,04 - Oakland,0.00,130.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,290,SamTrans,1,17,17,2022-05-11,2021-04-16,2022-08-08,33,33,1.00,Wednesday,May,04 - Oakland,33.00,0.00
82,290,SamTrans,1,112,112,2022-05-11,2021-04-16,2022-07-23,33,33,1.00,Wednesday,May,04 - Oakland,0.00,33.00
83,300,Big Blue Bus,0,3500,43,2022-05-11,2022-03-08,2022-09-01,35,35,1.00,Wednesday,May,07 - Los Angeles,0.00,35.00
84,290,SamTrans,1,398,398,2022-05-11,2021-04-16,2022-08-08,36,36,1.00,Wednesday,May,04 - Oakland,0.00,36.00


### written out

In [25]:
no_service2 = alerts>>filter(_.effect=="NO_SERVICE") >> distinct(_.id, _keep_all=True)

In [26]:
no_service2

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
0,290,1,gtfs-data/rt/2022-05-12T21:43:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,18525cd36e0f75e935f9fb0d6fa9f52d
1,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18202,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': None, 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,fe8088550b2eb3aa28700fc67c81b18b
2,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18203,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': '17', 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,f7cc071b38072f9ce87a27a057abe39f
3,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,30856f5c-c89a-44bf-a9f7-66f6212d6d7c,"[{'start': 1641801600, 'end': None}]","[{'agencyId': None, 'routeId': '3371', 'routeT...",OTHER_CAUSE,NO_SERVICE,[{'text': 'https://www.bigbluebus.com/servicea...,[],[],[],[],,ca34513e31c268e77a2e355c9a216e6d
4,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,d4710fc0982bab7e7664327b34d8ed42


In [27]:
no_service_rts2 = [entity[0]['routeId'] for entity in no_service2.informed_entities.to_list()]

In [28]:
no_service_rts2.remove(None)

In [29]:
no_service_rts2

['2', '17', '3371', '3501']

In [30]:
## have to keep one date instead of 2

In [31]:
analysis_dates= '2022-05-11'
itp_ids= [290, 300]

In [32]:
routes2 = shared_utils.gtfs_utils.get_route_info(analysis_dates, itp_ids)

In [33]:
routes2.sample()

Unnamed: 0,feed_key,route_key,date,route_short_name,route_desc,calitp_deleted_at,agency_id,agency_name,route_long_name,calitp_itp_id,...,route_id,route_continuous_pickup,route_url,agency_fare_url,agency_timezone,agency_email,route_type,agency_phone,route_continuous_drop_off,agency_lang
142,-4021584186578633324,6070283211440112721,2022-05-11,80,,2022-06-19,,,Oak Knoll ES - Santa Cruz/Elder,290,...,80-196,,,,,,3,,,


#### expected output

In [34]:
routes2 = routes2 >> filter(_.route_id.isin(no_service_rts2))

In [35]:
routes2

Unnamed: 0,feed_key,route_key,date,route_short_name,route_desc,calitp_deleted_at,agency_id,agency_name,route_long_name,calitp_itp_id,...,route_id,route_continuous_pickup,route_url,agency_fare_url,agency_timezone,agency_email,route_type,agency_phone,route_continuous_drop_off,agency_lang
47,4100255323842349957,7100850145565938766,2022-05-11,44,,2022-09-01,6216179,Big Blue Bus,17th St Sta - SMC Bundy Campus,300,...,3501,,http://bigbluebus.com/Routes-and-Schedules/Rou...,,America/Los_Angeles,,3,310-451-5444,,en
57,3742542532326916514,5751554105033898922,2022-05-11,17,,2022-08-08,SM,SamTrans,Linda Mar Park & Ride - Pescadero,290,...,17,,,,America/Los_Angeles,,3,800-660-4287,,en


In [36]:
trips2 = shared_utils.gtfs_utils.get_trips(analysis_dates, itp_ids)
trips2 = trips2 >> filter(_.route_id.isin(no_service_rts2))

In [37]:
trips2.sample()

Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,direction_id,trip_headsign,calitp_deleted_at,trip_short_name,wheelchair_accessible,block_id,bikes_allowed,calitp_hash,shape_id,calitp_extracted_at
3114,4100255323842349957,-8672651115184209467,883691,3501,300,0,77710,2022-05-11,0,2022-03-27,...,1,SMC Bundy Campus,2022-07-18,,0,,0,C/REolUsEjZwgtL8ecllJQ==,25956,2022-03-29


In [38]:
no_service_agg2 = trips2 >> group_by(_.calitp_itp_id) >>count(_.route_id)

In [39]:
no_service_agg2 = no_service_agg2.rename(columns={'n':'n_no_service'})

In [40]:
no_service_agg2

Unnamed: 0,calitp_itp_id,route_id,n_no_service
0,290,17,33
1,300,3501,104


#### Adding rt_sched

In [41]:
##filtering just for service date since we already have the two org
## in the future will need to filter for org
rt_sched_filtered2 = rt_sched>>filter(_.service_date == '2022-05-12')

In [42]:
rt_sched_filtered2.sample(5)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
350,290,SamTrans,1,260,260,2022-05-12,2021-04-16,2022-07-23,26,26,1.0,Thursday,May,04 - Oakland
1838,290,SamTrans,1,28,28,2022-05-12,2021-08-01,2022-06-24,2,2,1.0,Thursday,May,04 - Oakland
2691,290,SamTrans,1,58,58,2022-05-12,2021-08-01,2022-06-24,4,4,1.0,Thursday,May,04 - Oakland
2604,290,SamTrans,1,35,35,2022-05-12,2021-08-01,2022-06-24,4,4,1.0,Thursday,May,04 - Oakland
3735,290,SamTrans,1,17,17,2022-05-12,2021-04-16,2022-08-08,33,33,1.0,Thursday,May,04 - Oakland


In [43]:
full = (pd.merge(rt_sched_filtered2, no_service_agg2, how='left', on= ['calitp_itp_id','route_id']))

In [44]:
full.sample(5)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
21,300,Big Blue Bus,0,3493,17,2022-05-12,2022-03-08,2022-09-01,88,81,0.92,Thursday,May,07 - Los Angeles,
1,290,SamTrans,1,120,120,2022-05-12,2021-04-16,2099-01-01,139,139,1.0,Thursday,May,04 - Oakland,
68,290,SamTrans,1,38,38,2022-05-12,2021-04-16,2022-08-08,12,12,1.0,Thursday,May,04 - Oakland,
4,290,SamTrans,1,130,130,2022-05-12,2021-04-16,2022-07-23,130,130,1.0,Thursday,May,04 - Oakland,
63,290,SamTrans,1,60,60,2022-05-12,2021-08-01,2022-06-24,6,6,1.0,Thursday,May,04 - Oakland,


In [45]:
full>>filter(_.n_no_service.notnull())

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
26,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.0,Thursday,May,07 - Los Angeles,104.0
78,290,SamTrans,1,17,17,2022-05-12,2021-04-16,2022-08-08,33,33,1.0,Thursday,May,04 - Oakland,33.0


In [46]:
full['n_no_service'] = full['n_no_service'].fillna(value=0)

In [47]:
full['num_sched_true'] = full['num_sched'] - full['n_no_service']

In [48]:
full>>arrange(-_.n_no_service)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service,num_sched_true
26,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.00,Thursday,May,07 - Los Angeles,104.00,0.00
78,290,SamTrans,1,17,17,2022-05-12,2021-04-16,2022-08-08,33,33,1.00,Thursday,May,04 - Oakland,33.00,0.00
0,290,SamTrans,1,110,110,2022-05-12,2021-04-16,2099-01-01,50,50,1.00,Thursday,May,04 - Oakland,0.00,50.00
1,290,SamTrans,1,120,120,2022-05-12,2021-04-16,2099-01-01,139,139,1.00,Thursday,May,04 - Oakland,0.00,139.00
2,290,SamTrans,1,121,121,2022-05-12,2021-04-16,2022-07-23,32,32,1.00,Thursday,May,04 - Oakland,0.00,32.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,300,Big Blue Bus,0,3500,43,2022-05-12,2022-03-08,2022-09-01,35,35,1.00,Thursday,May,07 - Los Angeles,0.00,35.00
82,290,SamTrans,1,398,398,2022-05-12,2021-04-16,2022-08-08,36,36,1.00,Thursday,May,04 - Oakland,0.00,36.00
83,300,Big Blue Bus,0,3498,41,2022-05-12,2022-03-08,2022-09-01,39,39,1.00,Thursday,May,07 - Los Angeles,0.00,39.00
84,290,SamTrans,1,141,141,2022-05-12,2021-04-16,2022-07-23,54,54,1.00,Thursday,May,04 - Oakland,0.00,54.00


notes:
    * interesting that SamTran's "No Service" Routes do have vehicle positions.. need to check that out 

### Re-aggregating the `pct_w_vp`

In [157]:
# testing with utils using new true num_sched

utils.get_agg_pct(full_df,
                groupings = ['calitp_itp_id', 'agency_name'],
                sum_sched= 'num_sched_true',
                sum_vp = 'num_vp'
               )

Unnamed: 0,calitp_itp_id,agency_name,num_sched_true,num_vp,avg
0,290,SamTrans,1422.0,1376,0.97
1,300,Big Blue Bus,1412.0,1381,0.98


In [158]:
#using regular num_sched
utils.get_agg_pct(full_df,
                groupings = ['calitp_itp_id', 'agency_name'],
                sum_sched= 'num_sched',
                sum_vp = 'num_vp'
               )

Unnamed: 0,calitp_itp_id,agency_name,num_sched,num_vp,avg
0,290,SamTrans,1455,1376,0.95
1,300,Big Blue Bus,1516,1381,0.91


## Add more agencies
* using May 4th table

In [159]:
may4 = query_sql(f'''
SELECT * FROM `cal-itp-data-infra.views.gtfs_rt_vs_schedule_trips_may4_sample`
''')

In [135]:
may4_orgs = sorted(list(may4.calitp_itp_id.unique()))

In [53]:
#need alerts first
# no_service_all = get_no_service_alerts(alerts,'2022-05-12', 300, 'trips')

In [160]:
def get_alert_data(analysis_date):
    alerts = query_sql(
        f"""
        SELECT * FROM `cal-itp-data-infra.staging.stg_rt__service_alerts`
        WHERE date = '{analysis_date}' AND effect = 'NO_SERVICE'
                """)
    ## removing itp for now
       # AND calitp_itp_id in ({itp_list}) 

    return alerts
                

In [161]:
alerts = get_alert_data('2022-05-04')

In [162]:
len(alerts)

175881

In [163]:
alerts.head()

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
0,221,0,gtfs-data/rt/2022-05-04T00:00:07/221/0/gtfs_rt...,2022-05-04,c4a44a15-aa31-4bd0-9f9c-eb78c8f4989d,"[{'start': 1647270840, 'end': None}]","[{'agencyId': None, 'routeId': '14212', 'route...",CONSTRUCTION,NO_SERVICE,[],[],[],[],[],,d9e722e08dda31d726d163833b4693b0
1,221,0,gtfs-data/rt/2022-05-04T00:00:27/221/0/gtfs_rt...,2022-05-04,c4a44a15-aa31-4bd0-9f9c-eb78c8f4989d,"[{'start': 1647270840, 'end': None}]","[{'agencyId': None, 'routeId': '14212', 'route...",CONSTRUCTION,NO_SERVICE,[],[],[],[],[],,4395e234b083c6955cbf9fd0929affd7
2,221,0,gtfs-data/rt/2022-05-04T00:00:47/221/0/gtfs_rt...,2022-05-04,c4a44a15-aa31-4bd0-9f9c-eb78c8f4989d,"[{'start': 1647270840, 'end': None}]","[{'agencyId': None, 'routeId': '14212', 'route...",CONSTRUCTION,NO_SERVICE,[],[],[],[],[],,2c2b85942177272e044add4617281bba
3,221,0,gtfs-data/rt/2022-05-04T00:01:07/221/0/gtfs_rt...,2022-05-04,c4a44a15-aa31-4bd0-9f9c-eb78c8f4989d,"[{'start': 1647270840, 'end': None}]","[{'agencyId': None, 'routeId': '14212', 'route...",CONSTRUCTION,NO_SERVICE,[],[],[],[],[],,2f502b071bfa754f41127b3377382a75
4,221,0,gtfs-data/rt/2022-05-04T00:01:27/221/0/gtfs_rt...,2022-05-04,c4a44a15-aa31-4bd0-9f9c-eb78c8f4989d,"[{'start': 1647270840, 'end': None}]","[{'agencyId': None, 'routeId': '14212', 'route...",CONSTRUCTION,NO_SERVICE,[],[],[],[],[],,3b13299440a07514d3a53ade8e93d0d6


In [164]:
len(may4_orgs)

167

In [165]:
len(list(alerts.calitp_itp_id.unique()))

34

In [187]:
# getting error for metrolink, itp_id 323
## will remove from list

may4_orgs.remove(323)

In [167]:
may4_orgs

[4,
 6,
 10,
 11,
 13,
 16,
 17,
 18,
 23,
 24,
 29,
 30,
 34,
 35,
 36,
 37,
 42,
 45,
 48,
 49,
 50,
 56,
 61,
 70,
 71,
 75,
 76,
 77,
 79,
 82,
 83,
 86,
 87,
 95,
 98,
 99,
 101,
 102,
 103,
 105,
 106,
 108,
 110,
 112,
 116,
 118,
 120,
 121,
 122,
 123,
 126,
 127,
 129,
 135,
 137,
 142,
 146,
 148,
 152,
 154,
 159,
 162,
 167,
 168,
 170,
 171,
 172,
 173,
 174,
 176,
 177,
 178,
 179,
 181,
 182,
 183,
 187,
 188,
 190,
 194,
 198,
 199,
 200,
 201,
 204,
 208,
 210,
 212,
 217,
 218,
 220,
 221,
 226,
 228,
 231,
 232,
 235,
 238,
 239,
 243,
 246,
 247,
 251,
 257,
 259,
 260,
 261,
 264,
 269,
 270,
 271,
 273,
 274,
 278,
 279,
 280,
 281,
 282,
 284,
 287,
 289,
 290,
 293,
 294,
 295,
 296,
 300,
 301,
 305,
 308,
 310,
 312,
 314,
 315,
 320,
 323,
 324,
 329,
 331,
 334,
 336,
 337,
 339,
 341,
 343,
 344,
 346,
 349,
 350,
 351,
 356,
 360,
 361,
 366,
 368,
 372,
 374,
 376,
 380,
 381,
 386,
 389,
 473,
 474,
 482,
 483,
 484]

In [168]:
alerts>>filter(_.calitp_itp_id==290)

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
55473,290,1,gtfs-data/rt/2022-05-04T17:00:18/290/1/gtfs_rt...,2022-05-04,FS_d961beae-b2bc-4fb7-b46f-abe0ec672ec2,"[{'start': 1651561200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,418fc8decb975edcf6b4ef6d65c69900
55474,290,1,gtfs-data/rt/2022-05-04T17:00:38/290/1/gtfs_rt...,2022-05-04,FS_d961beae-b2bc-4fb7-b46f-abe0ec672ec2,"[{'start': 1651561200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,a067d097c43f191eef8f7bb64db8ba74
55475,290,1,gtfs-data/rt/2022-05-04T17:00:58/290/1/gtfs_rt...,2022-05-04,FS_d961beae-b2bc-4fb7-b46f-abe0ec672ec2,"[{'start': 1651561200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,e527fd4b8fee6ef861ea01b2480f31c1
55476,290,1,gtfs-data/rt/2022-05-04T17:01:18/290/1/gtfs_rt...,2022-05-04,FS_d961beae-b2bc-4fb7-b46f-abe0ec672ec2,"[{'start': 1651561200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,4c524696519b33ee90fa77f1e1f32a4d
55477,290,1,gtfs-data/rt/2022-05-04T17:01:38/290/1/gtfs_rt...,2022-05-04,FS_d961beae-b2bc-4fb7-b46f-abe0ec672ec2,"[{'start': 1651561200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,9b3c5bf72ec8ac7aa427cddd8d7f6108
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132547,290,1,gtfs-data/rt/2022-05-04T23:12:56/290/1/gtfs_rt...,2022-05-04,FS_655a07d5-d7c3-41b7-8972-eab396e5c2fe,"[{'start': 1651939200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,e211403fa87e3463cf0643040617ec9d
132548,290,1,gtfs-data/rt/2022-05-04T23:13:16/290/1/gtfs_rt...,2022-05-04,FS_655a07d5-d7c3-41b7-8972-eab396e5c2fe,"[{'start': 1651939200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,26d7ee6a6a370e6aa9984e13de95d969
132549,290,1,gtfs-data/rt/2022-05-04T23:13:36/290/1/gtfs_rt...,2022-05-04,FS_655a07d5-d7c3-41b7-8972-eab396e5c2fe,"[{'start': 1651939200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,89507b8f01a1b8858702a23b132e4155
132550,290,1,gtfs-data/rt/2022-05-04T23:13:56/290/1/gtfs_rt...,2022-05-04,FS_655a07d5-d7c3-41b7-8972-eab396e5c2fe,"[{'start': 1651939200, 'end': 1652050800}]","[{'agencyId': 'FS', 'routeId': None, 'routeTyp...",TECHNICAL_PROBLEM,NO_SERVICE,[],[],[],[],[],,9a94d3aeb5867c4872f216a4a40e1056


In [144]:
#starting with a sample of itp_ids

In [180]:
may4_trip_alerts = get_no_service_alerts2(alerts,
                          analysis_date = '2022-05-04',
                          itp_ids = [290],
                          return_df= 'trips')

['14212', None, None, None, '3501', '3371', None, None, None, '3404', '3397', '3385', '3385', '4940', None]


In [185]:
get_no_service_alerts2(alerts,
                          analysis_date = '2022-05-04',
                          itp_ids = [122],
                          return_df= 'trips')

['14212', None, None, None, '3501', '3371', None, None, None, '3404', '3397', '3385', '3385', '4940', None]


Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,direction_id,trip_headsign,calitp_deleted_at,trip_short_name,wheelchair_accessible,block_id,bikes_allowed,calitp_hash,shape_id,calitp_extracted_at


In [181]:
may4_trip_alerts = get_no_service_alerts2(alerts,
                          analysis_date = '2022-05-04',
                          itp_ids = [290],
                          return_df= 'trips')

['14212', None, None, None, '3501', '3371', None, None, None, '3404', '3397', '3385', '3385', '4940', None]


Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,direction_id,trip_headsign,calitp_deleted_at,trip_short_name,wheelchair_accessible,block_id,bikes_allowed,calitp_hash,shape_id,calitp_extracted_at


In [188]:
may4_trip_alerts = get_no_service_alerts2(alerts,
                          analysis_date = '2022-05-04',
                          itp_ids = may4_orgs,
                          return_df= 'trips')

['14212', None, None, None, '3501', '3371', None, None, None, '3404', '3397', '3385', '3385', '4940', None]


TypeError: Object of type int64 is not JSON serializable

In [182]:
may4_trip_alerts

Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,direction_id,trip_headsign,calitp_deleted_at,trip_short_name,wheelchair_accessible,block_id,bikes_allowed,calitp_hash,shape_id,calitp_extracted_at


In [183]:
agg_alerts(may4_trip_alerts)

Unnamed: 0,calitp_itp_id,route_id,n_no_service


In [184]:
add_to_rtsched_table(may4_trip_alerts,
                         may4,
                         analysis_date ='2022-05-04',
                        )

Unnamed: 0,agency_name,calitp_url_number,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,calitp_itp_id,route_id,n_no_service,num_sched_true
