# Service Alerts and Trip Updates Functions

In [1]:
import os
os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) ## 800GB?

from calitp.tables import tbl
from calitp import query_sql
import calitp.magics
import branca

import shared_utils
# import utils

from siuba import *
import pandas as pd

import datetime as dt
import time



## Quick example of finding cancelled service in service alerts

In [20]:
# Pull every column of the staged service-alerts table for 2022-05-11 through
# 2022-05-12 (BETWEEN is inclusive), limited to ITP IDs 290 and 300.
alert_query = '''SELECT * FROM `cal-itp-data-infra.staging.stg_rt__service_alerts`
WHERE date BETWEEN '2022-05-11' AND '2022-05-12' AND calitp_itp_id in (290, 300)'''

In [21]:
alerts = query_sql(alert_query)

In [32]:
# List the distinct values of the alert `effect` field
alerts.effect.unique()

array([None, 'UNKNOWN_EFFECT', 'REDUCED_SERVICE', 'MODIFIED_SERVICE',
       'ACCESSIBILITY_ISSUE', 'NO_SERVICE', 'STOP_MOVED', 'NO_EFFECT',
       'OTHER_EFFECT', 'DETOUR', 'ADDITIONAL_SERVICE'], dtype=object)

In [33]:
# Show the calitp_url_number behind each agency's alerts.
# NOTE(review): `.unique()` yields the distinct value(s), not a count, so `n`
# below is the url number itself (290 -> 1, 300 -> 0); use `.nunique()` if a
# count of urls per agency is what is wanted.
alerts>>group_by(_.calitp_itp_id)>>summarize(n = _.calitp_url_number.unique())

Unnamed: 0,calitp_itp_id,n
0,290,1
1,300,0


In [34]:
#filter for no service
alerts>>filter(_.effect=="NO_SERVICE")

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
6459,290,1,gtfs-data/rt/2022-05-12T21:43:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,18525cd36e0f75e935f9fb0d6fa9f52d
6461,290,1,gtfs-data/rt/2022-05-12T21:44:00/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,f1229356744e5fe5a595ad1b1956876c
6469,290,1,gtfs-data/rt/2022-05-12T21:44:20/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,98c58b892afd7e62364d368cbbd85457
6483,290,1,gtfs-data/rt/2022-05-12T21:44:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,b96fbc7b5606c13cf30a2c4fbb015c55
6485,290,1,gtfs-data/rt/2022-05-12T21:45:00/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,90c0d44ddc26d0e649ba4bf712956059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541383,300,0,gtfs-data/rt/2022-05-12T05:58:34/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,63a7cca902b7c52caa7dd52b3d161c7d
541384,300,0,gtfs-data/rt/2022-05-12T05:58:54/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,9d97bc3a05a8ddbb94e42bc5635e7590
541385,300,0,gtfs-data/rt/2022-05-12T05:59:14/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,ece7fee2cd16fff7c2fd685f0753ee07
541386,300,0,gtfs-data/rt/2022-05-12T05:59:34/300/0/gtfs_rt...,2022-05-12,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,273dd7a09ca5a0b4c917c7fc22e8c703


## Function for Service Alerts

In [7]:
def get_no_service_alerts(df,
                          analysis_date: str,
                          itp_id: int,
                          return_df: str):
    """
    Find the scheduled trips or routes that a `NO_SERVICE` alert points at.

    Args:
        df: service alerts table; must have the columns `calitp_itp_id`,
            `id`, `effect` and `informed_entities` (a list of dicts that
            carry a `routeId` key).
        analysis_date: service date handed to
            `shared_utils.gtfs_utils.get_trips` / `get_route_info`,
            e.g. '2022-05-12'.
        itp_id: single ITP ID to look at, e.g. 300.
        return_df: 'trips' or 'routes'.

    Returns:
        The trips (or routes) table for `itp_id` on `analysis_date`, kept to
        the rows whose `route_id` is named by a NO_SERVICE alert.

    Raises:
        ValueError: if `return_df` is neither 'trips' nor 'routes'.

    Notes:
        * Alerts are not filtered by `analysis_date` or by their
          `active_periods`; every NO_SERVICE alert in `df` for the agency
          counts.
        * Routes are matched on `route_id` alone; the alert's `agencyId` is
          not compared with the schedule.
    """
    # Fail before running any query instead of silently returning None.
    if return_df not in ('trips', 'routes'):
        raise ValueError(
            f"return_df must be 'trips' or 'routes', got {return_df!r}"
        )

    # Filter on the effect BEFORE de-duplicating by alert id: the same id is
    # re-published in every feed snapshot, and keeping only its first snapshot
    # first would lose alerts whose effect became NO_SERVICE later on.
    is_no_service = (df['calitp_itp_id'] == itp_id) & (df['effect'] == 'NO_SERVICE')
    no_service = df.loc[is_no_service].drop_duplicates(subset='id')

    # Collect the route id of every informed entity (not just the first one),
    # skipping alerts without entities and entities without a route.
    no_service_rts = []
    for entities in no_service['informed_entities']:
        # None / NaN / stray strings are not entity lists
        if entities is None or isinstance(entities, (str, float)):
            continue
        for entity in entities:
            route_id = entity.get('routeId')
            if route_id is not None and route_id not in no_service_rts:
                no_service_rts.append(route_id)

    # Only run the lookup that is actually going to be returned.
    if return_df == 'trips':
        scheduled = shared_utils.gtfs_utils.get_trips(analysis_date, [itp_id])
    else:
        scheduled = shared_utils.gtfs_utils.get_route_info(analysis_date, [itp_id])

    return scheduled.loc[scheduled['route_id'].isin(no_service_rts)]

In [8]:
no_service = get_no_service_alerts(alerts,'2022-05-12', 300, 'trips')

In [9]:
# Tally the affected trips per route; the `n` column produced by `count`
# is renamed to `n_no_service`.
no_service_agg = (
    (no_service >> count(_.route_id))
    .rename(columns={'n': 'n_no_service'})
)

In [10]:
no_service_agg

Unnamed: 0,route_id,n_no_service
0,3501,104


### Read in existing `rt vs sched` table

In [12]:
import utils

In [66]:
rt_sched = utils.read_data()

In [14]:
rt_sched.sample()

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
1845,290,SamTrans,1,28,28,2022-05-19,2021-08-01,2022-06-24,2,2,1.0,Thursday,May,04 - Oakland


In [67]:
rt_sched_filtered = rt_sched>>filter(_.calitp_itp_id==300, _.service_date == '2022-05-12')

In [68]:
route = rt_sched_filtered>>filter(_.route_id == '3501')
route

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
1566,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.0,Thursday,May,07 - Los Angeles


#### Merge

In [69]:
pd.merge(rt_sched_filtered, no_service_agg, how='left', on= 'route_id')

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
0,300,Big Blue Bus,0,3479,1,2022-05-12,2022-03-08,2022-09-01,180,180,1.0,Thursday,May,07 - Los Angeles,
1,300,Big Blue Bus,0,3480,2,2022-05-12,2022-03-08,2022-09-01,82,79,0.96,Thursday,May,07 - Los Angeles,
2,300,Big Blue Bus,0,3481,3,2022-05-12,2022-03-08,2022-09-01,145,133,0.92,Thursday,May,07 - Los Angeles,
3,300,Big Blue Bus,0,3485,7,2022-05-12,2022-03-08,2022-09-01,153,147,0.96,Thursday,May,07 - Los Angeles,
4,300,Big Blue Bus,0,3486,8,2022-05-12,2022-03-08,2022-09-01,71,71,1.0,Thursday,May,07 - Los Angeles,
5,300,Big Blue Bus,0,3487,9,2022-05-12,2022-03-08,2022-09-01,68,67,0.99,Thursday,May,07 - Los Angeles,
6,300,Big Blue Bus,0,3489,R12,2022-05-12,2022-03-08,2022-09-01,153,153,1.0,Thursday,May,07 - Los Angeles,
7,300,Big Blue Bus,0,3490,14,2022-05-12,2022-03-08,2022-09-01,107,107,1.0,Thursday,May,07 - Los Angeles,
8,300,Big Blue Bus,0,3493,17,2022-05-12,2022-03-08,2022-09-01,88,81,0.92,Thursday,May,07 - Los Angeles,
9,300,Big Blue Bus,0,3494,18,2022-05-12,2022-03-08,2022-09-01,53,53,1.0,Thursday,May,07 - Los Angeles,


## For two agencies

In [31]:
alerts>>filter(_.effect=="NO_SERVICE")>>group_by(_.calitp_itp_id) >> distinct(_.id, _keep_all=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
calitp_itp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
290,0,290,1,gtfs-data/rt/2022-05-12T21:43:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,18525cd36e0f75e935f9fb0d6fa9f52d
290,1,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18202,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': None, 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,fe8088550b2eb3aa28700fc67c81b18b
290,2,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18203,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': '17', 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,f7cc071b38072f9ce87a27a057abe39f
300,0,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,30856f5c-c89a-44bf-a9f7-66f6212d6d7c,"[{'start': 1641801600, 'end': None}]","[{'agencyId': None, 'routeId': '3371', 'routeT...",OTHER_CAUSE,NO_SERVICE,[{'text': 'https://www.bigbluebus.com/servicea...,[],[],[],[],,ca34513e31c268e77a2e355c9a216e6d
300,1,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,d4710fc0982bab7e7664327b34d8ed42


In [37]:
no_service2 = alerts>>filter(_.effect=="NO_SERVICE") >> distinct(_.id, _keep_all=True)

In [38]:
no_service2

Unnamed: 0,calitp_itp_id,calitp_url_number,original_file_path,date,id,active_periods,informed_entities,cause,effect,url_translations,header_text_translations,description_text_translations,tts_header_text_translations,tts_description_text_translations,severity_level,key
0,290,1,gtfs-data/rt/2022-05-12T21:43:40/290/1/gtfs_rt...,2022-05-12,FS_9f5f4e62-c99f-45c1-9ffd-3c19b61d1e41,"[{'start': 1652393700, 'end': 1652404500}]","[{'agencyId': 'FS', 'routeId': '2', 'routeType...",OTHER_CAUSE,NO_SERVICE,[],[],[],[],[],,18525cd36e0f75e935f9fb0d6fa9f52d
1,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18202,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': None, 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,fe8088550b2eb3aa28700fc67c81b18b
2,290,1,gtfs-data/rt/2022-05-11T07:00:08/290/1/gtfs_rt...,2022-05-11,MA_18203,"[{'start': 1652079600, 'end': 1652597939}]","[{'agencyId': 'MA', 'routeId': '17', 'routeTyp...",CONSTRUCTION,NO_SERVICE,"[{'text': 'https://marintransit.org/', 'langua...",[],[],[],[],,f7cc071b38072f9ce87a27a057abe39f
3,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,30856f5c-c89a-44bf-a9f7-66f6212d6d7c,"[{'start': 1641801600, 'end': None}]","[{'agencyId': None, 'routeId': '3371', 'routeT...",OTHER_CAUSE,NO_SERVICE,[{'text': 'https://www.bigbluebus.com/servicea...,[],[],[],[],,ca34513e31c268e77a2e355c9a216e6d
4,300,0,gtfs-data/rt/2022-05-11T16:00:14/300/0/gtfs_rt...,2022-05-11,ad74d4ee-87d8-4567-afc7-93a3e8f2aee5,"[{'start': 1648836000, 'end': 1660460400}]","[{'agencyId': None, 'routeId': '3501', 'routeT...",OTHER_CAUSE,NO_SERVICE,"[{'text': '', 'language': None}]",[],[],[],[],,d4710fc0982bab7e7664327b34d8ed42


In [40]:
# Route id of each NO_SERVICE alert. Only the first informed entity is read
# (`entity[0]`), and its `routeId` can be None (see alert MA_18202 above).
no_service_rts2 = [entity[0]['routeId'] for entity in no_service2.informed_entities.to_list()]

In [42]:
# Drop every missing route id. Unlike `list.remove(None)`, this removes all
# None entries and does not raise when there are none, so the cell can be re-run.
no_service_rts2 = [route_id for route_id in no_service_rts2 if route_id is not None]

In [43]:
no_service_rts2

['2', '17', '3371', '3501']

In [65]:
## The schedule lookups below use a single analysis date, not the two dates covered by the alerts query

In [46]:
analysis_dates= '2022-05-11'
itp_ids= [290, 300]

In [50]:
routes2 = shared_utils.gtfs_utils.get_route_info(analysis_dates, itp_ids)

In [51]:
routes2.sample()

Unnamed: 0,feed_key,route_key,date,route_short_name,route_url,route_continuous_drop_off,route_type,calitp_extracted_at,route_color,route_desc,...,route_continuous_pickup,agency_id,calitp_itp_id,agency_url,route_long_name,agency_phone,agency_timezone,calitp_deleted_at,agency_name,calitp_url_number
145,-4021584186578633324,284227301063660239,2022-05-11,57,,,3,2022-01-11,,,...,,,290,,Edgewater/Beach Park - Hillsdale High,,,2022-06-19,,0


In [52]:
routes2 = routes2 >> filter(_.route_id.isin(no_service_rts2))

In [53]:
routes2

Unnamed: 0,feed_key,route_key,date,route_short_name,route_url,route_continuous_drop_off,route_type,calitp_extracted_at,route_color,route_desc,...,route_continuous_pickup,agency_id,calitp_itp_id,agency_url,route_long_name,agency_phone,agency_timezone,calitp_deleted_at,agency_name,calitp_url_number
47,4100255323842349957,7100850145565938766,2022-05-11,44,http://bigbluebus.com/Routes-and-Schedules/Rou...,,3,2022-03-08,E87D1E,,...,,6216179,300,http://www.bigbluebus.com,17th St Sta - SMC Bundy Campus,310-451-5444,America/Los_Angeles,2022-09-01,Big Blue Bus,0
57,3742542532326916514,5751554105033898922,2022-05-11,17,,,3,2021-04-16,,,...,,SM,290,http://www.samtrans.com,Linda Mar Park & Ride - Pescadero,800-660-4287,America/Los_Angeles,2022-08-08,SamTrans,1


In [55]:
# Trips on the analysis date for both agencies, kept to routes named in a NO_SERVICE alert.
# NOTE(review): the filter uses route_id only, so a route id taken from one
# agency's alert would also match a same-numbered route of the other agency.
trips2 = shared_utils.gtfs_utils.get_trips(analysis_dates, itp_ids)
trips2 = trips2 >> filter(_.route_id.isin(no_service_rts2))

In [56]:
trips2

Unnamed: 0,feed_key,trip_key,trip_id,route_id,calitp_itp_id,calitp_url_number,service_id,service_date,service_indicator,service_start_date,...,wheelchair_accessible,bikes_allowed,direction_id,shape_id,calitp_extracted_at,calitp_deleted_at,trip_headsign,calitp_hash,block_id,trip_short_name
484,3742542532326916514,-3334599700554942862,171032,17,290,1,73025,2022-05-11,1,2022-05-04,...,,,0,170545,2022-05-04,2022-06-07,Linda Mar Park & Ride,rzX2R4swEFG4SyHBjbW5fg==,,171032
485,3742542532326916514,-3533664886309114020,171033,17,290,1,73025,2022-05-11,1,2022-05-04,...,,,1,170546,2022-05-04,2022-06-07,Miramontes/Moonridge Apts,mifqBKtCTwk5re9edgYSjQ==,,171033
486,3742542532326916514,4942227074902548420,171028,17,290,1,73025,2022-05-11,1,2022-05-04,...,,,0,170545,2022-05-04,2022-06-07,Linda Mar Park & Ride,wFOZ4VaxGiZTF9PYLTxbSg==,,171028
487,3742542532326916514,-2699324723585186774,171024,17,290,1,73025,2022-05-11,1,2022-05-04,...,,,0,170545,2022-05-04,2022-06-07,Linda Mar Park & Ride,T66tRHa0JxTl5RFqN9kkAg==,,171024
488,3742542532326916514,-4126684442836698451,171006,17,290,1,73025,2022-05-11,1,2022-05-04,...,,,0,170523,2022-05-04,2022-06-07,Linda Mar Park & Ride,IHfW8LwDCHrIUQsfLuWiXg==,,171006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3205,4100255323842349957,-5118958034454309391,883656,3501,300,0,77710,2022-05-11,0,2022-03-27,...,0,0,0,25955,2022-03-29,2022-07-18,17th St Station,VwMkm9NPe0ZC4YfJAeyBDA==,,
3206,4100255323842349957,-8564373523420756858,883696,3501,300,0,40510,2022-05-11,0,2022-03-27,...,0,0,1,25956,2022-03-29,2022-07-18,SMC Bundy Campus,zeCUS/0NFCLHGc4e/8nQxw==,,
3207,4100255323842349957,5755218391950692981,883675,3501,300,0,77710,2022-05-11,0,2022-03-27,...,0,0,1,25956,2022-03-29,2022-07-18,SMC Bundy Campus,spysr/x3daTX2GRV9Gx+CQ==,,
3208,4100255323842349957,-4606747982320804028,883651,3501,300,0,40510,2022-05-11,0,2022-03-27,...,0,0,0,25955,2022-03-29,2022-07-18,17th St Station,7su1G62rqME+Gby+bl2ldg==,,


In [62]:
no_service_agg2 = trips2 >> group_by(_.calitp_itp_id) >>count(_.route_id)

In [63]:
no_service_agg2 = no_service_agg2.rename(columns={'n':'n_no_service'})

In [64]:
no_service_agg2

Unnamed: 0,calitp_itp_id,route_id,n_no_service
0,290,17,33
1,300,3501,104


In [70]:
## Filter on service date only, since we already have only the two organizations.
## In the future this will also need a filter on organization.
## NOTE(review): trips2 / no_service_agg2 were built for analysis_dates = '2022-05-11',
## while this keeps service_date '2022-05-12' -- confirm the two dates are meant to differ.
rt_sched_filtered2 = rt_sched>>filter(_.service_date == '2022-05-12')

In [71]:
rt_sched_filtered2

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district
20,290,SamTrans,1,110,110,2022-05-12,2021-04-16,2099-01-01,50,50,1.00,Thursday,May,04 - Oakland
79,290,SamTrans,1,120,120,2022-05-12,2021-04-16,2099-01-01,139,139,1.00,Thursday,May,04 - Oakland
125,290,SamTrans,1,121,121,2022-05-12,2021-04-16,2022-07-23,32,32,1.00,Thursday,May,04 - Oakland
155,290,SamTrans,1,122,122,2022-05-12,2021-04-16,2099-01-01,74,67,0.91,Thursday,May,04 - Oakland
231,290,SamTrans,1,130,130,2022-05-12,2021-04-16,2022-07-23,130,130,1.00,Thursday,May,04 - Oakland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3829,300,Big Blue Bus,0,3500,43,2022-05-12,2022-03-08,2022-09-01,35,35,1.00,Thursday,May,07 - Los Angeles
3850,290,SamTrans,1,398,398,2022-05-12,2021-04-16,2022-08-08,36,36,1.00,Thursday,May,04 - Oakland
3907,300,Big Blue Bus,0,3498,41,2022-05-12,2022-03-08,2022-09-01,39,39,1.00,Thursday,May,07 - Los Angeles
3914,290,SamTrans,1,141,141,2022-05-12,2021-04-16,2022-07-23,54,54,1.00,Thursday,May,04 - Oakland


In [86]:
full = (pd.merge(rt_sched_filtered2, no_service_agg2, how='left', on= ['calitp_itp_id','route_id']))

In [87]:
full

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
0,290,SamTrans,1,110,110,2022-05-12,2021-04-16,2099-01-01,50,50,1.00,Thursday,May,04 - Oakland,
1,290,SamTrans,1,120,120,2022-05-12,2021-04-16,2099-01-01,139,139,1.00,Thursday,May,04 - Oakland,
2,290,SamTrans,1,121,121,2022-05-12,2021-04-16,2022-07-23,32,32,1.00,Thursday,May,04 - Oakland,
3,290,SamTrans,1,122,122,2022-05-12,2021-04-16,2099-01-01,74,67,0.91,Thursday,May,04 - Oakland,
4,290,SamTrans,1,130,130,2022-05-12,2021-04-16,2022-07-23,130,130,1.00,Thursday,May,04 - Oakland,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,300,Big Blue Bus,0,3500,43,2022-05-12,2022-03-08,2022-09-01,35,35,1.00,Thursday,May,07 - Los Angeles,
82,290,SamTrans,1,398,398,2022-05-12,2021-04-16,2022-08-08,36,36,1.00,Thursday,May,04 - Oakland,
83,300,Big Blue Bus,0,3498,41,2022-05-12,2022-03-08,2022-09-01,39,39,1.00,Thursday,May,07 - Los Angeles,
84,290,SamTrans,1,141,141,2022-05-12,2021-04-16,2022-07-23,54,54,1.00,Thursday,May,04 - Oakland,


In [88]:
full>>filter(_.n_no_service.notnull())

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service
26,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.0,Thursday,May,07 - Los Angeles,104.0
78,290,SamTrans,1,17,17,2022-05-12,2021-04-16,2022-08-08,33,33,1.0,Thursday,May,04 - Oakland,33.0


In [89]:
full['n_no_service'] = full['n_no_service'].fillna(value=0)

In [90]:
full['num_sched_true'] = full['num_sched'] - full['n_no_service']

In [91]:
full>>arrange(-_.n_no_service)

Unnamed: 0,calitp_itp_id,agency_name,calitp_url_number,route_id,route_short_name,service_date,calitp_extracted_at,calitp_deleted_at,num_sched,num_vp,pct_w_vp,weekday,month,caltrans_district,n_no_service,num_sched_true
26,300,Big Blue Bus,0,3501,44,2022-05-12,2022-03-08,2022-09-01,104,0,0.00,Thursday,May,07 - Los Angeles,104.00,0.00
78,290,SamTrans,1,17,17,2022-05-12,2021-04-16,2022-08-08,33,33,1.00,Thursday,May,04 - Oakland,33.00,0.00
0,290,SamTrans,1,110,110,2022-05-12,2021-04-16,2099-01-01,50,50,1.00,Thursday,May,04 - Oakland,0.00,50.00
1,290,SamTrans,1,120,120,2022-05-12,2021-04-16,2099-01-01,139,139,1.00,Thursday,May,04 - Oakland,0.00,139.00
2,290,SamTrans,1,121,121,2022-05-12,2021-04-16,2022-07-23,32,32,1.00,Thursday,May,04 - Oakland,0.00,32.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,300,Big Blue Bus,0,3500,43,2022-05-12,2022-03-08,2022-09-01,35,35,1.00,Thursday,May,07 - Los Angeles,0.00,35.00
82,290,SamTrans,1,398,398,2022-05-12,2021-04-16,2022-08-08,36,36,1.00,Thursday,May,04 - Oakland,0.00,36.00
83,300,Big Blue Bus,0,3498,41,2022-05-12,2022-03-08,2022-09-01,39,39,1.00,Thursday,May,07 - Los Angeles,0.00,39.00
84,290,SamTrans,1,141,141,2022-05-12,2021-04-16,2022-07-23,54,54,1.00,Thursday,May,04 - Oakland,0.00,54.00


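notes:
    * Interesting that SamTrans's "No Service" route (17) still has vehicle positions; this needs checking. One possible explanation: the route-17 alert (`MA_18203`) has `agencyId` 'MA' and links to marintransit.org, so it may describe a Marin Transit route rather than SamTrans route 17.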

In [94]:
# testing with utils using new true num_sched

utils.get_agg_pct(full,
                groupings = ['calitp_itp_id', 'agency_name'],
                sum_sched= 'num_sched_true',
                sum_vp = 'num_vp'
               )

Unnamed: 0,calitp_itp_id,agency_name,num_sched_true,num_vp,avg
0,290,SamTrans,1412.0,1389,0.98
1,300,Big Blue Bus,1412.0,1380,0.98


In [95]:
#using regular num_sched
utils.get_agg_pct(full,
                groupings = ['calitp_itp_id', 'agency_name'],
                sum_sched= 'num_sched',
                sum_vp = 'num_vp'
               )

Unnamed: 0,calitp_itp_id,agency_name,num_sched,num_vp,avg
0,290,SamTrans,1445,1389,0.96
1,300,Big Blue Bus,1516,1380,0.91
