In [46]:
import pandas as pd
import datetime as dt
import os

def parse_feed_info_file(feed_info_file_path):
    """Read a GTFS ``feed_info.txt`` file and return the feed's validity window.

    Parameters
    ----------
    feed_info_file_path : str or path-like
        Path to a CSV file containing ``feed_start_date`` and
        ``feed_end_date`` columns in ``YYYYMMDD`` form.

    Returns
    -------
    tuple
        ``(feed_start_date, feed_end_date)`` taken from the first row of the
        file, each converted to a pandas datetime value.
    """
    feed_info = pd.read_csv(feed_info_file_path)
    # Both boundary columns share the same compact YYYYMMDD encoding.
    for column in ('feed_start_date', 'feed_end_date'):
        feed_info[column] = pd.to_datetime(feed_info[column], format='%Y%m%d')
    # Only the first row is used, even if the file contains more.
    first_row = feed_info.iloc[0]
    return first_row['feed_start_date'], first_row['feed_end_date']

def parse_calendar_file(calendar_file_path):
    """Expand a GTFS ``calendar.txt`` file into a per-date set of service IDs.

    Parameters
    ----------
    calendar_file_path : str or path-like
        Path to a CSV file with ``service_id``, ``start_date``, ``end_date``
        (``YYYYMMDD``) and one 0/1 column per weekday (``monday`` ...
        ``sunday``).

    Returns
    -------
    dict
        Maps every date covered by at least one row's
        ``[start_date, end_date]`` range (inclusive) to the set of
        ``service_id`` values whose weekday flag equals 1 on that date.
        A covered date on which no service runs maps to an empty set.
    """
    weekday_columns = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

    calendar = pd.read_csv(calendar_file_path)
    for boundary in ('start_date', 'end_date'):
        calendar[boundary] = pd.to_datetime(calendar[boundary], format='%Y%m%d')

    services_by_date = {}
    for _, record in calendar.iterrows():
        # Flags are ordered Monday..Sunday, matching Timestamp.dayofweek (Monday == 0).
        weekday_flags = record[weekday_columns]
        for day in pd.date_range(record['start_date'], record['end_date']):
            # Register the date even when the service does not run on it.
            running = services_by_date.setdefault(day, set())
            if weekday_flags.iloc[day.dayofweek] == 1:
                running.add(record['service_id'])
    return services_by_date

class _ServiceExceptions(dict):
    """Dict of ``date -> set of service_ids added`` that also tracks removals.

    It behaves exactly like the plain dict ``parse_calendar_dates_file`` used
    to return (same keys, same values, equal to an equivalent ``dict``), so
    existing callers keep working. The extra ``removed`` attribute maps
    ``date -> set of service_ids removed`` (``exception_type == 2``) so that
    ``generate_schedule`` can cancel regular calendar service on those dates.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.removed = {}


def parse_calendar_dates_file(calendar_dates_file_path):
    """Read a GTFS ``calendar_dates.txt`` file into per-date service exceptions.

    Parameters
    ----------
    calendar_dates_file_path : str or path-like
        Path to a CSV file with ``service_id``, ``date`` (``YYYYMMDD``) and
        ``exception_type`` columns.

    Returns
    -------
    dict
        Maps every date that appears in the file to the set of service IDs
        added on that date (``exception_type == 1``); dates that only carry
        removals map to an empty set. The returned object additionally
        exposes a ``removed`` attribute: a dict mapping each date to the set
        of service IDs removed on it (``exception_type == 2``).

    Notes
    -----
    If the same (service, date) pair is listed more than once, the last row
    wins, mirroring the row-order semantics of the previous implementation.
    """
    calendar_dates_df = pd.read_csv(calendar_dates_file_path)
    # Convert date to datetime object
    calendar_dates_df['date'] = pd.to_datetime(calendar_dates_df['date'], format='%Y%m%d')

    calendar_dates_data = _ServiceExceptions()
    removed_by_date = calendar_dates_data.removed
    rows = zip(
        calendar_dates_df['service_id'],
        calendar_dates_df['date'],
        calendar_dates_df['exception_type'],
    )
    for service_id, date, exception_type in rows:
        # Every date in the file gets an entry, even if it only has removals.
        added = calendar_dates_data.setdefault(date, set())
        if exception_type == 1:
            added.add(service_id)
            # A later addition cancels an earlier removal of the same pair.
            if date in removed_by_date:
                removed_by_date[date].discard(service_id)
        elif exception_type == 2:
            added.discard(service_id)
            # Remember the removal so regular calendar service can be
            # cancelled too; previously this information was lost.
            removed_by_date.setdefault(date, set()).add(service_id)
    return calendar_dates_data


def generate_schedule(feed_start_date, feed_end_date, calendar_data, calendar_dates_data):
    """Build the list of active service IDs for each date in the feed window.

    Parameters
    ----------
    feed_start_date, feed_end_date : datetime-like
        Inclusive bounds of the date range to generate.
    calendar_data : dict
        ``date -> set of service_ids`` from the regular weekly calendar.
    calendar_dates_data : dict
        ``date -> set of service_ids`` added by exception. When the object
        also has a ``removed`` attribute (as returned by
        ``parse_calendar_dates_file``), the services listed there are
        subtracted for the matching date. A plain dict is still accepted
        and is treated as having no removals.

    Returns
    -------
    list of tuple
        One ``(date, day_of_week_name, service_ids)`` tuple per date, in
        chronological order.
    """
    removed_by_date = getattr(calendar_dates_data, 'removed', None) or {}
    no_services = frozenset()

    schedule = []
    for date in pd.date_range(feed_start_date, feed_end_date):
        day_of_week = date.strftime('%A')
        scheduled = calendar_data.get(date, set()) | calendar_dates_data.get(date, set())
        # Removals apply to regular and added services alike.
        service_ids = scheduled - removed_by_date.get(date, no_services)
        schedule.append((date, day_of_week, service_ids))
    return schedule

In [54]:
# Locate the three GTFS schedule files used below; they all live in the same
# feed directory underneath working_path.
working_path = 'C:\\Users\\antonio.forte\\Dropbox (MIT)\\GitHub\\getGTFS-RT\\MBTA_PostRatingRecap_ArrDepTimes'
gtfs_feed_dir = os.path.join(working_path, 'gtfsSchedule', 'gtfs_2022-12-18_2023-03-11_Winter2023PostRecap')
calendar_file_path = os.path.join(gtfs_feed_dir, 'calendar.txt')
calendar_dates_file_path = os.path.join(gtfs_feed_dir, 'calendar_dates.txt')
feed_info_file_path = os.path.join(gtfs_feed_dir, 'feed_info.txt')

# Parse each file, then combine them into one row per date in the feed window.
feed_start_date, feed_end_date = parse_feed_info_file(feed_info_file_path)
calendar_data = parse_calendar_file(calendar_file_path)
calendar_dates_data = parse_calendar_dates_file(calendar_dates_file_path)
calendar = generate_schedule(feed_start_date, feed_end_date, calendar_data, calendar_dates_data)
calendar_df = pd.DataFrame(
    calendar,
    columns=['date', 'day_of_week', 'service_ids'],
)

# Preview the first rows of the per-date schedule.
calendar_df.head()

Unnamed: 0,date,day_of_week,service_ids
0,2022-12-18,Sunday,"{RTL123-3-Su-01, LRV123-7-Su-01, RTL123-F-Su-0..."
1,2022-12-19,Monday,"{FLL 22-N-Wdy-1, PRIV123-1-Wdy-01, RTL123-D-Wd..."
2,2022-12-20,Tuesday,"{FLL 22-N-Wdy-1, PRIV123-1-Wdy-01, RTL123-D-Wd..."
3,2022-12-21,Wednesday,"{FLL 22-N-Wdy-1, PRIV123-1-Wdy-01, RTL123-D-Wd..."
4,2022-12-22,Thursday,"{FLL 22-N-Wdy-1, PRIV123-1-Wdy-01, RTL123-D-Wd..."


In [48]:
# Service IDs treated as bus service when filtering the schedule below.
bus_service = [           'WinterWeekday',          'BUS123-C-Wdy-02',
           'WinterSaturday',    'MartinLutherKingDay-1',
             'WinterSunday', 'ChristmasDay(Observed)-1',
           'ChristmasDay-1',          'BUS123-A-Wdy-02',
          'BUS123-2-Wdy-02',          'BUS123-5-Wdy-02',
          'BUS123-4-Wdy-02',          'BUS123-3-Wdy-02',
          'BUS123-6-Wdy-02',          'BUS123-1-Wdy-02',
          'BUS123-7-Wdy-02',          'BUS123-8-Wdy-02',
          'BUS123-9-Wdy-02',          'BUS123-B-Wdy-02',
         'PRIV123-6-Wdy-01',         'PRIV123-1-Wdy-01',
          'PRIV123-2-Sa-01',         'PRIV123-3-Wdy-01',
          'PRIV123-4-Sa-01',          'PRIV123-5-Su-01']

# Build the lookup set once instead of once per row.
bus_service_ids = set(bus_service)

# Remove all services that are not bus services.
# schedule_df is derived from calendar_df (the per-date schedule built from
# generate_schedule's output) instead of being modified in place: the name
# schedule_df was not defined anywhere before this cell, and dropping
# 'service_ids' from the frame being read made the cell fail when re-run.
schedule_df = (
    calendar_df
    .assign(bus_service=calendar_df['service_ids'].apply(lambda ids: ids & bus_service_ids))
    .drop(columns='service_ids')
)
# Keep only the dates on which at least one bus service runs.
schedule_df = schedule_df[schedule_df['bus_service'].apply(len) > 0]

In [57]:
# Work on a copy so schedule_df itself is left untouched by this cell.
df2 = schedule_df.copy()
in_january_2023 = (df2['date'].dt.month == 1) & (df2['date'].dt.year == 2023)
# Explicit .copy(): a column is added below, and assigning onto a .loc slice
# can trigger pandas' SettingWithCopyWarning.
january_bus_service = df2.loc[in_january_2023, ['date', 'bus_service']].copy()
print(january_bus_service)
route_10_services = [            'WinterSunday', 'ChristmasDay(Observed)-1',
           'ChristmasDay-1',          'BUS123-5-Wdy-02',
          'BUS123-4-Wdy-02',          'BUS123-3-Wdy-02',
          'BUS123-6-Wdy-02']
# Build the lookup set once instead of once per row.
route_10_service_ids = set(route_10_services)
# Build a series containing the intersection of the bus services and the route 10 services for every date
january_bus_service['route_10_services'] = january_bus_service['bus_service'].apply(
    lambda ids: ids & route_10_service_ids
)
# Display the per-date route 10 services with a fresh 0-based index.
january_bus_service['route_10_services'].reset_index(drop=True)

         date                                        bus_service
14 2023-01-01                  {ChristmasDay-1, PRIV123-5-Su-01}
15 2023-01-02  {ChristmasDay(Observed)-1, PRIV123-1-Wdy-01, P...
16 2023-01-03  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
17 2023-01-04  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
18 2023-01-05  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
19 2023-01-06  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-3-Wdy...
20 2023-01-07  {PRIV123-4-Sa-01, WinterSaturday, PRIV123-2-Sa...
21 2023-01-08                    {WinterSunday, PRIV123-5-Su-01}
22 2023-01-09  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
23 2023-01-10  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
24 2023-01-11  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
25 2023-01-12  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-6-Wdy...
26 2023-01-13  {WinterWeekday, PRIV123-1-Wdy-01, BUS123-3-Wdy...
27 2023-01-14  {PRIV123-4-Sa-01, WinterSaturday, PRIV123-2-Sa...
28 2023-01-15            

0               {ChristmasDay-1}
1     {ChristmasDay(Observed)-1}
2              {BUS123-6-Wdy-02}
3              {BUS123-6-Wdy-02}
4              {BUS123-6-Wdy-02}
5              {BUS123-3-Wdy-02}
6                             {}
7                 {WinterSunday}
8              {BUS123-6-Wdy-02}
9              {BUS123-6-Wdy-02}
10             {BUS123-6-Wdy-02}
11             {BUS123-6-Wdy-02}
12             {BUS123-3-Wdy-02}
13                            {}
14                {WinterSunday}
15             {BUS123-6-Wdy-02}
16             {BUS123-6-Wdy-02}
17             {BUS123-6-Wdy-02}
18             {BUS123-6-Wdy-02}
19             {BUS123-3-Wdy-02}
20                            {}
21                {WinterSunday}
22             {BUS123-6-Wdy-02}
23             {BUS123-6-Wdy-02}
24             {BUS123-6-Wdy-02}
25             {BUS123-6-Wdy-02}
26             {BUS123-3-Wdy-02}
27                            {}
28                {WinterSunday}
29             {BUS123-6-Wdy-02}
30        