In [1]:
import zipfile
import os
import pandas as pd
from datetime import datetime
import shutil
import sys

In [2]:
## Choose what park to work with. Assumes the GTFS is saved in the same notebook as [FourLetterParkCode].zip, i.e. ZION.zip
park_name = input("Enter the Park Code: ").strip()

zip_file = f"{park_name}.zip"

if os.path.exists(zip_file):
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(f"{park_name}_unzipped")
        print(f"Extracted {zip_file} to the folder {park_name}_unzipped")
else:
    print(f"The file {zip_file} does not exist.")
    sys.exit()

Enter the Park Code: STLI
Extracted STLI.zip to the folder STLI_unzipped


In [3]:
##Load in the calendar
unzipped_folder = f"{park_name}_unzipped"
calendar_file = os.path.join(unzipped_folder, "calendar.txt")
if os.path.exists(calendar_file):
    calendar = pd.read_csv(calendar_file)
    print("Loaded calendar.txt into a DataFrame.")
    print(calendar.head())
else:
    print(f"The file {calendar_file} does not exist.")

Loaded calendar.txt into a DataFrame.
       service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0         NJEarly       1        1          1         1       1         1   
1          NJFall       1        1          1         1       1         1   
2        NJWinter       1        1          1         1       1         1   
3       NJHoliday       1        1          1         1       1         1   
4  NYEarlyWeekday       1        1          1         1       1         0   

   sunday  start_date  end_date  
0       1    20240913  20241014  
1       1    20241015  20241201  
2       1    20241202  20241213  
3       1    20241214  20250105  
4       0    20240913  20241014  


In [11]:
#Switch to active calendar based on today's date
calendar['start_date'] = pd.to_datetime(calendar['start_date'], errors='coerce')
calendar['end_date'] = pd.to_datetime(calendar['end_date'], errors='coerce')
if calendar['start_date'].isnull().any():
    print("Some start dates could not be converted:")
    print(calendar[calendar['start_date'].isnull()])

if calendar['end_date'].isnull().any():
    print("Some end dates could not be converted:")
    print(calendar[calendar['end_date'].isnull()])
current_date = pd.Timestamp.now().normalize()
current_weekday = current_date.weekday()
weekdays = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
current_weekday_name = weekdays[current_weekday]
print(f"Current date: {current_date.date()}")
print(f"Current weekday: {current_weekday_name}")
active_schedules = calendar[
    (calendar['start_date'] <= current_date) &
    (calendar['end_date'] >= current_date) &
    (calendar[current_weekday_name] == 1)  # Use the weekday name to filter
]

if not active_schedules.empty:
    print(f"Active schedules on {current_date.date()}:")
    print(active_schedules[['service_id', 'start_date', 'end_date']])
else:
    print(f"No active schedules found on {current_date.date()}.")


Current date: 2024-10-28
Current weekday: monday
Active schedules on 2024-10-28:
        service_id start_date   end_date
1           NJFall 2024-10-15 2024-12-01
5  NYWinterWeekday 2024-10-15 2025-01-05


In [12]:
#Load in Trips
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the trips.txt file
trips_file = os.path.join(unzipped_folder, "trips.txt")
    
if os.path.exists(trips_file): 
    # Load the trips.txt into a pandas DataFrame
    trips = pd.read_csv(trips_file)
    
    print("Loaded trips.txt into a DataFrame.")
    print(trips.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {trips_file} does not exist.")

Loaded trips.txt into a DataFrame.
  route_id service_id                            trip_id   trip_headsign  \
0       NJ    NJEarly  Liberty State Park Loop Early_T01  Liberty Island   
1       NJ    NJEarly  Liberty State Park Loop Early_T02  Liberty Island   
2       NJ    NJEarly  Liberty State Park Loop Early_T03  Liberty Island   
3       NJ    NJEarly  Liberty State Park Loop Early_T04  Liberty Island   
4       NJ    NJEarly  Liberty State Park Loop Early_T05  Liberty Island   

   direction_id  block_id shape_id  wheelchair_accessible  bikes_allowed  
0             1       NaN       NJ                      1              2  
1             1       NaN       NJ                      1              2  
2             1       NaN       NJ                      1              2  
3             1       NaN       NJ                      1              2  
4             1       NaN       NJ                      1              2  


In [13]:
#List of trips running today
active_trips = trips[trips['service_id'].isin(active_schedules['service_id'])]
print("Active trips based on active schedules:")
print(active_trips)

Active trips based on active schedules:
   route_id       service_id  \
11       NJ           NJFall   
12       NJ           NJFall   
13       NJ           NJFall   
14       NJ           NJFall   
15       NJ           NJFall   
16       NJ           NJFall   
17       NJ           NJFall   
18       NJ           NJFall   
19       NJ           NJFall   
20       NJ           NJFall   
21       NJ           NJFall   
41  EILILSP           NJFall   
44    EILSP           NJFall   
65       NY  NYWinterWeekday   
66       NY  NYWinterWeekday   
67       NY  NYWinterWeekday   
68       NY  NYWinterWeekday   
69       NY  NYWinterWeekday   
70       NY  NYWinterWeekday   
71       NY  NYWinterWeekday   
72       NY  NYWinterWeekday   
73       NY  NYWinterWeekday   
74       NY  NYWinterWeekday   
75       NY  NYWinterWeekday   
76       NY  NYWinterWeekday   
77       NY  NYWinterWeekday   
78       NY  NYWinterWeekday   
79       NY  NYWinterWeekday   
80       NY  NYWinterWeekday   


In [14]:
#Load all stop times
unzipped_folder = f"{park_name}_unzipped"
stop_times_file = os.path.join(unzipped_folder, "stop_times.txt")
if os.path.exists(stop_times_file):
    stop_times = pd.read_csv(stop_times_file)
    print("Loaded stop_times.txt into a DataFrame.")
    print(stop_times.head()) 
else:
    print(f"The file {stop_times_file} does not exist.")

Loaded stop_times.txt into a DataFrame.
                             trip_id arrival_time departure_time stop_id  \
0  Liberty State Park Loop Early_T01     09:00:00       09:00:00    1_NJ   
1  Liberty State Park Loop Early_T01     09:20:00       09:20:00    2_NJ   
2  Liberty State Park Loop Early_T01     09:40:00       09:40:00    3_NJ   
3  Liberty State Park Loop Early_T01     09:55:00       09:55:00    1_NJ   
4  Liberty State Park Loop Early_T02     09:45:00       09:45:00    1_NJ   

   stop_sequence  stop_headsign  pickup_type  drop_off_type  \
0              1            NaN          NaN            NaN   
1              2            NaN          NaN            NaN   
2              3            NaN          NaN            NaN   
3              4            NaN          NaN            NaN   
4              1            NaN          NaN            NaN   

   continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
0                NaN                  NaN       

In [17]:
#Return Active stop_times
active_stop_times = stop_times[stop_times['trip_id'].isin(active_trips['trip_id'])]

print(f"There are {len(active_stop_times)} trips running on {current_date.date()}, which is a {current_weekday_name}")
print(active_stop_times.head())

There are 128 trips running on 2024-10-28, which is a monday
                             trip_id arrival_time departure_time stop_id  \
44  Liberty State Park Loop Fall_T01     09:00:00       09:00:00    1_NJ   
45  Liberty State Park Loop Fall_T01     09:20:00       09:20:00    2_NJ   
46  Liberty State Park Loop Fall_T01     09:40:00       09:40:00    3_NJ   
47  Liberty State Park Loop Fall_T01     09:55:00       09:55:00    1_NJ   
48  Liberty State Park Loop Fall_T02     09:30:00       09:30:00    1_NJ   

    stop_sequence  stop_headsign  pickup_type  drop_off_type  \
44              1            NaN          NaN            NaN   
45              2            NaN          NaN            NaN   
46              3            NaN          NaN            NaN   
47              4            NaN          NaN            NaN   
48              1            NaN          NaN            NaN   

    continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
44                Na