In [1]:
#Chunk 1: Import libraries

import zipfile
import os
import pandas as pd
from datetime import datetime
import shutil
import sys

In [2]:
# Enter Park Code
park_name = input("Enter the Park Code: ").strip()

# Set which calendar to use
tool_mode = input("Do you want the tool to be based on 1. the current date or 2. a custom date range: ").strip()

# Debugging: Check the value of tool_mode
print(f"Tool mode entered: {tool_mode}")

if tool_mode == '1':  # Compare as string
    current_date_mode = input("Show 1. all active schedules or 2. all active and future schedules: ").strip()
    if current_date_mode == '1' or current_date_mode == '2':
        print(f"Current date mode entered: {current_date_mode}")
    else:
        print("Invalid option selected for tool mode.")
        sys.exit()
elif tool_mode == '2':  # Compare as string
    custom_start = input("Define custom start date as YYYYMMDD: ").strip()
    custom_end = input("Define custom end date as YYYYMMDD: ").strip()
    print(f"Creating GTFS for dates between {custom_start} and {custom_end}")
    custom_start = pd.to_datetime(custom_start, format='%Y%m%d')
    custom_end = pd.to_datetime(custom_end, format='%Y%m%d')
else:
    print("Invalid option selected for tool mode.")
    sys.exit()

# Append .zip to the file name
zip_file = f"{park_name}.zip"

# Check if the zip file exists
if os.path.exists(zip_file):
    # Create a ZipFile object
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Extract all contents to a folder named after the park code
        zip_ref.extractall(f"{park_name}_unzipped")
        print(f"Extracted {zip_file} to the folder {park_name}_unzipped")
else:
    print(f"The file {zip_file} does not exist.")
    sys.exit()


Enter the Park Code: SEKI
Do you want the tool to be based on 1. the current date or 2. a custom date range: 1
Tool mode entered: 1
Show 1. all active schedules or 2. all active and future schedules: 2
Current date mode entered: 2
Extracted SEKI.zip to the folder SEKI_unzipped


In [3]:
#Chunk 3: Import Calendar

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the calendar.txt file
calendar_file = os.path.join(unzipped_folder, "calendar.txt")

# Check if the file exists
if os.path.exists(calendar_file):
    # Load the calendar.txt into a pandas DataFrame
    calendar = pd.read_csv(calendar_file)
    print("Loaded calendar.txt into a DataFrame.")
    print(calendar.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {calendar_file} does not exist.")

Loaded calendar.txt into a DataFrame.
        service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0     12dailyearly       1        1          1         1       1         1   
1        12weekday       1        1          1         1       1         0   
2      12dailylate       1        1          1         1       1         1   
3        12weekend       0        0          0         0       0         1   
4  3weekdayOPearly       1        1          1         1       1         0   

   sunday  start_date  end_date  
0       1    20240523  20240628  
1       0    20240701  20240816  
2       1    20240819  20240830  
3       1    20240629  20240818  
4       0    20240523  20240628  


In [4]:
# Convert start_date and end_date columns to datetime 
calendar['start_date'] = pd.to_datetime(calendar['start_date'], format='%Y%m%d')
calendar['end_date'] = pd.to_datetime(calendar['end_date'], format='%Y%m%d')

if tool_mode == '1':
    if current_date_mode == '2':
        # Get the current date as a datetime object
        current_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d'))
        # Filter rows where the current date is between start_date and end_date or end_date is in the future
        active_schedules = calendar[(calendar['start_date'] <= current_date) & (calendar['end_date'] >= current_date) | (calendar['end_date'] > current_date)]
        print(f"Active/Future schedules on {current_date.date()}:")
        print(active_schedules)
    else:
        # Get the current date as a datetime object
        current_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d'))
        # Filter rows where the current date is between start_date and end_date or end_date is in the future
        active_schedules = calendar[(calendar['start_date'] <= current_date) & (calendar['end_date'] >= current_date)] 
        print(f"Active schedules on {current_date.date()}:")
        print(active_schedules)
elif tool_mode == '2':
        # Filter rows where the schedule end_date is between custom_start and custom_end
        # or the schedule overlaps with the custom date range (starts before custom_end and ends after custom_start)
        active_schedules = calendar[(calendar['end_date'] >= custom_start) & (calendar['start_date'] <= custom_end)]
        # Display the filtered results
        print(f"Active schedules between {custom_start.date()} and {custom_end.date()}:")
        print(active_schedules)
else:
        print("ERROR")
        sys.exit()

Active/Future schedules on 2024-09-25:
       service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
10     winterfull       0        0          0         0       0         0   
11  winterpartial       0        0          0         0       0         0   

    sunday start_date   end_date  
10       0 2024-11-01 2024-12-15  
11       0 2024-12-15 2025-03-01  


In [5]:
#Chunk 5: Add trips file

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the trips.txt file
trips_file = os.path.join(unzipped_folder, "trips.txt")

# Check if the file exists
if os.path.exists(trips_file):
    # Load the trips.txt into a pandas DataFrame
    trips = pd.read_csv(trips_file)
    trips['wheelchair_accessible'] = trips['wheelchair_accessible'].fillna(0)
    trips['bikes_allowed'] = trips['bikes_allowed'].fillna(0)
    trips['wheelchair_accessible'] = trips['wheelchair_accessible'].apply(lambda x: str(int(x)))
    trips['bikes_allowed'] = trips['bikes_allowed'].apply(lambda x: str(int(x)))
    trips['wheelchair_accessible'] = trips['wheelchair_accessible'].replace(0, '')
    trips['bikes_allowed'] = trips['bikes_allowed'].replace(0, '')
    print("Loaded trips.txt into a DataFrame.")
    print(trips.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {trips_file} does not exist.")

Loaded trips.txt into a DataFrame.
  route_id    service_id                       trip_id        trip_headsign  \
0      1ob  12dailyearly  Route1OutboundDailyEarly_T01  Giant Forest Museum   
1      1ob  12dailyearly  Route1OutboundDailyEarly_T02  Giant Forest Museum   
2      1ob  12dailyearly  Route1OutboundDailyEarly_T03  Giant Forest Museum   
3      1ob  12dailyearly  Route1OutboundDailyEarly_T04  Giant Forest Museum   
4      1ob  12dailyearly  Route1OutboundDailyEarly_T05  Giant Forest Museum   

   direction_id  block_id shape_id wheelchair_accessible bikes_allowed  
0             0       NaN      1ob                     0             0  
1             0       NaN      1ob                     0             0  
2             0       NaN      1ob                     0             0  
3             0       NaN      1ob                     0             0  
4             0       NaN      1ob                     0             0  


In [6]:
#Chunk 6: Filter to active trips

# Filter trips where service_id in trips matches any service_id in active_schedules
active_trips = trips[trips['service_id'].isin(active_schedules['service_id'])]

print("Active trips based on active schedules:")
print(active_trips)

Active trips based on active schedules:
             route_id     service_id                   trip_id  \
1227     winterfullob     winterfull    WinterFullOutbound_T01   
1228     winterfullob     winterfull    WinterFullOutbound_T02   
1229     winterfullob     winterfull    WinterFullOutbound_T03   
1230     winterfullob     winterfull    WinterFullOutbound_T04   
1231     winterfullob     winterfull    WinterFullOutbound_T05   
...               ...            ...                       ...   
1330  winterpartialib  winterpartial  WinterPartialInbound_T23   
1331  winterpartialib  winterpartial  WinterPartialInbound_T24   
1332  winterpartialib  winterpartial  WinterPartialInbound_T25   
1333  winterpartialib  winterpartial  WinterPartialInbound_T26   
1334  winterpartialib  winterpartial  WinterPartialInbound_T27   

                      trip_headsign  direction_id  block_id         shape_id  \
1227  Wuksachi Lodge and Restaurant             0       NaN     winterfullob   
1228  W

In [7]:
#Chunk 7: Add stop_times 

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the stop_times.txt file
stop_times_file = os.path.join(unzipped_folder, "stop_times.txt")

# Check if the file exists
if os.path.exists(stop_times_file):
    # Load the stop_times.txt into a pandas DataFrame
    stop_times = pd.read_csv(stop_times_file)
    print("Loaded stop_times.txt into a DataFrame.")
    print(stop_times.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {stop_times_file} does not exist.")

Loaded stop_times.txt into a DataFrame.
                        trip_id arrival_time departure_time  stop_id  \
0  Route1OutboundDailyEarly_T01     08:00:00       08:00:00        8   
1  Route1OutboundDailyEarly_T01     08:06:00       08:06:00        7   
2  Route1OutboundDailyEarly_T01     08:16:00       08:16:00        4   
3  Route1OutboundDailyEarly_T01     08:24:00       08:24:00        3   
4  Route1OutboundDailyEarly_T02     08:23:00       08:23:00        8   

   stop_sequence  stop_headsign  pickup_type  drop_off_type  \
0              1            NaN          NaN            NaN   
1              2            NaN          NaN            NaN   
2              3            NaN          NaN            NaN   
3              4            NaN          NaN            NaN   
4              1            NaN          NaN            NaN   

   continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
0                NaN                  NaN                0.000          

In [8]:
#Chunk 7: Fitler out active stop times
active_stop_times = stop_times[stop_times['trip_id'].isin(active_trips['trip_id'])]

print(f"Active stop times DataFrame created with {len(active_stop_times)} rows.")
print(active_stop_times.head())  # Print the first few rows of the active_stop_times DataFrame

Active stop times DataFrame created with 432 rows.
                     trip_id arrival_time departure_time  stop_id  \
3527  WinterFullOutbound_T01     10:00:00       10:00:00        3   
3528  WinterFullOutbound_T01     10:09:00       10:09:00        4   
3529  WinterFullOutbound_T01     10:18:00       10:18:00        5   
3530  WinterFullOutbound_T01     10:23:00       10:23:00        7   
3531  WinterFullOutbound_T01     10:30:00       10:30:00        9   

      stop_sequence  stop_headsign  pickup_type  drop_off_type  \
3527              1            NaN          NaN            NaN   
3528              2            NaN          NaN            NaN   
3529              3            NaN          NaN            NaN   
3530              4            NaN          NaN            NaN   
3531              5            NaN          NaN            NaN   

      continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
3527                NaN                  NaN              

In [9]:
# Chunk 9: Add shape files

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the shapes.txt file
shape_file = os.path.join(unzipped_folder, "shapes.txt")

# Check if the file exists
if os.path.exists(shape_file):
    # Load the shapes.txt into a pandas DataFrame
    shapes = pd.read_csv(shape_file)
    print("Loaded shapes.txt into a DataFrame.")
    print(shapes.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {shape_file} does not exist.")

Loaded shapes.txt into a DataFrame.
  shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  shape_dist_traveled
0      1ib      36.56433    -118.77326                  1                0.000
1      1ib      36.56438    -118.77330                  2                0.007
2      1ib      36.56447    -118.77335                  3                0.018
3      1ib      36.56459    -118.77348                  4                0.035
4      1ib      36.56465    -118.77342                  5                0.044


In [10]:
#Chunk 10: Filter to active shapes
active_shapes = shapes[shapes['shape_id'].isin(active_trips['shape_id'])]

print(f"Active shapes DataFrame created with {len(active_shapes)} rows.")
print(active_shapes.head())  # Print the first few rows of the active_shapes DataFrame

Active shapes DataFrame created with 3600 rows.
          shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  \
7138  winterfullob      36.56433    -118.77326                  1   
7139  winterfullob      36.56438    -118.77330                  2   
7140  winterfullob      36.56447    -118.77335                  3   
7141  winterfullob      36.56459    -118.77348                  4   
7142  winterfullob      36.56469    -118.77338                  5   

      shape_dist_traveled  
7138                0.000  
7139                0.007  
7140                0.018  
7141                0.035  
7142                0.050  


In [11]:
# Chunk 11: Add calendar_dates files

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the shapes.txt file
cd_file = os.path.join(unzipped_folder, "calendar_dates.txt")

# Check if the file exists
if os.path.exists(cd_file):
    # Load the calendar_dates.txt into a pandas DataFrame
    cdates = pd.read_csv(cd_file)
    print("Loaded calendar_dates.txt into a DataFrame.")
    print(cdates.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {shape_file} does not exist.")

Loaded calendar_dates.txt into a DataFrame.
     service_id      date  exception_type
0  12dailyearly  20240527               2
1     12weekend  20240527               1
2  12dailyearly  20240619               2
3     12weekend  20240619               1
4     12weekday  20240704               2


In [12]:
#Chunk 12: Filter to active calendar dates

# Filter trips where service_id in trips matches any service_id in active_schedules
active_cdates = cdates[cdates['service_id'].isin(active_schedules['service_id'])]

print("Active calendar dates based on active schedules:")
print(active_cdates)

Active calendar dates based on active schedules:
       service_id      date  exception_type
14     winterfull  20241128               1
15     winterfull  20241129               1
16     winterfull  20241130               1
17     winterfull  20241201               1
18  winterpartial  20241224               1
19  winterpartial  20241225               1
20  winterpartial  20241226               1
21  winterpartial  20241227               1
22  winterpartial  20241228               1
23  winterpartial  20241229               1
24  winterpartial  20241230               1
25  winterpartial  20241231               1
26  winterpartial  20250101               1
27  winterpartial  20250118               1
28  winterpartial  20250119               1
29  winterpartial  20250215               1
30  winterpartial  20250216               1


In [13]:
#Chunk 13: Convert calendar dates back to GTFS format (YYYYMMDD)
active_schedules['start_date'] = active_schedules['start_date'].dt.strftime('%Y%m%d')
active_schedules['end_date'] = active_schedules['end_date'].dt.strftime('%Y%m%d')

print("Converted dates in active_schedules DataFrame:")
print(active_schedules.head())  # Print the first few rows to verify the changes

Converted dates in active_schedules DataFrame:
       service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
10     winterfull       0        0          0         0       0         0   
11  winterpartial       0        0          0         0       0         0   

    sunday start_date  end_date  
10       0   20241101  20241215  
11       0   20241215  20250301  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  active_schedules['start_date'] = active_schedules['start_date'].dt.strftime('%Y%m%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  active_schedules['end_date'] = active_schedules['end_date'].dt.strftime('%Y%m%d')


In [14]:
#Chunk 14: Create new archive for the active, filtered GTFS feed

# Define the source and destination folders 
source_folder = f"{park_name}_unzipped"
destination_folder = f"{park_name}_active_unzipped"

# Check if the destination folder exists
if os.path.exists(destination_folder):
    # Delete all files in the destination folder
    for file_name in os.listdir(destination_folder):
        file_path = os.path.join(destination_folder, file_name)
        if os.path.isfile(file_path):
            os.remove(file_path)
    print(f"Deleted all files in {destination_folder}.")
else:
    # Create the destination folder if it doesn't exist
    os.makedirs(destination_folder, exist_ok=True)
    print(f"Created destination folder: {destination_folder}.")

# Check if the source folder exists
if os.path.exists(source_folder):
    # Transfer all files from source to destination
    for file_name in os.listdir(source_folder):
        # Construct full file path
        full_file_name = os.path.join(source_folder, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, destination_folder)
    
    print(f"All files transferred from {source_folder} to {destination_folder}.")
else:
    print(f"The source folder {source_folder} does not exist.")

Created destination folder: SEKI_active_unzipped.
All files transferred from SEKI_unzipped to SEKI_active_unzipped.


In [15]:
#Chunk 15: Delete any old files, update the calendar, text, shapes, and stoptimes to active versions

# Define the paths to the files to delete
calendar_file_path = os.path.join(destination_folder, "calendar.txt")
trips_file_path = os.path.join(destination_folder, "trips.txt")
stop_times_file_path = os.path.join(destination_folder, "stop_times.txt")
shape_file_path = os.path.join(destination_folder, "shapes.txt")
cdate_file_path = os.path.join(destination_folder, "calendar_dates.txt")

# Delete the calendar.txt and trips.txt files
if os.path.exists(calendar_file_path):
    os.remove(calendar_file_path)
    print(f"Deleted {calendar_file_path}.")
else:
    print(f"{calendar_file_path} does not exist.")

if os.path.exists(trips_file_path):
    os.remove(trips_file_path)
    print(f"Deleted {trips_file_path}.")
else:
    print(f"{trips_file_path} does not exist.")

if os.path.exists(stop_times_file_path):
    os.remove(stop_times_file_path)
    print(f"Deleted {stop_times_file_path}.")
else:
    print(f"{stop_times_file_path} does not exist.")
    
if os.path.exists(shape_file_path):
    os.remove(shape_file_path)
    print(f"Deleted {shape_file_path}.")
else:
    print(f"{shape_file_path} does not exist.")
    
if os.path.exists(cdate_file_path):
    os.remove(cdate_file_path)
    print(f"Deleted {cdate_file_path}.")
else:
    print(f"{cdate_file_path} does not exist.")
    
# Save active info to folder
calendar_file = os.path.join(destination_folder, "calendar.txt")
trips_file = os.path.join(destination_folder, "trips.txt")
stop_times_file = os.path.join(destination_folder, "stop_times.txt")
shape_file = os.path.join(destination_folder, "shapes.txt")
cdate_file = os.path.join(destination_folder, "calendar_dates.txt")

active_schedules.to_csv(calendar_file, sep=',', index=False)
active_trips.to_csv(trips_file, sep=',', index=False)
active_stop_times.to_csv(stop_times_file, sep=',', index=False)
active_shapes.to_csv(shape_file, sep=',', index=False)
active_cdates.to_csv(cdate_file, sep=',', index=False)

print(f"Saved active schedules to {calendar_file}.")
print(f"Saved active trips to {trips_file}.")
print(f"Saved active stop times to {stop_times_file}.")
print(f"Saved active shapes to {shape_file}.")
print(f"Saved active calendar dates to {cdate_file}.")

Deleted SEKI_active_unzipped\calendar.txt.
Deleted SEKI_active_unzipped\trips.txt.
Deleted SEKI_active_unzipped\stop_times.txt.
Deleted SEKI_active_unzipped\shapes.txt.
Deleted SEKI_active_unzipped\calendar_dates.txt.
Saved active schedules to SEKI_active_unzipped\calendar.txt.
Saved active trips to SEKI_active_unzipped\trips.txt.
Saved active stop times to SEKI_active_unzipped\stop_times.txt.
Saved active shapes to SEKI_active_unzipped\shapes.txt.
Saved active calendar dates to SEKI_active_unzipped\calendar_dates.txt.


In [16]:
#Chunk 16: Zip it all back up

# Define the path to the active unzipped folder and the zip file name
active_folder = destination_folder
zip_file_name = f"{park_name}_active"

# Create a zip file from the active folder
shutil.make_archive(zip_file_name, 'zip', active_folder)

print(f"Created zip file: {zip_file_name}.zip")

Created zip file: SEKI_active.zip
