In [1]:
#Chunk 1: Import libraries

import zipfile
import os
import pandas as pd
from datetime import datetime
import shutil

In [2]:
#Chunk 2: Import GTFS feed with all schedules 

#Enter Park Code
park_name = input("Enter the Park Code: ")

# Append .zip to the file name
zip_file = f"{park_name}.zip"

# Check if the zip file exists
if os.path.exists(zip_file):
    # Create a ZipFile object
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Extract all contents to a folder named after the park code
        zip_ref.extractall(f"{park_name}_unzipped")
        print(f"Extracted {zip_file} to the folder {park_name}_unzipped")
else:
    print(f"The file {zip_file} does not exist.")


Enter the Park Code: GRCA
Extracted GRCA.zip to the folder GRCA_unzipped


In [3]:
#Chunk 3: Import Calendar

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the calendar.txt file
calendar_file = os.path.join(unzipped_folder, "calendar.txt")

# Check if the file exists
if os.path.exists(calendar_file):
    # Load the calendar.txt into a pandas DataFrame
    calendar = pd.read_csv(calendar_file)
    print("Loaded calendar.txt into a DataFrame.")
    print(calendar.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {calendar_file} does not exist.")

Loaded calendar.txt into a DataFrame.
     service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0        spring       1        1          1         1       1         1   
1        summer       1        1          1         1       1         1   
2  summerhikers       1        1          1         1       1         1   
3          fall       1        1          1         1       1         1   
4        winter       1        1          1         1       1         1   

   sunday  start_date  end_date  
0       1    20240301  20240524  
1       1    20240525  20240906  
2       1    20240525  20240831  
3       1    20240907  20241130  
4       1    20231201  20240229  


In [4]:
# Convert start_date and end_date columns to datetime 
calendar['start_date'] = pd.to_datetime(calendar['start_date'], format='%Y%m%d')
calendar['end_date'] = pd.to_datetime(calendar['end_date'], format='%Y%m%d')

# Get the current date as a datetime object
current_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d'))

# Filter rows where the current date is between start_date and end_date or end_date is in the future
active_schedules = calendar[(calendar['start_date'] <= current_date) & (calendar['end_date'] >= current_date) | (calendar['end_date'] > current_date)]

print(f"Active/Future schedules on {current_date.date()}:")
print(active_schedules)

Active schedules on 2024-09-24:
        service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
3             fall       1        1          1         1       1         1   
10       september       1        1          1         1       1         1   
11         october       1        1          1         1       1         1   
12        november       1        1          1         1       1         1   
13        december       1        1          1         1       1         1   
16  late september       1        1          1         1       1         1   
17   early october       1        1          1         1       1         1   
18     mid october       1        1          1         1       1         1   
19    late october       1        1          1         1       1         1   
20  early november       1        1          1         1       1         1   
21    mid november       1        1          1         1       1         1   
22   late november       1      

In [5]:
#Chunk 5: Add trips file

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the trips.txt file
trips_file = os.path.join(unzipped_folder, "trips.txt")

# Check if the file exists
if os.path.exists(trips_file):
    # Load the trips.txt into a pandas DataFrame
    trips = pd.read_csv(trips_file)
    print("Loaded trips.txt into a DataFrame.")
    print(trips.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {trips_file} does not exist.")

Loaded trips.txt into a DataFrame.
  route_id service_id             trip_id                trip_headsign  \
0  village     spring  village_spring_T01  Grand Canyon Visitor Center   
1  village     spring  village_spring_T02  Grand Canyon Visitor Center   
2  village     spring  village_spring_T03  Grand Canyon Visitor Center   
3  village     spring  village_spring_T04  Grand Canyon Visitor Center   
4  village     spring  village_spring_T05  Grand Canyon Visitor Center   

   direction_id  block_id shape_id  wheelchair_accessible  bikes_allowed  
0             1       NaN  village                    1.0            1.0  
1             1       NaN  village                    1.0            1.0  
2             1       NaN  village                    1.0            1.0  
3             1       NaN  village                    1.0            1.0  
4             1       NaN  village                    1.0            1.0  


In [6]:
#Chunk 6: Filter to active trips

# Filter trips where service_id in trips matches any service_id in active_schedules
active_trips = trips[trips['service_id'].isin(active_schedules['service_id'])]

print("Active trips based on active schedules:")
print(active_trips)

Active trips based on active schedules:
     route_id     service_id                   trip_id  \
151   village           fall          village_fall_T01   
152   village           fall          village_fall_T02   
153   village           fall          village_fall_T03   
154   village           fall          village_fall_T04   
155   village           fall          village_fall_T05   
...       ...            ...                       ...   
1292   hermit  late november  hermit_late november_T46   
1293   hermit  late november  hermit_late november_T47   
1294   hermit  late november  hermit_late november_T48   
1295   hermit  late november  hermit_late november_T49   
1296   hermit  late november  hermit_late november_T50   

                    trip_headsign  direction_id  block_id shape_id  \
151   Grand Canyon Visitor Center             1       NaN  village   
152   Grand Canyon Visitor Center             1       NaN  village   
153   Grand Canyon Visitor Center             1      

In [7]:
#Chunk 7: Add stop_times 

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the stop_times.txt file
stop_times_file = os.path.join(unzipped_folder, "stop_times.txt")

# Check if the file exists
if os.path.exists(stop_times_file):
    # Load the stop_times.txt into a pandas DataFrame
    stop_times = pd.read_csv(stop_times_file)
    print("Loaded stop_times.txt into a DataFrame.")
    print(stop_times.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {stop_times_file} does not exist.")

Loaded stop_times.txt into a DataFrame.
              trip_id arrival_time departure_time  stop_id  stop_sequence  \
0  village_spring_T01     07:00:00       07:00:00        1              1   
1  village_spring_T01     07:05:00       07:05:00        2              2   
2  village_spring_T01     07:08:00       07:08:00        3              3   
3  village_spring_T01     07:12:00       07:12:00        4              4   
4  village_spring_T01     07:15:00       07:15:00        5              5   

                  stop_headsign  pickup_type  drop_off_type  \
0        Market Plaza Westbound          NaN            NaN   
1  Shrine of the Ages Westbound          NaN            NaN   
2                   Train Depot          NaN            NaN   
3            Bright Angel Lodge          NaN            NaN   
4         Hermits Rest Transfer          NaN            NaN   

   continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
0                NaN                  NaN 

In [8]:
#Chunk 7: Fitler out active stop times
active_stop_times = stop_times[stop_times['trip_id'].isin(active_trips['trip_id'])]

print(f"Active stop times DataFrame created with {len(active_stop_times)} rows.")
print(active_stop_times.head())  # Print the first few rows of the active_stop_times DataFrame

Active stop times DataFrame created with 8262 rows.
               trip_id arrival_time departure_time  stop_id  stop_sequence  \
2265  village_fall_T01     07:00:00       07:00:00        1              1   
2266  village_fall_T01          NaN            NaN        2              2   
2267  village_fall_T01          NaN            NaN        3              3   
2268  village_fall_T01          NaN            NaN        4              4   
2269  village_fall_T01          NaN            NaN        5              5   

                     stop_headsign  pickup_type  drop_off_type  \
2265        Market Plaza Westbound          NaN            NaN   
2266  Shrine of the Ages Westbound          NaN            NaN   
2267                   Train Depot          NaN            NaN   
2268            Bright Angel Lodge          NaN            NaN   
2269         Hermits Rest Transfer          NaN            NaN   

      continuous_pickup  continuous_drop_off  shape_dist_traveled  timepoint  
226

In [9]:
# Chunk 9: Add shape files

# Define the path to the unzipped folder
unzipped_folder = f"{park_name}_unzipped"

# Define the path to the shapes.txt file
shape_file = os.path.join(unzipped_folder, "shapes.txt")

# Check if the file exists
if os.path.exists(shape_file):
    # Load the shapes.txt into a pandas DataFrame
    shapes = pd.read_csv(shape_file)
    print("Loaded shapes.txt into a DataFrame.")
    print(shapes.head())  # Print the first few rows of the DataFrame
else:
    print(f"The file {shape_file} does not exist.")

Loaded shapes.txt into a DataFrame.
  shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  shape_dist_traveled
0  village      36.05814    -112.10857                  1                0.000
1  village      36.05809    -112.10851                  2                0.008
2  village      36.05798    -112.10838                  3                0.025
3  village      36.05794    -112.10832                  4                0.032
4  village      36.05785    -112.10820                  5                0.046


In [10]:
#Chunk 10: Filter to active shapes
active_shapes = shapes[shapes['shape_id'].isin(active_trips['shape_id'])]

print(f"Active shapes DataFrame created with {len(active_shapes)} rows.")
print(active_shapes.head())  # Print the first few rows of the active_shapes DataFrame

Active shapes DataFrame created with 3958 rows.
  shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  shape_dist_traveled
0  village      36.05814    -112.10857                  1                0.000
1  village      36.05809    -112.10851                  2                0.008
2  village      36.05798    -112.10838                  3                0.025
3  village      36.05794    -112.10832                  4                0.032
4  village      36.05785    -112.10820                  5                0.046


In [11]:
#Chunk 11: Convert calendar dates back to GTFS format (YYYYMMDD)
active_schedules['start_date'] = active_schedules['start_date'].dt.strftime('%Y%m%d')
active_schedules['end_date'] = active_schedules['end_date'].dt.strftime('%Y%m%d')

print("Converted dates in active_schedules DataFrame:")
print(active_schedules.head())  # Print the first few rows to verify the changes

Converted dates in active_schedules DataFrame:
   service_id  monday  tuesday  wednesday  thursday  friday  saturday  sunday  \
3        fall       1        1          1         1       1         1       1   
10  september       1        1          1         1       1         1       1   
11    october       1        1          1         1       1         1       1   
12   november       1        1          1         1       1         1       1   
13   december       1        1          1         1       1         1       1   

   start_date  end_date  
3    20240907  20241130  
10   20240907  20240930  
11   20241001  20241031  
12   20241101  20241130  
13   20241201  20241231  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  active_schedules['start_date'] = active_schedules['start_date'].dt.strftime('%Y%m%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  active_schedules['end_date'] = active_schedules['end_date'].dt.strftime('%Y%m%d')


In [12]:
#Chunk 12: Create new archive for the active, filtered GTFS feed

# Define the source and destination folders 
source_folder = f"{park_name}_unzipped"
destination_folder = f"{park_name}_active_unzipped"

# Check if the destination folder exists
if os.path.exists(destination_folder):
    # Delete all files in the destination folder
    for file_name in os.listdir(destination_folder):
        file_path = os.path.join(destination_folder, file_name)
        if os.path.isfile(file_path):
            os.remove(file_path)
    print(f"Deleted all files in {destination_folder}.")
else:
    # Create the destination folder if it doesn't exist
    os.makedirs(destination_folder, exist_ok=True)
    print(f"Created destination folder: {destination_folder}.")

# Check if the source folder exists
if os.path.exists(source_folder):
    # Transfer all files from source to destination
    for file_name in os.listdir(source_folder):
        # Construct full file path
        full_file_name = os.path.join(source_folder, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, destination_folder)
    
    print(f"All files transferred from {source_folder} to {destination_folder}.")
else:
    print(f"The source folder {source_folder} does not exist.")

Created destination folder: GRCA_active_unzipped.
All files transferred from GRCA_unzipped to GRCA_active_unzipped.


In [13]:
#Chunk 13: Delete any old files, update the calendar, text, shapes, and stoptimes to active versions

# Define the paths to the files to delete
calendar_file_path = os.path.join(destination_folder, "calendar.txt")
trips_file_path = os.path.join(destination_folder, "trips.txt")
stop_times_file_path = os.path.join(destination_folder, "stop_times.txt")
shape_file_path = os.path.join(destination_folder, "shapes.txt")

# Delete the calendar.txt and trips.txt files
if os.path.exists(calendar_file_path):
    os.remove(calendar_file_path)
    print(f"Deleted {calendar_file_path}.")
else:
    print(f"{calendar_file_path} does not exist.")

if os.path.exists(trips_file_path):
    os.remove(trips_file_path)
    print(f"Deleted {trips_file_path}.")
else:
    print(f"{trips_file_path} does not exist.")

if os.path.exists(stop_times_file_path):
    os.remove(stop_times_file_path)
    print(f"Deleted {stop_times_file_path}.")
else:
    print(f"{stop_times_file_path} does not exist.")
    
if os.path.exists(shape_file_path):
    os.remove(shape_file_path)
    print(f"Deleted {shape_file_path}.")
else:
    print(f"{shape_file_path} does not exist.")
    
# Save active info to folder
calendar_file = os.path.join(destination_folder, "calendar.txt")
trips_file = os.path.join(destination_folder, "trips.txt")
stop_times_file = os.path.join(destination_folder, "stop_times.txt")
shape_file = os.path.join(destination_folder, "shapes.txt")

active_schedules.to_csv(calendar_file, sep=',', index=False)
active_trips.to_csv(trips_file, sep=',', index=False)
active_stop_times.to_csv(stop_times_file, sep=',', index=False)
active_shapes.to_csv(shape_file, sep=',', index=False)

print(f"Saved active schedules to {calendar_file}.")
print(f"Saved active trips to {trips_file}.")
print(f"Saved active stop times to {stop_times_file}.")
print(f"Saved active shapes to {shape_file}.")

Deleted GRCA_active_unzipped\calendar.txt.
Deleted GRCA_active_unzipped\trips.txt.
Deleted GRCA_active_unzipped\stop_times.txt.
Deleted GRCA_active_unzipped\shapes.txt.
Saved active schedules to GRCA_active_unzipped\calendar.txt.
Saved active trips to GRCA_active_unzipped\trips.txt.
Saved active stop times to GRCA_active_unzipped\stop_times.txt.
Saved active shapes to GRCA_active_unzipped\shapes.txt.


In [14]:
#Chunk 14: Zip it all back up

# Define the path to the active unzipped folder and the zip file name
active_folder = destination_folder
zip_file_name = f"{park_name}_active"

# Create a zip file from the active folder
shutil.make_archive(zip_file_name, 'zip', active_folder)

print(f"Created zip file: {zip_file_name}.zip")

Created zip file: GRCA_active.zip
