In [1]:
import os
import zipfile
import pandas as pd
from datetime import datetime

In [2]:
def import_park_zip(park_code):
    """Extract the archive ``<park_code>.zip`` into a folder named ``park_code``.

    The archive path is built as ``f"{park_code}.zip"``, so a bare code is
    resolved against the current working directory. The archive's member
    names are printed before extraction.

    Args:
        park_code: Base name of the archive (without ``.zip``); also used as
            the destination folder name.

    Returns:
        The destination folder name (the same value as ``park_code``) on
        success, or ``None`` if the archive does not exist or is not a valid
        ZIP file. A message is printed in either failure case.
    """
    archive_name = f"{park_code}.zip"

    # Guard clause: bail out early when there is no archive to open.
    if not os.path.isfile(archive_name):
        print(f"Error: {archive_name} does not exist.")
        return None

    try:
        with zipfile.ZipFile(archive_name, 'r') as archive:
            # Report the archive contents before touching the file system.
            members = archive.namelist()
            print(f"Files in {archive_name}:")
            for member in members:
                print(f"- {member}")

            # The destination folder shares its name with the park code.
            os.makedirs(park_code, exist_ok=True)
            archive.extractall(park_code)
            print(f"Successfully imported and extracted {archive_name} to {park_code}/")

            return park_code
    except zipfile.BadZipFile:
        print(f"Error: {archive_name} is not a valid zip file.")
        return None

In [3]:
def create_calendar_df(extract_folder):
    """Load ``calendar.txt`` from ``extract_folder`` into a DataFrame.

    The frame is printed as read, after which the ``start_date`` and
    ``end_date`` columns are parsed with the ``%Y%m%d`` format.

    Args:
        extract_folder: Directory that contains ``calendar.txt``.

    Returns:
        The calendar DataFrame with ``start_date``/``end_date`` converted to
        datetimes, or ``None`` if reading or converting failed (an error
        message is printed in that case).
    """
    try:
        calendar_path = os.path.join(extract_folder, 'calendar.txt')

        calendar = pd.read_csv(calendar_path, delimiter=',')
        print("Calendar DataFrame created successfully:")
        print(calendar)

        # Both ends of the service range share the same compact date format.
        for date_column in ('start_date', 'end_date'):
            calendar[date_column] = pd.to_datetime(calendar[date_column], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: The file '{calendar_path}' does not exist.")
    except pd.errors.EmptyDataError:
        print("Error: The calendar file is empty.")
    except pd.errors.ParserError:
        print("Error: There was a problem parsing the calendar file.")
    except Exception as exc:
        print(f"An error occurred: {exc}")
    else:
        return calendar

    # Reached only when one of the handlers above reported a failure.
    return None

In [4]:
def check_active_schedules(calendar):
    """Return the calendar rows whose date range contains today's date.

    A row is selected when ``start_date <= today <= end_date`` (both bounds
    inclusive). Only the date range is examined; the weekday columns of the
    calendar are not consulted. The selected rows are also printed.

    Args:
        calendar: DataFrame with datetime ``start_date`` and ``end_date``
            columns.

    Returns:
        A DataFrame holding the matching rows (possibly empty), or ``None``
        if the check failed, in which case an error message is printed.
    """
    try:
        # Today's date at midnight, so the comparison is date-based.
        current_date = pd.to_datetime(datetime.now().strftime('%Y-%m-%d'))

        # Inclusive range test on both ends.
        in_range = (calendar['start_date'] <= current_date) & (calendar['end_date'] >= current_date)
        active_schedules = calendar[in_range]

        print(f"Active schedules on {current_date.date()}:")
        print(active_schedules)

        # Callers assign this result and test it against None before
        # filtering trips, so the frame must be handed back to them.
        return active_schedules

    except Exception as e:
        print(f"An error occurred while checking active schedules: {e}")

    return None

In [5]:
def create_trips_df(extract_folder):
    """Load ``trips.txt`` from ``extract_folder`` into a DataFrame.

    The frame is printed once it has been read.

    Args:
        extract_folder: Directory that contains ``trips.txt``.

    Returns:
        The trips DataFrame, or ``None`` if the file is missing, empty,
        unparsable, or any other error occurs (a message is printed).
    """
    try:
        trips_path = os.path.join(extract_folder, 'trips.txt')

        trips = pd.read_csv(trips_path, delimiter=',')
        print("Trips DataFrame created successfully:")
        print(trips)
    except FileNotFoundError:
        print(f"Error: The file '{trips_path}' does not exist.")
    except pd.errors.EmptyDataError:
        print("Error: The trips file is empty.")
    except pd.errors.ParserError:
        print("Error: There was a problem parsing the trips file.")
    except Exception as exc:
        print(f"An error occurred: {exc}")
    else:
        return trips

    # Reached only when one of the handlers above reported a failure.
    return None


In [6]:
def active_trips(trips, active_schedules):
    """Select the trips whose ``service_id`` appears in ``active_schedules``.

    The selected rows are printed as well as returned.

    Args:
        trips: DataFrame with a ``service_id`` column.
        active_schedules: DataFrame with a ``service_id`` column listing the
            services to keep.

    Returns:
        The rows of ``trips`` whose ``service_id`` is present in
        ``active_schedules['service_id']``, or ``None`` if a ``service_id``
        column is missing or another error occurs (a message is printed).
    """
    try:
        # Boolean mask: True where the trip's service is among the active ones.
        is_active = trips['service_id'].isin(active_schedules['service_id'])
        filtered_trips = trips[is_active]

        print("Filtered Trips DataFrame based on active schedules:")
        print(filtered_trips)
    except KeyError:
        print("Error: 'service_id' column not found in one or both DataFrames.")
    except Exception as exc:
        print(f"An error occurred while filtering trips: {exc}")
    else:
        return filtered_trips

    # Reached only when one of the handlers above reported a failure.
    return None

In [7]:
if __name__ == "__main__":
    park_code = input("Please enter the park code: ")

    # 1) Unpack <park_code>.zip; a falsy result means extraction failed.
    extract_folder = import_park_zip(park_code)

    if extract_folder:
        # 2) Load calendar.txt from the extracted folder.
        calendar = create_calendar_df(extract_folder)

        if calendar is not None:
            # 3) Work out which schedules cover today's date.
            active_schedules = check_active_schedules(calendar)

            # 4) Load trips.txt from the same folder.
            trips = create_trips_df(extract_folder)

            # 5) Filter the trips only when both inputs are available.
            if not (trips is None or active_schedules is None):
                filtered_trips = active_trips(trips, active_schedules)


Please enter the park code: STLI
Files in STLI.zip:
- agency.txt
- calendar.txt
- calendar_dates.txt
- fare_attributes.txt
- fare_rules.txt
- feed_info.txt
- routes.txt
- shapes.txt
- stop_times.txt
- stops.txt
- transfers.txt
- trips.txt
Successfully imported and extracted STLI.zip to STLI/
Calendar DataFrame created successfully:
          service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0            NJEarly       1        1          1         1       1         1   
1             NJFall       1        1          1         1       1         1   
2           NJWinter       1        1          1         1       1         1   
3          NJHoliday       1        1          1         1       1         1   
4     NYEarlyWeekday       1        1          1         1       1         0   
5    NYWinterWeekday       1        1          1         1       1         0   
6     NYEarlyWeekend       0        0          0         0       0         1   
7      NYFallWeekend      

In [8]:
# Prompt for a park code, extract its ZIP archive, and load calendar.txt.
# `extract_folder` and `calendar` are bound at notebook level here; later
# cells read them directly.
if __name__ == "__main__":
    park_code = input("Please enter the park code: ")

    # Step 1: Import the ZIP file and extract it (returns None on failure)
    extract_folder = import_park_zip(park_code)
    
    if extract_folder:
        # Step 2: Create the calendar DataFrame (None if loading failed)
        calendar = create_calendar_df(extract_folder)

Please enter the park code: STLI
Files in STLI.zip:
- agency.txt
- calendar.txt
- calendar_dates.txt
- fare_attributes.txt
- fare_rules.txt
- feed_info.txt
- routes.txt
- shapes.txt
- stop_times.txt
- stops.txt
- transfers.txt
- trips.txt
Successfully imported and extracted STLI.zip to STLI/
Calendar DataFrame created successfully:
          service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0            NJEarly       1        1          1         1       1         1   
1             NJFall       1        1          1         1       1         1   
2           NJWinter       1        1          1         1       1         1   
3          NJHoliday       1        1          1         1       1         1   
4     NYEarlyWeekday       1        1          1         1       1         0   
5    NYWinterWeekday       1        1          1         1       1         0   
6     NYEarlyWeekend       0        0          0         0       0         1   
7      NYFallWeekend      

In [9]:
if calendar is not None:
    # Step 3: find the schedules whose date range covers today.
    active_schedules = check_active_schedules(calendar)

            

Active schedules on 2024-09-18:
       service_id  monday  tuesday  wednesday  thursday  friday  saturday  \
0         NJEarly       1        1          1         1       1         1   
4  NYEarlyWeekday       1        1          1         1       1         0   
6  NYEarlyWeekend       0        0          0         0       0         1   

   sunday start_date   end_date  
0       1 2024-09-13 2024-10-14  
4       0 2024-09-13 2024-10-14  
6       1 2024-09-13 2024-10-18  


In [10]:
# Step 4: load trips.txt from the extracted folder (None if loading failed).
trips = create_trips_df(extract_folder)
            
            

Trips DataFrame created successfully:
    route_id service_id                                          trip_id  \
0         NJ    NJEarly                Liberty State Park Loop Early_T01   
1         NJ    NJEarly                Liberty State Park Loop Early_T02   
2         NJ    NJEarly                Liberty State Park Loop Early_T03   
3         NJ    NJEarly                Liberty State Park Loop Early_T04   
4         NJ    NJEarly                Liberty State Park Loop Early_T05   
..       ...        ...                                              ...   
202     BPLI     NYXmas  Battery Park - Liberty Island Loop Xmas Eve_T02   
203     BPLI     NYXmas  Battery Park - Liberty Island Loop Xmas Eve_T03   
204     BPEI     NYXmas    Battery Park - Ellis Island Loop Xmas Eve_T01   
205     BPEI     NYXmas    Battery Park - Ellis Island Loop Xmas Eve_T02   
206     BPEI     NYXmas    Battery Park - Ellis Island Loop Xmas Eve_T03   

      trip_headsign  direction_id  block_id shape

In [11]:
if not (trips is None or active_schedules is None):
    # Step 5: keep only the trips that belong to an active schedule.
    filtered_trips = active_trips(trips, active_schedules)

    # Report the outcome; a failed or empty filter counts as "no trips".
    if filtered_trips is None or filtered_trips.empty:
        print("No active trips found.")
    else:
        print("Active trips:")
        print(filtered_trips)