In [3]:
import pandas as pd
import os

# Define folder name and create it if it doesn't exist
folder_name = "Stop_Numbering"
os.makedirs(folder_name, exist_ok=True)

# Loop through all dates from 1 to 31
for date in range(1, 2):
    # Format date as a two-digit string (e.g., 01, 02, ..., 31)
    date_str = f"{date:02d}"

    # Read the CSV file for the given date
    file_path = f'Latest_ETM_Data/Final_Ticket_data_2024-07-{date_str}.csv'
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        continue

    # Select required columns
    cols = ['source', 'source_stop_no', 'destination', 'destination_stop_no', 'Direction_route']
    df = df[cols]

    # Process source data
    source_cols = ['source', 'source_stop_no', 'Direction_route']
    source_df = df[source_cols]
    source_df = source_df.rename(columns={'source': 'stop', 'source_stop_no': 'stop_no'})
    source_df = source_df[source_df['stop_no'].isna()].drop_duplicates()

    # Process destination data
    destination_cols = ['destination', 'destination_stop_no', 'Direction_route']
    destination_df = df[destination_cols]
    destination_df = destination_df.rename(columns={'destination': 'stop', 'destination_stop_no': 'stop_no'})
    destination_df = destination_df[destination_df['stop_no'].isna()].drop_duplicates()

    # Combine source and destination DataFrames
    combined_df = pd.concat([source_df, destination_df], ignore_index=True).drop_duplicates()

    # Save the combined DataFrame to a CSV file
    combined_file_path = f"{folder_name}/stop_numbering_{date_str}.csv"
    combined_df.to_csv(combined_file_path, index=False)
    print(f"Saved: {combined_file_path}")

Saved: Stop_Numbering/stop_numbering_01.csv
