In [2]:
import pandas as pd
import partridge as ptg

# Location of the source GTFS archive, relative to this notebook
zip_path = "../../vgn/GTFS.zip"

# Read the archive into a partridge geo feed; the cells below access its
# tables (routes, trips, stop_times, stops, ...) as attributes of this object
geo_feed = ptg.load_geo_feed(zip_path)

# Bus lines to extract: 561 through 570, plus 573 through 575
bus_lines = [*range(561, 571), 573, 574, 575]
print("List of bus lines:", bus_lines)

List of bus lines: [561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 573, 574, 575]


In [17]:
# Keep only the routes whose short name is one of the selected bus lines.
# Both sides are converted to strings before comparing, so the match does not
# depend on whether route_short_name is stored as numbers or as text.
routes_df = geo_feed.routes
wanted_short_names = list(map(str, bus_lines))
route_short_names = routes_df['route_short_name'].astype(str)
filtered_routes = routes_df[route_short_names.isin(wanted_short_names)]
print(f"Filtered routes for bus lines {bus_lines}:")


Filtered routes for bus lines [561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 573, 574, 575]:


numpy.int64

In [23]:
# Filter all GTFS schedule tables for the selected bus lines
nm_route_ids = filtered_routes['route_id']
trips_df = geo_feed.trips
nm_trips = trips_df[trips_df['route_id'].isin(nm_route_ids)]

# stop times: only the rows that belong to the selected trips
stop_times_df = geo_feed.stop_times
nm_stop_times = stop_times_df[stop_times_df['trip_id'].isin(nm_trips['trip_id'])]

# stops: every stop served by a selected trip, plus the parent stations those
# stops point to. parent_station is a reference into stops.stop_id, so dropping
# the parent rows would leave dangling references in the exported stops.txt.
stops_df = geo_feed.stops
served_stop_mask = stops_df['stop_id'].isin(nm_stop_times['stop_id'])
if 'parent_station' in stops_df.columns:
    parent_station_ids = stops_df.loc[served_stop_mask, 'parent_station'].dropna()
    nm_stops = stops_df[served_stop_mask | stops_df['stop_id'].isin(parent_station_ids)]
else:
    nm_stops = stops_df[served_stop_mask]

# calendars (filtered by service_id later, when the feed is written)
calendar_df = geo_feed.calendar if hasattr(geo_feed, 'calendar') else None
calendar_dates_df = geo_feed.calendar_dates if hasattr(geo_feed, 'calendar_dates') else None

# transfers: keep a transfer only when BOTH endpoints survived the stop filter;
# filtering on from_stop_id alone would keep rows whose to_stop_id is no longer
# present in the exported stops table.
transfer_df = geo_feed.transfers
transfer_mask = transfer_df["from_stop_id"].isin(nm_stops["stop_id"])
if "to_stop_id" in transfer_df.columns:
    transfer_mask = transfer_mask & transfer_df["to_stop_id"].isin(nm_stops["stop_id"])
nm_transfers = transfer_df[transfer_mask]

# agency table is taken over unfiltered
nm_agency =  geo_feed.agency


In [None]:
import zipfile
import io
import formatting
# Archive that receives the filtered feed
gtfs_zip_path = "neumarkt_gtfs.zip"

# GTFS file name -> filtered table stored under that name
gtfs_tables = {
    "trips.txt": nm_trips,
    "stop_times.txt": nm_stop_times,
    "stops.txt": nm_stops,
    "routes.txt": filtered_routes,
    "transfers.txt": nm_transfers,
    "agency.txt": nm_agency,
}

# The calendar tables may be None; when present, keep only the services that
# the selected trips actually use.
used_service_ids = nm_trips['service_id']
for optional_name, optional_table in (
    ("calendar.txt", calendar_df),
    ("calendar_dates.txt", calendar_dates_df),
):
    if optional_table is None:
        continue
    gtfs_tables[optional_name] = optional_table[optional_table['service_id'].isin(used_service_ids)]

# Serialize each table to CSV text and store it as a member of the zip archive
with zipfile.ZipFile(gtfs_zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    for fname, df in gtfs_tables.items():
        df = formatting.format_df_for_gtfs(df)
        # to_csv() without a target returns the CSV content as a string
        zf.writestr(fname, df.to_csv(index=False))
print(f"GTFS feed written directly to {gtfs_zip_path}")

GTFS feed written directly to neumarkt_gtfs.zip


In [None]:
import shutil
# Alternative export: build the zip from the contents of the directory `nm_dir`.
# NOTE(review): `nm_dir` is not assigned in any cell visible in this notebook;
# confirm it is defined before running this cell. The archive produced here has
# the same file name as the one written by the zipfile-based cell.
gtfs_zip_path = "./neumarkt_gtfs.zip"
shutil.make_archive(
    base_name="./neumarkt_gtfs",
    format='zip',
    root_dir=nm_dir,
)
print(f"GTFS feed saved as {gtfs_zip_path}")

In [None]:
# Function to add a new trip to a route using a reference trip and a time offset
def add_trip_with_offset(reference_trip_id, time_offset_minutes, trips_df, stop_times_df):
    """
    Add a copy of an existing trip whose stop times are shifted by a fixed offset.

    The reference trip's row is duplicated under a freshly generated trip_id, and
    its stop_times rows are duplicated with arrival_time/departure_time moved by
    ``time_offset_minutes``. The input DataFrames are not modified; updated
    copies are returned.

    Time values are supported in two representations:
      * "HH:MM:SS" strings, where the hour may be 24 or more (GTFS uses this for
        service that runs past midnight); the result uses the same format.
      * numeric seconds since midnight (the form some loaders parse GTFS times
        into); the result stays numeric.
    Missing values are passed through. Values in any other form are left
    unchanged and reported with a warning instead of being silently skipped.

    Args:
        reference_trip_id (str): The trip_id to use as a template.
        time_offset_minutes (int): Minutes to add to all times (may be negative).
        trips_df (pd.DataFrame): The trips table to extend.
        stop_times_df (pd.DataFrame): The stop_times table to extend.
    Returns:
        (trips_df, stop_times_df): New DataFrames including the added trip.
    Raises:
        ValueError: If reference_trip_id is not in trips_df, or if the offset
            would move a time before 00:00:00.
    """
    import numbers
    import uuid
    import warnings

    # Find the reference trip row
    ref_trip = trips_df[trips_df['trip_id'] == reference_trip_id]
    if ref_trip.empty:
        raise ValueError(f"Reference trip_id {reference_trip_id} not found.")

    # Copy the trip row under a new unique trip_id
    new_trip_id = str(uuid.uuid4())
    new_trip = ref_trip.iloc[0].to_dict()
    new_trip['trip_id'] = new_trip_id
    trips_df = pd.concat([trips_df, pd.DataFrame([new_trip])], ignore_index=True)

    # All arithmetic is done in whole seconds so that hours are never wrapped at 24
    offset_seconds = round(time_offset_minutes * 60)

    def shift_time(t):
        """Return t moved by offset_seconds, in the same representation as t."""
        if pd.isna(t):
            return t
        is_text = isinstance(t, str)
        if is_text:
            try:
                hours, minutes, seconds = (int(part) for part in t.strip().split(":"))
            except ValueError:
                warnings.warn(f"Unparseable GTFS time {t!r} left unshifted.")
                return t
            total = hours * 3600 + minutes * 60 + seconds + offset_seconds
        elif isinstance(t, numbers.Real) and not isinstance(t, bool):
            total = t + offset_seconds
        else:
            warnings.warn(f"Unsupported GTFS time value {t!r} left unshifted.")
            return t
        if total < 0:
            raise ValueError(
                f"Offset of {time_offset_minutes} minutes moves time {t!r} before 00:00:00."
            )
        if not is_text:
            return total
        hours, remainder = divmod(total, 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{hours:02}:{minutes:02}:{seconds:02}"

    # Duplicate the reference trip's stop_times, re-keyed and shifted
    new_stop_times = stop_times_df[stop_times_df['trip_id'] == reference_trip_id].copy()
    new_stop_times['trip_id'] = new_trip_id
    for col in ['arrival_time', 'departure_time']:
        if col in new_stop_times.columns:
            new_stop_times[col] = new_stop_times[col].apply(shift_time)
    stop_times_df = pd.concat([stop_times_df, new_stop_times], ignore_index=True)
    return trips_df, stop_times_df



In [13]:
# Inspect which columns the stops table of the loaded feed provides
print(stops_df.columns)


Index(['stop_id', 'stop_name', 'location_type', 'parent_station', 'geometry'], dtype='object')


In [14]:
# Display the geometry column of the stops table
stops_df["geometry"]

0        POINT (10.14257 49.17557)
1        POINT (10.14247 49.17563)
2        POINT (10.12359 49.12538)
3        POINT (10.12366 49.12534)
4        POINT (10.12152 49.12358)
                   ...            
23336    POINT (10.69042 48.95366)
23337     POINT (11.1686 50.35533)
23338    POINT (11.01247 49.32776)
23339    POINT (11.01229 49.32776)
23340    POINT (11.01928 49.33709)
Name: geometry, Length: 23341, dtype: geometry

In [None]:
# Enumerate every bus line (route_short_name) found in the source feed's routes table.
# NOTE: this rebinds `routes_df` and `bus_lines`, which earlier cells also assign;
# re-running those earlier cells afterwards will see the values set here.
routes_df = ptg.load_geo_feed(zip_path).routes
bus_lines = sorted(routes_df['route_short_name'].unique())
print("List of bus lines:")
for line in bus_lines:
    print(line)