In [2]:
import os
import csv
from collections import defaultdict
import json

In [3]:
# Input: folder holding the raw GTFS feed and the trips table inside it
path = "GTFS-Daten"
trips = os.path.join(path, "trips.txt")

# Output: folder for the human-readable exports and the two files written there
output_path = "GTFS-human-readable"
trips_csv, trips_summary_json = (
    os.path.join(output_path, filename)
    for filename in ("trips.csv", "trips_summary.json")
)

In [4]:
# Headline figures for the trips table: number of rows plus the number of
# distinct route_id and service_id values.
summary = defaultdict(int)
unique_routes = set()
unique_services = set()

# encoding="utf-8-sig": GTFS text files are UTF-8 and may begin with a BOM;
# this codec decodes both, so the first header name is not prefixed with
# "\ufeff" and the platform default encoding is never used.
# newline="": recommended by the csv module so that it handles line endings
# (including ones inside quoted fields) itself.
with open(trips, "r", newline="", encoding="utf-8-sig") as file:
    reader = csv.DictReader(file)

    for row in reader:
        # Count total trips (one row per trip)
        summary["total_trips"] += 1

        # Track unique routes and services
        unique_routes.add(row["route_id"])
        unique_services.add(row["service_id"])

# Store the unique counts
summary["unique_routes"] = len(unique_routes)
summary["unique_services"] = len(unique_services)

In [5]:
# Show the aggregated trip counts as this cell's output
summary

defaultdict(int,
            {'total_trips': 3978, 'unique_routes': 102, 'unique_services': 34})

# Create a trip-details file that is easy to read and understand
- Keep only route_id, service_id, trip_id (bus_nr), and trip_headsign (direction)

In [6]:
""" trips.csv """

# Create the output folder if needed: opening a file for writing does not
# create missing parent directories.
os.makedirs(os.path.dirname(trips_csv) or ".", exist_ok=True)

# Open the input and output files.
# - encoding: GTFS text files are UTF-8 and may begin with a BOM; "utf-8-sig"
#   decodes both, so the first header name is not prefixed with "\ufeff".
#   The output is written as plain UTF-8 instead of the platform default.
# - newline="": required by the csv module so it controls line endings itself.
with open(trips, "r", newline="", encoding="utf-8-sig") as infile, \
        open(trips_csv, "w", newline="", encoding="utf-8") as outfile:
    reader = csv.DictReader(infile)

    # Columns kept in the human-readable file, in output order
    fieldnames = ["route_id", "service_id", "trip_id", "trip_headsign"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    # Write the header to the output file
    writer.writeheader()

    for row in reader:
        # trip_id is colon-separated; keep only the third segment (the text
        # between the 2nd and 3rd colon). IDs with fewer than four segments
        # are passed through unchanged.
        trip_id_parts = row["trip_id"].split(":")
        if len(trip_id_parts) >= 4:
            row["trip_id"] = trip_id_parts[2]

        # Drop every column that is not listed in fieldnames
        extracted_row = {field: row[field] for field in fieldnames}
        writer.writerow(extracted_row)

In [7]:
""" trips_summary.json - summary """

# Nested counter: trip_summary[trip_id][trip_headsign] -> number of rows
trip_summary = defaultdict(lambda: defaultdict(int))

# Read back the reduced trips.csv. The encoding is given explicitly so that
# non-ASCII headsigns are decoded the same way on every platform.
with open(trips_csv, "r", newline="", encoding="utf-8") as infile:
    reader = csv.DictReader(infile)

    for row in reader:
        trip_id = row["trip_id"]
        trip_headsign = row["trip_headsign"]

        # Increment the count of the specific trip_headsign for the given trip_id
        trip_summary[trip_id][trip_headsign] += 1

# Write the summary to a JSON file.
# ensure_ascii=False emits non-ASCII characters verbatim, so the file must be
# opened with an encoding able to represent them; UTF-8 is used instead of the
# platform default, which may raise UnicodeEncodeError or yield non-UTF-8 JSON.
with open(trips_summary_json, "w", encoding="utf-8") as outfile:
    json.dump(trip_summary, outfile, indent=4, ensure_ascii=False)

"""