# Trip Generator
This notebook is used to generate valid origin–destination (OD) pairs for SUMO simulations.

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import json
from geopy.distance import geodesic
from shapely.geometry import Point
from sumolib.net import readNet
import xml.etree.ElementTree as ET
import random
import h3
from sklearn.neighbors import BallTree


# Seed Python's `random` module so the randomly sampled OD points are reproducible.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [2]:
# Load the notebook inputs: recommended station cells (H3 indexes), existing
# station coordinates, the commuting table and the area polygons (GeoJSON).
# Forward slashes are used for every path so the cell also runs outside Windows
# (Windows accepts them as well).
recommended_stations = pd.read_csv('../best_hexagons.csv', usecols=['h3_index'])
existing_stations = pd.read_csv('../Datasets/existing_stations.csv', usecols=['lat', 'lon', 'name'])
commuting_df = pd.read_csv('../Datasets/Wandsworth-travel.csv')

# GeoJSON is UTF-8 by specification; do not rely on the platform default encoding.
with open('../geojson/map.geojson', 'r', encoding='utf-8') as f:
    geojson = json.load(f)

In [3]:
# Give the commuting table short column names; its second column is renamed to
# 'lsoa21cd' so it can serve as the join key against the GeoJSON features.
column_aliases = {
    commuting_df.columns[1]: 'lsoa21cd',
    'Less than 2km': '<2km',
    '2km to less than 5km': '<5km',
    '5km to less than 10km': '<10km',
}
commuting_df = commuting_df.rename(columns=column_aliases)

# Polygons from the GeoJSON features, joined with the commuting counts.
# merge() defaults to an inner join, so areas without a matching row are dropped.
gdf = gpd.GeoDataFrame.from_features(geojson["features"], crs="EPSG:4326")
merged = gdf.merge(commuting_df, on='lsoa21cd')

In [4]:
# Load the SUMO network used for snapping coordinates to edges.
# A forward slash keeps the path portable (Windows accepts it as well).
net = readNet("../london.net.xml")

In [None]:
def snap_to_edge(lat, lon, radius=500):
    """Return the ID of the network edge closest to a lat/lon point, or None.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        radius: Search radius handed to ``net.getNeighboringEdges`` (network
            coordinate units - presumably metres; confirm for this network).

    Returns:
        The ID of the closest edge that allows pedestrians. If no candidate
        allows pedestrians, the ID of the closest edge of any kind. ``None``
        when no edge is found within ``radius`` or the lookup fails.
    """
    try:
        x, y = net.convertLonLat2XY(lon, lat)
        candidates = net.getNeighboringEdges(x, y, radius)
        if not candidates:
            return None

        # getNeighboringEdges yields (edge, distance) pairs in no particular
        # order, so the closest one has to be selected explicitly rather than
        # taking the first element.
        walkable = [pair for pair in candidates if pair[0].allows('pedestrian')]
        nearest_edge, _ = min(walkable or candidates, key=lambda pair: pair[1])
        return nearest_edge.getID()
    except Exception:
        # Deliberate best-effort: a point that cannot be projected or matched is
        # reported as "no edge". Unlike a bare `except:`, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return None

def generate_random_point_within(poly):
    """Draw a uniformly random point that lies inside ``poly``.

    Rejection sampling: candidates are drawn from the bounding box of ``poly``
    (x first, then y) until one is contained in the geometry. The loop has no
    upper bound, so it only terminates if the geometry can contain a point.
    """
    west, south, east, north = poly.bounds
    candidate = Point(random.uniform(west, east), random.uniform(south, north))
    while not poly.contains(candidate):
        candidate = Point(random.uniform(west, east), random.uniform(south, north))
    return candidate

def trip_potential(row, weights=None):
    """Estimate how many trips to sample for one area from its commuting counts.

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) exposing
            ``.get``; read for the columns named in ``weights``.
        weights: Optional ``{column: weight}`` mapping of sampling weights.
            Defaults to ``{'<2km': 0.3, '<5km': 0.2, '<10km': 0.1}``.

    Returns:
        The weighted sum of the counts, truncated to ``int``. A column that is
        absent or holds a missing value (NaN/None) contributes 0 instead of
        making the integer conversion fail.
    """
    if weights is None:
        weights = {'<2km': 0.3, '<5km': 0.2, '<10km': 0.1}

    total = 0
    for column, weight in weights.items():
        count = row.get(column, 0)
        if pd.isna(count):
            count = 0
        total += count * weight
    return int(total)

def h3_to_latlon(h3_id):
    """Convert an H3 cell index to whatever ``h3.cell_to_latlng`` returns for it."""
    lat_lng = h3.cell_to_latlng(h3_id)
    return lat_lng

In [None]:
# Sanity check: collect the coordinates of every station in the system and
# report how many of each kind were loaded.
existing_coords = existing_stations.loc[:, ['lat', 'lon']].values.tolist()
recommended_coords = list(map(h3_to_latlon, recommended_stations['h3_index']))

print(f"Existing stations: {len(existing_coords)}")
print(f"Recommended stations: {len(recommended_coords)}")

Existing stations: 798
Recommended stations: 5


In [7]:
# Sample OD pairs per area: the number of pairs comes from trip_potential(),
# and both endpoints of a pair are drawn inside the same area polygon.
# Each entry is (origin_lat, origin_lon, dest_lat, dest_lon).
od_points = []
for _, area in merged.iterrows():
    zone = area.geometry
    for _ in range(trip_potential(area)):
        origin = generate_random_point_within(zone)
        destination = generate_random_point_within(zone)
        od_points.append((origin.y, origin.x, destination.y, destination.x))

print(f"Total OD pairs: {len(od_points)}")

Total OD pairs: 9433


In [None]:
# Write one <poi> per station that can be snapped to a network edge.
# Existing stations are written first (blue), then recommended ones (green).
# NOTE(review): x/y receive the raw lon/lat values and `roadId` is a custom
# attribute - confirm how this file is consumed before loading it into SUMO.
with open("stations.poi.xml", "w", encoding="utf-8") as f:
    # Real newlines: the escaped form '\\n' would put a literal backslash-n
    # into the document and make it invalid XML.
    f.write('<?xml version="1.0"?>\n<additional>\n')
    for prefix, color, coords in (
        ("existing", "blue", existing_coords),
        ("recommended", "green", recommended_coords),
    ):
        for i, (lat, lon) in enumerate(coords):
            edge = snap_to_edge(lat, lon)
            if edge:
                f.write(
                    f'  <poi id="{prefix}_{i}" type="station" x="{lon}" y="{lat}" '
                    f'color="{color}" layer="1" roadId="{edge}"/>\n'
                )
    f.write('</additional>')

def build_balltree(coords):
    """Build a haversine BallTree for nearest-neighbour station search.

    ``coords`` is converted from degrees to radians before the tree is built.

    Returns:
        ``(tree, coords_in_radians)``.
    """
    radians = np.radians(coords)
    tree = BallTree(radians, metric='haversine')
    return tree, radians

# Find two different stations near origin and destination + Ensure they are different
def find_two_different_stations(origin_lat, origin_lon, dest_lat, dest_lon, station_coords, min_distance_km=0.5):
    """Pick a pick-up station near the origin and a distinct drop-off station near the destination.

    Up to five nearest stations are considered for each endpoint. Candidate
    pairs are tried with the origin-side ranking as the outer order, and the
    first pair that uses two different stations at least ``min_distance_km``
    apart (geodesic distance) is returned.

    Returns:
        ``(station_A, station_B)`` taken from ``station_coords``, or
        ``(None, None)`` when fewer than two stations exist or no pair qualifies.
    """
    n_stations = len(station_coords)
    if n_stations < 2:
        return None, None

    tree, _ = build_balltree(station_coords)
    n_candidates = min(5, n_stations)

    # The tree is queried in radians, matching how build_balltree builds it.
    _, near_origin = tree.query(np.radians([[origin_lat, origin_lon]]), k=n_candidates)
    _, near_dest = tree.query(np.radians([[dest_lat, dest_lon]]), k=n_candidates)
    pickup_candidates = [int(ix) for ix in near_origin[0]]
    dropoff_candidates = [int(ix) for ix in near_dest[0]]

    for pickup_idx in pickup_candidates:
        for dropoff_idx in dropoff_candidates:
            if pickup_idx == dropoff_idx:
                continue
            pickup = station_coords[pickup_idx]
            dropoff = station_coords[dropoff_idx]
            # Stations closer than the minimum are skipped: such a ride would
            # not make sense for bike-sharing.
            if geodesic(pickup, dropoff).kilometers >= min_distance_km:
                return pickup, dropoff

    # No suitable pair among the candidates.
    return None, None

In [None]:
# Generate multimodal trips
def generate_multimodal_trips(od_points, station_coords, max_trips=2000):
    """Turn OD pairs into walk -> ride -> walk trips between two stations.

    Only the first ``max_trips`` entries of ``od_points`` are processed. An OD
    pair is skipped when either endpoint or either station cannot be snapped to
    a network edge, when no suitable pair of stations is found, or when both
    stations snap to the same edge.

    Args:
        od_points: Sequence of ``(origin_lat, origin_lon, dest_lat, dest_lon)``.
        station_coords: Sequence of ``(lat, lon)`` station positions.
        max_trips: Maximum number of OD pairs to process.

    Returns:
        A list of dicts with the four edge IDs (``walk_origin_edge``,
        ``station_A_edge``, ``station_B_edge``, ``walk_dest_edge``) and the
        matching coordinates (``origin_coords``, ``dest_coords``,
        ``station_A_coords``, ``station_B_coords``).
    """
    trips = []
    skipped_same_station = 0
    skipped_no_edge = 0
    # Never incremented: no routing is attempted in this function. Kept so the
    # printed summary stays unchanged.
    skipped_no_route = 0

    # The same stations are selected for many OD pairs; snap each one only once.
    station_edge_cache = {}

    def station_edge(station):
        key = tuple(station)
        if key not in station_edge_cache:
            station_edge_cache[key] = snap_to_edge(*station)
        return station_edge_cache[key]

    for i, (o_lat, o_lon, d_lat, d_lon) in enumerate(od_points[:max_trips]):
        if i % 500 == 0:
            print(f"Processing trip {i}/{min(max_trips, len(od_points))}")

        # Edges where the two walking segments start and end.
        walk_origin_edge = snap_to_edge(o_lat, o_lon)
        walk_dest_edge = snap_to_edge(d_lat, d_lon)
        if not walk_origin_edge or not walk_dest_edge:
            skipped_no_edge += 1
            continue

        station_A, station_B = find_two_different_stations(
            o_lat, o_lon, d_lat, d_lon, station_coords, min_distance_km=0.5
        )
        if not station_A or not station_B:
            skipped_same_station += 1
            continue

        station_A_edge = station_edge(station_A)
        station_B_edge = station_edge(station_B)
        if not station_A_edge or not station_B_edge:
            skipped_no_edge += 1
            continue

        # Two distinct stations may still snap to the same edge, which would
        # leave nothing to ride.
        if station_A_edge == station_B_edge:
            skipped_same_station += 1
            continue

        trips.append({
            "walk_origin_edge": walk_origin_edge,
            "station_A_edge": station_A_edge,
            "station_B_edge": station_B_edge,
            "walk_dest_edge": walk_dest_edge,
            "origin_coords": (o_lat, o_lon),
            "dest_coords": (d_lat, d_lon),
            "station_A_coords": station_A,
            "station_B_coords": station_B
        })

    print(f"Generated {len(trips)} valid trips")
    print(f"Skipped {skipped_same_station} trips due to same station")
    print(f"Skipped {skipped_no_edge} trips due to missing edges")
    print(f"Skipped {skipped_no_route} trips due to routing issues")

    return trips

In [None]:
def validate_trips(trips, sample_size=10):
    """Print a human-readable check of the first ``sample_size`` trips.

    For each sampled trip the three segments (walk, ride, walk) are printed,
    followed by whether the ride segment uses two different station edges and
    the geodesic distance between the two stations in kilometres.

    Args:
        trips: List of trip dicts holding the keys ``walk_origin_edge``,
            ``station_A_edge``, ``station_B_edge``, ``walk_dest_edge``,
            ``station_A_coords`` and ``station_B_coords``.
        sample_size: Maximum number of trips to print.
    """
    # "\n" must be a real newline; the escaped form "\\n" would print a literal
    # backslash-n in front of each heading.
    print(f"\nValidating {min(sample_size, len(trips))} trips...")

    for i, trip in enumerate(trips[:sample_size]):
        print(f"\nTrip {i}:")
        print(f"  Walk: {trip['walk_origin_edge']} -> {trip['station_A_edge']}")
        print(f"  Ride: {trip['station_A_edge']} -> {trip['station_B_edge']}")
        print(f"  Walk: {trip['station_B_edge']} -> {trip['walk_dest_edge']}")

        # The ride segment needs two different station edges.
        if trip['station_A_edge'] == trip['station_B_edge']:
            print("ERROR: Same station for ride segment")
        else:
            print("Different stations for ride segment")

        # Distance check
        dist = geodesic(trip['station_A_coords'], trip['station_B_coords']).kilometers
        print(f"  Station distance: {dist:.2f} km")

## Make multimodal routes
SUMO has a limitation when modelling bike-sharing systems: each agent must be modelled either as a pedestrian or as a vehicle. If we model the agents as pedestrians, they can only use public transportation with pre-defined routes, which is not how bike-sharing services work. Therefore, we model each agent using two vehicle types: 'walk_bike' and 'ride_bike'.

'walk_bike' has the same characteristics (speed, acceleration, etc.) as a normal pedestrian, while 'ride_bike' has the normal characteristics of a person riding a bicycle.

In [None]:
def write_person_trip_file(trip_data, output_file):
    """Write the trips as a SUMO route file with one ``<person>`` per trip.

    Two bicycle vTypes are declared: ``walk_bike`` (slow) and ``ride_bike``
    (fast). Each person gets up to three ``<personTrip>`` legs
    (origin -> station A, station A -> station B, station B -> destination).
    A leg whose start and end edge are identical is omitted, and a trip with
    no remaining leg is skipped entirely. Persons depart one per second, in
    the order they are written.

    Args:
        trip_data: Iterable of trip dicts holding the keys ``walk_origin_edge``,
            ``station_A_edge``, ``station_B_edge`` and ``walk_dest_edge``.
        output_file: Path of the XML file to write.
    """
    root = ET.Element("routes")

    # Define slow and fast bike types
    ET.SubElement(root, "vType", {
        "id": "walk_bike",
        "vClass": "bicycle",
        "maxSpeed": "1.5",
        "accel": "0.8",
        "decel": "2.0"
    })
    ET.SubElement(root, "vType", {
        "id": "ride_bike",
        "vClass": "bicycle",
        "maxSpeed": "5.0",
        "accel": "2.0",
        "decel": "4.0"
    })

    valid_trip_count = 0
    for trip in trip_data:
        candidate_legs = (
            (trip["walk_origin_edge"], trip["station_A_edge"], "walk_bike"),
            (trip["station_A_edge"], trip["station_B_edge"], "ride_bike"),
            (trip["station_B_edge"], trip["walk_dest_edge"], "walk_bike"),
        )
        legs = [leg for leg in candidate_legs if leg[0] != leg[1]]
        if not legs:
            # All four edges are identical: nothing to simulate.
            continue

        person = ET.SubElement(root, "person", {
            "id": f"person_{valid_trip_count}",
            "depart": str(valid_trip_count)
        })

        for from_edge, to_edge, vtype in legs:
            # SUMO reads the allowed modes from `modes` and the vehicle type
            # from `vTypes`; `mode`/`type` are not personTrip attributes, so
            # the declared vTypes would never be used. The bicycle mode is
            # spelled "bicycle".
            ET.SubElement(person, "personTrip", {
                "from": from_edge,
                "to": to_edge,
                "modes": "bicycle",
                "vTypes": vtype
            })

        valid_trip_count += 1

    tree = ET.ElementTree(root)
    tree.write(output_file, encoding="utf-8", xml_declaration=True)
    print(f"Wrote {valid_trip_count} valid person trips to {output_file}")

In [15]:
# Build the trip sets for both scenarios from the same OD points:
# "before" uses only the existing stations, "after" adds the recommended ones.
multimodal_before = generate_multimodal_trips(od_points, existing_coords, max_trips=2000)

stations_after = existing_coords + recommended_coords
multimodal_after = generate_multimodal_trips(od_points, stations_after, max_trips=2000)


Processing trip 0/2000
Processing trip 500/2000
Processing trip 1000/2000
Processing trip 1500/2000
Generated 1456 valid trips
Skipped 293 trips due to same station
Skipped 251 trips due to missing edges
Skipped 0 trips due to routing issues
Processing trip 0/2000
Processing trip 500/2000
Processing trip 1000/2000
Processing trip 1500/2000
Generated 1456 valid trips
Skipped 293 trips due to same station
Skipped 251 trips due to missing edges
Skipped 0 trips due to routing issues


In [16]:
# Write one route file per scenario ("before" first, then "after").
for scenario_trips, route_path in (
    (multimodal_before, "trips_before.rou.xml"),
    (multimodal_after, "trips_after.rou.xml"),
):
    write_person_trip_file(scenario_trips, route_path)


✅ Wrote 1456 valid person trips to trips_before.rou.xml
✅ Wrote 1456 valid person trips to trips_after.rou.xml
