In [4]:
import os
import re
import xml.etree.ElementTree as ET
import sumolib
import pandas as pd
from IPython.display import display
import sys

sys.path.append('/home/hoai-linh.dao/Works/EVCS/CEREMA-Mini/src') 
from config import *

def get_invalid_routes_summary(dir_path):
    """
    Summarize the trips that duarouter could not route.

    Vehicle IDs are collected from the "has no valid route" warnings in
    <dir_path>/logs/duarouter.log and then looked up in
    <dir_path>/trips.xml to recover each trip's fromTaz and toTaz.

    Parameters
    ----------
    dir_path : str
        Directory containing logs/duarouter.log and trips.xml.

    Returns
    -------
    detailed_df : pandas.DataFrame
        Columns [vehicle_id, fromTaz, toTaz]; one row per <trip> in
        trips.xml whose id appears in the log warnings.
    summary_df : pandas.DataFrame
        Columns [fromTaz, toTaz, error_count]; number of failed trips per
        zone pair. Trips without fromTaz/toTaz attributes are kept as a
        missing-value group, so error_count sums to len(detailed_df).

    Raises
    ------
    OSError
        If the log file or trips.xml cannot be opened.
    xml.etree.ElementTree.ParseError
        If trips.xml is not well-formed XML.
    """
    # Build fixed paths
    log_path = os.path.join(dir_path, 'logs', 'duarouter.log')
    trips_path = os.path.join(dir_path, 'trips.xml')

    # 1. Read log to collect invalid vehicle IDs
    pattern = re.compile(r"Warning: The vehicle '(.+?)' has no valid route")
    with open(log_path, 'r', encoding='utf-8') as f:
        invalid_ids = {m.group(1) for m in map(pattern.search, f) if m}

    # 2. Parse trips.xml and gather details for the failed trips only
    root = ET.parse(trips_path).getroot()
    details = [
        {
            'vehicle_id': trip.get('id'),
            'fromTaz':    trip.get('fromTaz'),
            'toTaz':      trip.get('toTaz'),
        }
        for trip in root.findall('trip')
        if trip.get('id') in invalid_ids
    ]

    detailed_df = pd.DataFrame(details, columns=['vehicle_id', 'fromTaz', 'toTaz'])

    # 3. Summarize counts by fromTaz → toTaz
    if detailed_df.empty:
        # Nothing to group: return the documented columns explicitly.
        summary_df = pd.DataFrame(columns=['fromTaz', 'toTaz', 'error_count'])
    else:
        summary_df = (
            detailed_df
            # dropna=False: a trip lacking fromTaz/toTaz must still be counted,
            # otherwise the summary silently disagrees with detailed_df.
            .groupby(['fromTaz', 'toTaz'], as_index=False, dropna=False)
            .size()
            .rename(columns={'size': 'error_count'})
        )

    return detailed_df, summary_df

# base_dir = "/home/hoai-linh.dao/Works/EVCS/CEREMA-Mini/result/experiments/20-05-25-checknewmap"
# detailed_df, summary_df = get_invalid_routes_summary(base_dir)

# print(f"Total invalid routes: {len(detailed_df)}\n")
# display(detailed_df)

# print("\nError counts by fromTaz -> toTaz:")
# display(summary_df)

In [5]:
import os
import re
import sys
import xml.etree.ElementTree as ET
import importlib.util
from collections import defaultdict

# ----------------------------------------------------------------------
def extract_failed_trips(log_path):
    """
    Collect the vehicle IDs that duarouter reported as unroutable.

    The log is scanned line by line for messages of the form
    "Warning: The vehicle '<id>' has no valid route"; the quoted IDs are
    returned as a set, so repeated warnings for one vehicle count once.
    """
    no_route_re = re.compile(r"Warning: The vehicle '(.+?)' has no valid route")
    with open(log_path, 'r', encoding='utf-8') as log_file:
        hits = (no_route_re.search(entry) for entry in log_file)
        # Consume the generator while the file is still open.
        return {hit.group(1) for hit in hits if hit}

# ----------------------------------------------------------------------
def parse_taz_file(taz_path):
    """
    Parse a SUMO TAZ XML file into a zone → edge-ID mapping.

    Both ways of listing a zone's edges are understood:
      <tazs>
        <taz id="2" ...>
          <tazSource id="edgeA"/>
          <tazSink   id="edgeB"/>
          ...
        </taz>
        <taz id="3" edges="edgeC edgeD"/>
      </tazs>

    Only <taz> elements that are direct children of the root are read.
    Each edge ID is listed once per zone, in order of first appearance,
    even when the same edge is both a tazSource and a tazSink. Child
    elements without an id attribute are ignored.

    Returns dict: { zone_id: [edgeID, edgeID, …], … }
    """
    root = ET.parse(taz_path).getroot()
    taz_map = {}
    for taz in root.findall('taz'):
        # A dict is used as an insertion-ordered set: duplicates collapse,
        # first-seen order is preserved.
        edge_ids = dict.fromkeys((taz.get('edges') or '').split())
        for child in taz:
            if child.tag in ('tazSource', 'tazSink'):
                edge_id = child.get('id')
                if edge_id:  # skip malformed children rather than storing None
                    edge_ids[edge_id] = None
        taz_map[taz.get('id')] = list(edge_ids)
    return taz_map

# ----------------------------------------------------------------------
def call_findAllRoutes_inprocess(script_path, net_file, src_edges, tgt_edges, out_file):
    """
    Run findAllRoutes.py inside the current interpreter.

    The script at script_path is imported as a module named
    "findAllRoutes"; its get_options() is given a command-line style
    argument list and the parsed options are handed to its main().
    No subprocess is started, so long edge lists are not subject to
    shell argument-length limits.

    src_edges and tgt_edges are iterables of edge-ID strings; each is
    passed to the script as a single comma-separated value.
    """
    # Import the script from its file path (it need not be on sys.path).
    module_spec = importlib.util.spec_from_file_location("findAllRoutes", script_path)
    finder = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(finder)

    # Flag → value pairs, flattened into the argv list the script expects.
    cli_options = {
        "--net-file": net_file,
        "--source-edges": ",".join(src_edges),
        "--target-edges": ",".join(tgt_edges),
        "--output-file": out_file,
    }
    argv = [token for pair in cli_options.items() for token in pair]

    finder.main(finder.get_options(argv))

# ----------------------------------------------------------------------
def _vehicle_from_trip(trip, route_edges):
    """Build a <vehicle> with an embedded <route edges="…"/> that replaces a failed <trip>."""
    attrib = {'id': trip.get('id'), 'type': trip.get('type', 'car')}
    # Copy only the departure attributes the trip actually has: ElementTree
    # cannot serialize None and would raise TypeError when the file is written.
    for name in ('depart', 'departLane', 'departPos', 'departSpeed'):
        value = trip.get(name)
        if value is not None:
            attrib[name] = value
    veh = ET.Element('vehicle', attrib)
    # A vehicle's `route` attribute refers to a route ID, so an explicit edge
    # list has to be given as a child <route edges="…"/> element instead.
    ET.SubElement(veh, 'route', {'edges': route_edges})
    veh.tail = trip.tail  # keep the whitespace/indentation that followed the trip
    return veh


def repair_trips(
    base_dir,
    net_file,
    taz_file,
    findall_script,
    out_routes_dir="fixed_routes",
    out_trips="trips_repaired.xml"
):
    """
    Replace trips that duarouter could not route with explicit-route vehicles.

    1) Reads logs/duarouter.log + trips.xml under base_dir.
    2) For every (fromTaz, toTaz) pair of the failed trips, runs
       findAllRoutes.py in-process between the zones' edges and stores the
       result in base_dir/out_routes_dir/routes_<fromTaz>_<toTaz>.xml.
    3) Replaces each failed <trip> in place (so the file keeps its original
       order) by a <vehicle> using the first route found for its zone pair.
       Trips for which no route was found are left unchanged.
    4) Writes out base_dir/out_trips.

    Parameters
    ----------
    base_dir : str
        Experiment directory holding logs/duarouter.log and trips.xml.
    net_file : str
        Network file passed to findAllRoutes.py as --net-file.
    taz_file : str
        TAZ definition file read with parse_taz_file().
    findall_script : str
        Path to findAllRoutes.py.
    out_routes_dir : str
        Sub-directory of base_dir for the intermediate route files.
    out_trips : str
        File name (under base_dir) of the repaired trips file.

    Returns
    -------
    None
    """
    # Paths
    log_path   = os.path.join(base_dir, "logs", "duarouter.log")
    trips_path = os.path.join(base_dir, "trips.xml")

    # 1) collect failed IDs
    failed_ids = extract_failed_trips(log_path)
    print(f"Found {len(failed_ids)} failed trips.")

    # 2) parse original trips.xml
    tree = ET.parse(trips_path)
    root = tree.getroot()

    # 3) map each failed trip → its fromTaz, toTaz
    zones_for_trip = {
        trip.get('id'): (trip.get('fromTaz'), trip.get('toTaz'))
        for trip in root.findall('trip')
        if trip.get('id') in failed_ids
    }

    # 4) group vehicle IDs by zone‐pair
    grouping = defaultdict(list)
    for vid, zone_pair in zones_for_trip.items():
        grouping[zone_pair].append(vid)

    # 5) load TAZ definitions
    taz_map = parse_taz_file(taz_file)

    # 6) ensure output folder for intermediate routes
    routes_dir = os.path.join(base_dir, out_routes_dir)
    os.makedirs(routes_dir, exist_ok=True)

    # 7) for each (fromZ, toZ), invoke findAllRoutes and cache results
    route_cache = {}  # (fz,tz) → list of "edge1 edge2 …"
    for fz, tz in grouping:
        src_edges = taz_map.get(fz, [])
        tgt_edges = taz_map.get(tz, [])
        if not src_edges or not tgt_edges:
            print(f"[WARN] no edges for zone {fz} or {tz}, skipping", file=sys.stderr)
            continue

        out_file = os.path.join(routes_dir, f"routes_{fz}_{tz}.xml")
        call_findAllRoutes_inprocess(findall_script, net_file, src_edges, tgt_edges, out_file)

        # parse the generated routes XML; ignore <route> entries without edges
        routes_root = ET.parse(out_file).getroot()
        edge_lists = [r.get('edges') for r in routes_root.findall('route') if r.get('edges')]
        if not edge_lists:
            print(f"[WARN] no routes found between zones {fz}->{tz}", file=sys.stderr)
        route_cache[(fz, tz)] = edge_lists

    # 8) Replace each failed <trip> with a <vehicle> carrying an explicit route.
    #    The swap is 1:1 at the same index, so positions stay valid while
    #    iterating over the snapshot and the departure order is preserved.
    repaired = 0
    for idx, elem in enumerate(list(root)):
        if elem.tag != 'trip':
            continue
        vid = elem.get('id')
        if vid not in zones_for_trip:
            continue
        candidates = route_cache.get(zones_for_trip[vid])
        if not candidates:
            continue  # still no route: leave the original <trip> as-is
        root[idx] = _vehicle_from_trip(elem, candidates[0])  # pick first found route
        repaired += 1
    print(f"Repaired {repaired} of {len(zones_for_trip)} failed trips present in trips.xml.")

    # 9) Write out the repaired trips file
    out_path = os.path.join(base_dir, out_trips)
    tree.write(out_path, encoding='utf-8', xml_declaration=True)
    print(f"Repaired trips written to: {out_path}")

In [None]:
# Inputs for the repair run, gathered in one place so they are easy to retarget.
_repair_inputs = {
    "base_dir": "/home/hoai-linh.dao/Works/EVCS/CEREMA-Mini/result/experiments/20-05-25-checknewmap",
    "net_file": "/home/hoai-linh.dao/Works/EVCS/CEREMA-Mini/result/net-repairment/cleaned_p2_2_newtest-osm.net.xml",
    "taz_file": "/home/hoai-linh.dao/Works/EVCS/CEREMA-Mini/result/experiments/20-05-25-checknewmap/taz.xml",
    "findall_script": "/home/hoai-linh.dao/Envs/sumo-env/lib/python3.10/site-packages/sumo/tools/findAllRoutes.py",
}
repair_trips(**_repair_inputs)

Found 622 failed trips.
