In [13]:
import xml.etree.ElementTree as ET
import pandas as pd

def parse_detector_output(xml_file, string_columns=("id",)):
    """
    Parse SUMO induction loop detector output (detector_output.xml)
    into a pandas DataFrame.

    The file is scanned line by line rather than parsed as one XML
    document, so the full XML tree is never built in memory and a damaged
    line does not prevent the remaining lines from being read. Only
    ``<interval .../>`` elements that are complete on a single line are
    recognised.

    Parameters
    ----------
    xml_file : str or path-like
        Path to the detector output file. It is read as UTF-8.
    string_columns : iterable of str, optional
        Attribute names that are never converted to float, even when every
        value looks numeric. Defaults to ``("id",)`` so that detector ids
        such as ``"1"`` stay strings.

    Returns
    -------
    pandas.DataFrame
        One row per parsed ``<interval>`` line and one column per XML
        attribute. Columns whose values all convert to float are float64;
        all other columns keep the attribute strings. The frame has no
        rows and no columns when no interval line is found.

    Warns
    -----
    RuntimeWarning
        If one or more lines starting with ``<interval`` could not be
        parsed and were skipped.
    """
    import warnings  # imported locally so this cell stays self-contained

    rows = []
    skipped = 0

    # Explicit UTF-8 instead of the platform default encoding, so that
    # non-ASCII attribute values decode the same way on every OS.
    with open(xml_file, "r", encoding="utf-8") as f:
        for line in f:
            stripped = line.strip()
            if not stripped.startswith("<interval"):
                continue
            try:
                element = ET.fromstring(stripped)
            except ET.ParseError:
                # Best effort: keep going, but remember that data was lost.
                skipped += 1
                continue
            rows.append(dict(element.attrib))

    if skipped:
        warnings.warn(
            f"{xml_file}: skipped {skipped} malformed <interval> line(s)",
            RuntimeWarning,
            stacklevel=2,
        )

    # Convert to DataFrame
    df = pd.DataFrame(rows)

    # Accept a single name as well as an iterable of names (or None).
    if isinstance(string_columns, str):
        string_columns = (string_columns,)
    keep_as_text = set(string_columns or ())

    # Convert numeric columns to floats where possible
    for col in df.columns:
        if col in keep_as_text:
            continue  # identifiers stay strings even if they look numeric
        try:
            df[col] = df[col].astype(float)
        except (ValueError, TypeError):
            pass  # leave non-numeric columns as strings

    return df

In [18]:
# Load the detector output file and print a summary of its columns and dtypes.
xml_file = "../traffic simulation/2906/detector_output.xml"
# parse_detector_output already returns a DataFrame, so it is used directly
# instead of being wrapped in another pd.DataFrame(...) call.
df = parse_detector_output(xml_file)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 684 entries, 0 to 683
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   begin              684 non-null    float64
 1   end                684 non-null    float64
 2   id                 684 non-null    object 
 3   nVehContrib        684 non-null    float64
 4   flow               684 non-null    float64
 5   occupancy          684 non-null    float64
 6   speed              684 non-null    float64
 7   harmonicMeanSpeed  684 non-null    float64
 8   length             684 non-null    float64
 9   nVehEntered        684 non-null    float64
dtypes: float64(9), object(1)
memory usage: 53.6+ KB
