In [1]:
from xopen import xopen
from collections import defaultdict

import gzip
import os
import polars as pl
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

In [2]:
# Input locations used by this notebook.
# NOTE(review): these are absolute paths on one machine; adjust them before running elsewhere.

# MATSim output plans file (gzip-compressed XML).
PLAN_PATH = "/Users/andre/Desktop/Cergy/MATSim/matsim-berlin/berlin-v6.4.output_plans.xml.gz"

# Output directory of the run; the parquet files read below live in it.
METROPOLIS_OUTPUT = "/Users/andre/Desktop/Cergy/Python_Scripts/runs/fixed_10pct/output"

# Link-passage events, stored inside the run's output directory.
PASSAGES_PATH = f"{METROPOLIS_OUTPUT}/passages.parquet"

In [3]:
# Load the passage events, keep only vehicles whose id contains neither "pt"
# nor "freight", and number the events of each (vehicle, link_id) pair in
# sorted order.
# NOTE: vehicles whose id contains "goods" or "commercial" are NOT excluded here.
passages_df = (
    pl.read_parquet(PASSAGES_PATH)
    .filter(
        ~(
            pl.col("vehicle").str.contains("pt")
            | pl.col("vehicle").str.contains("freight")
        )
    )
    .sort(["vehicle", "time", "link_id"])
    .with_columns(
        # Running counter 0, 1, 2, ... within each (vehicle, link_id) group.
        index=pl.int_range(0, pl.len()).over("vehicle", "link_id")
    )
)

In [4]:
# Re-order the events: by vehicle, then time, then the per-(vehicle, link_id)
# counter, then link id. The row order matters because later cells number
# rows within groups in the order they appear.
passages_df = passages_df.sort("vehicle", "time", "index", "link_id")

In [5]:
# Events that start a link traversal: the vehicle enters the link or enters
# traffic on it. The event time becomes `entry_time`.
entered = (
    passages_df
    .filter(pl.col("event_type").is_in(["entered link", "vehicle enters traffic"]))
    .drop("event_type")
    .rename({"time": "entry_time"})
)

# Events that end a link traversal: the vehicle leaves the link or leaves
# traffic on it. The event time becomes `exit_time`.
left = (
    passages_df
    .filter(pl.col("event_type").is_in(["left link", "vehicle leaves traffic"]))
    .drop("event_type")
    .rename({"time": "exit_time"})
)

In [6]:
# Number the entries, and separately the exits, of each (vehicle, link_id)
# pair in row order. The k-th entry and the k-th exit share the same
# `j_index`, which is used as a join key afterwards.
entered = entered.with_columns(
    j_index=pl.int_range(0, pl.len()).over(["vehicle", "link_id"])
)
left = left.with_columns(
    j_index=pl.int_range(0, pl.len()).over(["vehicle", "link_id"])
)

In [7]:
# Build one row per link traversal by pairing the k-th entry of a vehicle on
# a link with its k-th exit from that link (k is `j_index`).
#
# The join is a full join, so an entry without a matching exit (or an exit
# without a matching entry) is kept, with a null on the missing side and
# therefore a null `duration`.
#
# `coalesce=True` merges the join keys of both sides into single columns.
# Without it, rows that exist only in `left` get null `vehicle` / `link_id`
# (their keys land in `vehicle_right` / `link_id_right`), so they would be
# attributed to a null link when grouping by `link_id`.
reshaped = (
    entered
    .join(
        left,
        on=["j_index", "vehicle", "link_id"],
        how="full",
        coalesce=True,
    )
    # Time spent on the link: exit minus entry, in the unit of the time columns.
    .with_columns((pl.col("exit_time") - pl.col("entry_time")).alias("duration"))
    .drop("j_index")
)

In [8]:
# Keep only the columns needed downstream, in a fixed order; helper columns
# left over from the join are discarded.
reshaped = reshaped.select(
    "link_id",
    "vehicle",
    "entry_time",
    "exit_time",
    "duration",
)

In [17]:
# Spot check: all link-entry events recorded for one example vehicle.
entered.filter(pl.col("vehicle") == "bb_00005f6f_car")

entry_time,link_id,vehicle,index,j_index
f64,str,str,i64,i64
30534.0,"""-30051377""","""bb_00005f6f_car""",0,0
30535.0,"""30051377""","""bb_00005f6f_car""",0,0
30552.0,"""1029686950""","""bb_00005f6f_car""",0,0
30579.0,"""24557971""","""bb_00005f6f_car""",0,0
30917.0,"""328050969""","""bb_00005f6f_car""",0,0
…,…,…,…,…
61424.0,"""-401668993""","""bb_00005f6f_car""",0,0
61451.0,"""-432254960""","""bb_00005f6f_car""",0,0
61468.0,"""-30051378""","""bb_00005f6f_car""",0,0
61539.0,"""-328050968""","""bb_00005f6f_car""",0,0


In [10]:
# Per-link statistics of the traversal duration, with the links that have
# the largest mean duration listed first.
avg_durations = (
    reshaped
    .group_by("link_id")
    .agg(
        pl.mean("duration").alias("avg_duration"),
        # Counts non-null durations only, i.e. traversals with both an entry and an exit.
        pl.count("duration").alias("Nb passages"),
        pl.sum("duration").alias("total_duration"),
    )
    .sort(by="avg_duration", descending=True)
)

In [11]:
# Display the per-link duration statistics (sorted by average duration, largest first).
avg_durations

link_id,avg_duration,Nb passages,total_duration
str,f64,u32,f64
"""-25210595#0""",1996.963855,166,331496.0
"""522458224""",941.0,4,3764.0
"""244944011""",893.0,5,4465.0
"""345754464""",810.197143,350,283569.0
"""-718809953""",742.625581,430,319329.0
…,…,…,…
"""-27815725#0""",0.5,10,5.0
"""-33847096#1""",0.5,2,1.0
"""-152354580#2""",0.5,8,4.0
"""-222369309#3""",0.5,2,1.0


# Metropolis Routes

In [12]:
# Load the route results from the run's output directory, drop the trip
# identifiers, and add the time spent on each edge (exit minus entry).
mp_routes = (
    pl.read_parquet(os.path.join(METROPOLIS_OUTPUT, "route_results.parquet"))
    .with_columns(duration=pl.col("exit_time") - pl.col("entry_time"))
    .drop("trip_index", "trip_id")
)

In [13]:
# Spot check: the first 14 route rows of one example agent.
mp_routes.filter(pl.col("agent_id").eq(4266401372862)).head(14)

agent_id,edge_id,entry_time,exit_time,duration
u64,u64,f64,f64,f64
4266401372862,178144,32265.0,32280.0,15.0
4266401372862,47467,32280.0,32293.0,13.0
4266401372862,47466,32293.0,32296.0,3.0
4266401372862,187512,32296.0,32302.0,6.0
4266401372862,122674,32302.0,32320.0,18.0
…,…,…,…,…
4266401372862,170316,32415.0,32421.0,6.0
4266401372862,170317,32421.0,32438.0,17.0
4266401372862,198837,32438.0,32446.0,8.0
4266401372862,198838,32446.0,32457.0,11.0


In [14]:
# Spot check: rows 15-24 of one example vehicle's link traversals that
# started before time 34000.
(
    reshaped
    .filter(
        (pl.col("vehicle") == "bb_5de742f7_car")
        & (pl.col("entry_time") < 34000)
    )
    .slice(15, 10)
)

link_id,vehicle,entry_time,exit_time,duration
str,str,f64,f64,f64
"""-43711339""","""bb_5de742f7_car""",33267.0,33268.0,1.0
"""4682661""","""bb_5de742f7_car""",33268.0,33309.0,41.0
"""909890129""","""bb_5de742f7_car""",33309.0,33348.0,39.0
"""-624251117#0""","""bb_5de742f7_car""",33348.0,33350.0,2.0
"""-237149172""","""bb_5de742f7_car""",33350.0,33367.0,17.0
"""38628509""","""bb_5de742f7_car""",33367.0,33374.0,7.0
"""624251116""","""bb_5de742f7_car""",33374.0,33383.0,9.0
"""624251115#0""","""bb_5de742f7_car""",33383.0,33397.0,14.0
"""-624193320#1""","""bb_5de742f7_car""",33397.0,33401.0,4.0
"""-624193319#1""","""bb_5de742f7_car""",33401.0,33437.0,36.0
