In [1]:
from xopen import xopen
from collections import defaultdict

import gzip
import polars as pl
import pandas as pd
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

In [2]:
PLAN_PATH = "/Users/andre/Desktop/Cergy/MATSim/matsim-berlin/berlin-v6.4.output_plans.xml.gz"
EVENTS_PATH = "/Users/andre/Desktop/Cergy/MATSim/matsim-berlin/berlin-v6.4.output_events.xml.gz"
PASSAGES_PATH = "/Users/andre/Desktop/Cergy/Python_Scripts/runs/fixed_10pct/matsim/passages.parquet"

In [3]:
passages_df = pl.read_parquet(PASSAGES_PATH)
passages_df = (
    passages_df
    .filter(~pl.col("vehicle").str.contains("pt"), 
            ~pl.col("vehicle").str.contains("freight"))
    .sort(pl.col(["vehicle","time", "link_id"]))
    .with_columns(pl.int_range(0, pl.len()).over("vehicle", "link_id").alias("index"))
)

In [4]:
with_neighbors = (
    passages_df
    .with_columns([
        pl.col("time").shift(-1).over(["vehicle", "link_id"]).alias("next_time"),
        pl.col("event_type").shift(-1).over(["vehicle", "link_id"]).alias("next_event"),
        pl.col("time").shift(1).over(["vehicle", "link_id"]).alias("prev_time"),
        pl.col("event_type").shift(1).over(["vehicle", "link_id"]).alias("prev_event"),
    ])
)

In [5]:
# Entry lag = from "enters traffic" → next event
entry_lag_df = (
    with_neighbors
    .filter(pl.col("event_type") == "vehicle enters traffic",
            pl.col("next_event").is_not_null())
    .with_columns([
        (pl.col("next_time") - pl.col("time")).alias("entry_lag")
    ])
    .drop(["prev_event", "prev_time"])

)

In [6]:
# Exit lag = from previous event → "leaves traffic"
exit_lag_df = (
    with_neighbors
    .filter(pl.col("event_type") == "vehicle leaves traffic",
            pl.col("prev_event").is_not_null())
    .with_columns([
        (pl.col("time") - pl.col("prev_time")).alias("exit_lag")
    ])
    .drop(["event_type", "next_event", "next_time"])
)

In [9]:
passages_df.filter(pl.col("link_id")=='4937325', time)

time,link_id,vehicle,event_type,index
f64,str,str,str,i64
50093.0,"""4937325""","""berlin_5d088b40_car""","""entered link""",0
50128.0,"""4937325""","""berlin_5d088b40_car""","""vehicle leaves traffic""",1
52260.0,"""4937325""","""berlin_5d088b40_car""","""vehicle leaves traffic""",2
52260.0,"""4937325""","""berlin_5d088b40_car""","""vehicle enters traffic""",3
60414.0,"""4937325""","""berlin_5d088b40_car""","""vehicle enters traffic""",4
60415.0,"""4937325""","""berlin_5d088b40_car""","""left link""",5


In [10]:
# vehicles entring links
entries = (passages_df.filter(pl.col("event_type") == "vehicle enters traffic").rename({"time": "entered_traffic_time"})
              .drop(pl.col("event_type")))

# vehicles exiting links
exits = (passages_df.filter(pl.col("event_type") == "vehicle leaves traffic").rename({"time": "left_traffic_time"})
            .drop(pl.col("event_type")))

# entered link
entered = (passages_df.filter(pl.col("event_type") == "entered link").rename({"time": "entry_time"})
           .drop(pl.col("event_type")))

# left link
left = (passages_df.filter(pl.col("event_type") == "left link")
        .rename({"time": "exit_time"})
        .drop(pl.col("event_type")))

In [11]:
#entries = entries.with_columns(pl.int_range(0, pl.len()).over("vehicle", "link_id").alias("j_index"))
#exits = exits.with_columns(pl.int_range(0, pl.len()).over("vehicle", "link_id").alias("j_index"))

entered = entered.with_columns(pl.int_range(0, pl.len()).over("vehicle", "link_id").alias("j_index"))
left = left.with_columns(pl.int_range(0, pl.len()).over("vehicle", "link_id").alias("j_index"))

In [29]:
reshaped = (
    entered
    .join(left, on=["j_index", "vehicle", "link_id"], how="left")
    #.with_columns([(pl.col("exit_time") - pl.col("entry_time")).alias("duration")])
    .drop(pl.col("j_index")))

In [13]:
entries

entered_traffic_time,link_id,vehicle,index
f64,str,str,i64
30534.0,"""-30051377""","""bb_00005f6f_car""",0
35835.0,"""-313182541#1""","""bb_00005f6f_car""",2
53011.0,"""-30051377""","""bb_00005f6f_car""",4
57132.0,"""-73029200""","""bb_00005f6f_car""",3
58829.0,"""-73029200""","""bb_00005f6f_car""",4
…,…,…,…
48615.0,"""-4483958#0""","""goodsTraffic_re_vkz.1942_5_5_c…",2
57462.0,"""-680545821""","""goodsTraffic_re_vkz.1942_5_5_c…",2
26678.0,"""-10774679#0""","""goodsTraffic_re_vkz.1952_3_1_c…",0
39751.0,"""-1111155175""","""goodsTraffic_re_vkz.1952_3_1_c…",2


In [14]:
exits

left_traffic_time,link_id,vehicle,index
f64,str,str,i64
31787.0,"""-313182541#1""","""bb_00005f6f_car""",1
37025.0,"""-30051377""","""bb_00005f6f_car""",3
56273.0,"""-73029200""","""bb_00005f6f_car""",1
57132.0,"""-73029200""","""bb_00005f6f_car""",2
62433.0,"""-30051377""","""bb_00005f6f_car""",7
…,…,…,…
50552.0,"""-680545821""","""goodsTraffic_re_vkz.1942_5_5_c…",1
68580.0,"""-49075754#0""","""goodsTraffic_re_vkz.1942_5_5_c…",3
37749.0,"""-1111155175""","""goodsTraffic_re_vkz.1952_3_1_c…",1
49314.0,"""-252504436""","""goodsTraffic_re_vkz.1952_3_1_c…",1


In [33]:
entered.filter(pl.col("link_id")=="30051377")

entry_time,link_id,vehicle,index,j_index
f64,str,str,i64,i64
30535.0,"""30051377""","""bb_00005f6f_car""",0,0
53012.0,"""30051377""","""bb_00005f6f_car""",2,1
23197.0,"""30051377""","""bb_391297b1_car""",0,0
45879.0,"""30051377""","""bb_a399a546_car""",0,0
20578.0,"""30051377""","""bb_e7f71f2f_car""",0,0
…,…,…,…,…
29450.0,"""30051377""","""commercialPersonTraffic_servic…",0,0
60803.0,"""30051377""","""commercialPersonTraffic_servic…",0,0
33264.0,"""30051377""","""commercialPersonTraffic_servic…",0,0
63881.0,"""30051377""","""goodsTraffic_12054000_5_235_ca…",0,0


In [None]:
entered.filter(pl.col("link_id")=="30051377")

In [34]:
left.filter(pl.col("link_id")=="30051377")

exit_time,link_id,vehicle,index,j_index
f64,str,str,i64,i64
30552.0,"""30051377""","""bb_00005f6f_car""",1,0
53029.0,"""30051377""","""bb_00005f6f_car""",3,1
23214.0,"""30051377""","""bb_391297b1_car""",1,0
45896.0,"""30051377""","""bb_a399a546_car""",1,0
20595.0,"""30051377""","""bb_e7f71f2f_car""",1,0
…,…,…,…,…
29467.0,"""30051377""","""commercialPersonTraffic_servic…",1,0
60820.0,"""30051377""","""commercialPersonTraffic_servic…",1,0
33281.0,"""30051377""","""commercialPersonTraffic_servic…",1,0
63898.0,"""30051377""","""goodsTraffic_12054000_5_235_ca…",1,0


In [42]:
# For entered DataFrame
most_entered = (
    entered.group_by("link_id")
    .len()
    .sort("len", descending=True)
    .select(["link_id", "len"])
    .head(1)
)

# For left DataFrame
most_left = (
    left.group_by("link_id")
    .len()
    .sort("len", descending=True)
    .select(["link_id", "len"])
    .head(1)
)

In [49]:
most_entered_link = entered.filter(pl.col('link_id')=="462101683") # used 740 times 
most_left_link = left.filter(pl.col('link_id')=="462101683")

In [50]:
most_entered_link.write_parquet("/Users/andre/Desktop/Cergy/Python_Scripts/runs/fixed_10pct/matsim/most_entered_link.parquet")
most_left_link.write_parquet("/Users/andre/Desktop/Cergy/Python_Scripts/runs/fixed_10pct/matsim/most_left_link.parquet")

In [64]:
busiest_link = (
    reshaped
    .filter(pl.col('link_id') == "462101683")
    .with_columns([
        (pl.col('exit_time') - pl.col('entry_time')).alias('duration')
    ])
    .drop(['index', 'index_right'])
)
# Reorder columns: all except times and duration, then entry_time, exit_time, duration
cols = [c for c in busiest_link.columns if c not in ['entry_time', 'exit_time', 'duration']]
ordered_cols = cols + ['entry_time', 'exit_time', 'duration']

busiest_link = busiest_link.select(ordered_cols)
busiest_link.write_parquet("/Users/andre/Desktop/Cergy/Python_Scripts/runs/fixed_10pct/matsim/busiest_link.parquet")

In [63]:
busiest_link

link_id,vehicle,entry_time,exit_time,duration
str,str,f64,f64,f64
"""462101683""","""bb_008101db_car""",57174.0,57188.0,14.0
"""462101683""","""bb_0aa749cb_car""",25361.0,25375.0,14.0
"""462101683""","""bb_0c25e919_car""",38118.0,38132.0,14.0
"""462101683""","""bb_0e567d10_car""",60424.0,60446.0,22.0
"""462101683""","""bb_0f447985_car""",56354.0,56368.0,14.0
…,…,…,…,…
"""462101683""","""goodsTraffic_re_vkz.1801_3_6_t…",37862.0,37876.0,14.0
"""462101683""","""goodsTraffic_re_vkz.1801_3_6_t…",51927.0,51941.0,14.0
"""462101683""","""goodsTraffic_re_vkz.1823_5_17_…",27453.0,27467.0,14.0
"""462101683""","""goodsTraffic_re_vkz.1831_3_9_c…",46711.0,46725.0,14.0


In [30]:
reshaped.filter(pl.col('index')>pl.col('index_right'))

entry_time,link_id,vehicle,index,exit_time,index_right
f64,str,str,i64,f64,i64
37009.0,"""-30051377""","""bb_00005f6f_car""",2,30535.0,1
62417.0,"""-30051377""","""bb_00005f6f_car""",6,53012.0,5
44128.0,"""-50834551#1""","""bb_001f3cb3_car""",2,25769.0,1
53133.0,"""-50834551#1""","""bb_001f3cb3_car""",6,44268.0,5
49005.0,"""-40345966#0""","""bb_0035e9f6_car""",2,43044.0,1
…,…,…,…,…,…
53731.0,"""-1079234339""","""goodsTraffic_re_vkz.1941_5_4_t…",2,27785.0,1
74080.0,"""-1079234339""","""goodsTraffic_re_vkz.1941_5_7_t…",2,31422.0,1
62693.0,"""-175916608#1""","""goodsTraffic_re_vkz.1942_4_2_c…",2,24211.0,1
68579.0,"""-49075754#0""","""goodsTraffic_re_vkz.1942_5_5_c…",2,26097.0,1


In [24]:
reshaped.filter(pl.col('index')>pl.col('index_right')).sort("index_right")

entry_time,link_id,vehicle,index,exit_time,index_right
f64,str,str,i64,f64,i64
30839.0,"""-1000471322""","""bb_10fa5977_car""",2,29042.0,1
64999.0,"""-1000471322""","""bb_7f4b37aa_car""",2,26174.0,1
57600.0,"""-1000471322""","""bb_9c72e09b_car""",2,53621.0,1
58321.0,"""-1000471322""","""bb_b9273fde_car""",2,24793.0,1
52800.0,"""-1000471322""","""bb_db427f64_car""",2,29893.0,1
…,…,…,…,…,…
64343.0,"""23176498""","""bb_ab7cd12e_car""",18,62252.0,17
62759.0,"""4045220""","""berlin_010ba4c6_car""",18,61826.0,17
78480.0,"""-26243942""","""berlin_88ce15b7_car""",22,78282.0,21
63080.0,"""-5785905#0""","""bb_9921a388_car""",22,56036.0,21


In [16]:
avg_durations = (
    passages_df
    .group_by("link_id")
    .agg([
        pl.col("duration").mean().alias("avg_duration"),
        pl.col("duration").count().alias("n_passages"),
        pl.col("duration").sum().alias("total_duration")
    ])
    .sort("avg_duration", descending=True)
)

In [17]:
avg_durations

link_id,avg_duration,n_passages,total_duration
str,f64,u32,f64
"""-5026771""",,0,0.0
"""-649556585""",,0,0.0
"""-24879167""",,0,0.0
"""-15742783#1""",,0,0.0
"""-4898660#1""",,0,0.0
…,…,…,…
"""-76635171#1""",-58854.5,2,-117709.0
"""-8771232#0""",-63856.0,1,-63856.0
"""-284232568""",-64237.5,2,-128475.0
"""-10953348""",-69690.0,1,-69690.0
