In [None]:
import rust_bridge_pm_py

In [None]:
# Python-side PyBridgeEventLog construction: Slow!
import pandas as pd
def pm4py_log_to_bridge_log(df: pd.DataFrame):
  """Convert a pm4py-style event DataFrame into a PyBridgeEventLog.

  Rows are grouped by the ``case:concept:name`` column. Each group becomes one
  PyBridgeTrace (constructed from the stringified case id) and each row of the
  group becomes one PyBridgeEvent whose attributes are the row's column values
  converted with ``str``.

  Args:
    df: Event DataFrame; must contain a ``case:concept:name`` column.

  Returns:
    The populated ``rust_bridge_pm_py.native.PyBridgeEventLog``.

  Raises:
    KeyError: If ``df`` has no ``case:concept:name`` column.
  """
  native = rust_bridge_pm_py.native
  log = native.PyBridgeEventLog()
  # Group by the column *name*, not a one-element list: with a list key,
  # pandas >= 2.0 yields 1-tuples as group keys, so str(trace_id) would turn
  # into "('some id',)" instead of "some id".
  for trace_id, case_rows in df.groupby('case:concept:name'):
    trace = native.PyBridgeTrace(str(trace_id))
    for _, row in case_rows.iterrows():
      attributes = {k: str(v) for (k, v) in row.to_dict().items()}
      trace.append_event(native.PyBridgeEvent(attributes))
    log.append_trace(trace)
  return log

In [None]:
# Build a very large synthetic event log, driven from Python
# (the log/trace/event structs themselves reside in Rust)
NUM_TRACES = 200000
EVENTS_PER_TRACE = 15

log = rust_bridge_pm_py.native.PyBridgeEventLog()
for trace_idx in range(NUM_TRACES):
  trace = rust_bridge_pm_py.native.PyBridgeTrace(f"Trace {trace_idx}")
  for event_idx in range(EVENTS_PER_TRACE):
    event = rust_bridge_pm_py.native.PyBridgeEvent({"concept:name": f"Activity {event_idx}"})
    # Insert at the running index, i.e. at the current end of the trace
    trace.insert_event(event_idx, event)
  log.insert_trace(trace_idx, trace)

In [None]:
# Example call into Rust: adds artificial start and end activities to every trace
res_log = rust_bridge_pm_py.native.test_bridge_log(log)

# Spot-check the first trace: it must now begin with __START__ and finish with __END__
first_trace_events = res_log.traces[0].events
assert first_trace_events[0].attributes.get("concept:name") == "__START__"
assert first_trace_events[-1].attributes.get("concept:name") == "__END__"

In [None]:
# Flatten the PyBridgeEventLog into plain dicts, retaining only the case id
# of each trace and the activity name of each event
traces = [
  {
    "case:concept:name": trace.attributes.get("case:concept:name"),
    "events": [
      {"concept:name": event.attributes.get("concept:name")}
      for event in trace.events
    ],
  }
  for trace in log.traces
]


In [None]:
import pm4py
import polars
# Load a sample event log from an XES file (used below as a pandas DataFrame)
SEPSIS_LOG_PATH = "../../../../dow/event_logs/Sepsis Cases - Event Log.xes.gz"
log_df = pm4py.read_xes(SEPSIS_LOG_PATH)

In [None]:
# Step 1: build the PyBridgeEventLog wrapper from the DataFrame on the Python side.
# Step 2: let Rust add the start/end activities; the result is a PyBridgeEventLog wrapper again.
# Performance: acceptable for small to normal logs, but poor for very large ones
bridge_log = pm4py_log_to_bridge_log(log_df)
res_log = rust_bridge_pm_py.native.test_bridge_log(bridge_log)

In [None]:
# Idea: avoid a Polars dependency on the Python side by exporting the DataFrame
# as JSON with pandas and importing that JSON with Polars on the Rust side
records_json = log_df.to_json(orient="records")
log = rust_bridge_pm_py.native.test_df_pandas(records_json)

In [None]:
# Idea: perform the DataFrame -> wrapper conversion on the Rust side (and in parallel).
# Polars is used for this because of its first-class Rust support.
# The pandas DataFrame is first converted to a Polars DataFrame, which Rust then
# turns into a PyBridgeEventLog wrapper and returns.
# Performance: pretty good :)
polars_df = polars.from_pandas(log_df)
log = rust_bridge_pm_py.native.polars_df_to_log(polars_df)

In [None]:
# Create a huge event log and process it in Rust.
# The conversion goes through JSON bytes (using the orjson library).
# Performance is not great: a recorded run took 15308.481216430664 ms in total,
# with the JSON dump and the log re-construction from the dict taking the most time.
# This result prompted the experiments with the PyBridgeEventLog wrapper that lives in Rust.
l = rust_bridge_pm_py.event_log.py_test_event_log()