In [13]:
import json
from pathlib import Path
import os
import pm4py

# COMBINE ITAM WITH SCENE VIDEOS - CATEGORIZED BY PROCESS INSTANCE

def _load_ocel_json(path):
    """Read one JSON-OCEL file and return the parsed document."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _merge_ocel_document(merged_ocel, ocel_data):
    """Fold one parsed OCEL document into the ``merged_ocel`` accumulator (in place)."""
    source_log = ocel_data.get("ocel:global-log") or {}
    merged_log = merged_ocel["ocel:global-log"]

    # Attribute names and object types are unioned across all sources.
    for key in ("ocel:attribute-names", "ocel:object-types"):
        merged_log[key].update(source_log.get(key) or [])

    # Version and ordering are taken from the first source that provides them.
    for key in ("ocel:version", "ocel:ordering"):
        if merged_log[key] is None:
            merged_log[key] = source_log.get(key)

    # Global event/object defaults: the first source that defines an entry wins.
    for key in ("ocel:global-event", "ocel:global-object"):
        for name, value in (ocel_data.get(key) or {}).items():
            merged_ocel[key].setdefault(name, value)

    # Events/objects are keyed by id; a later source overwrites an earlier
    # entry that uses the same id.
    merged_ocel["ocel:events"].update(ocel_data.get("ocel:events") or {})
    merged_ocel["ocel:objects"].update(ocel_data.get("ocel:objects") or {})


def _event_timestamp_key(item):
    """Sort key for ``(event_id, event)`` pairs; events without a timestamp sort last."""
    timestamp = item[1].get("ocel:timestamp")
    # The leading flag keeps None from ever being compared with a real timestamp.
    return (timestamp is None, "" if timestamp is None else timestamp)


def merge_ocel_files(input_folder, additional_file, output_file, scenes, instance):
    """Merge an additional OCEL log with per-scene OCEL logs and write the result.

    The additional file is merged first, followed by one file per entry of
    ``scenes`` named ``filtered_scene{scene}_video_ocel_{instance}.jsonocel``
    inside ``input_folder``. Missing files are reported on stdout and skipped.
    The merged events are ordered by their ``ocel:timestamp`` value and the
    combined document is written as indented JSON to ``output_file``.

    Args:
        input_folder: Directory containing the per-scene ``.jsonocel`` files.
        additional_file: Path of the extra OCEL file merged before the scenes.
        output_file: Destination path; missing parent directories are created.
        scenes: Iterable of scene identifiers inserted into the file name.
        instance: Process-instance identifier inserted into the file name.

    Returns:
        None. The merged log is written to ``output_file``.
    """
    # Sets are used while merging so repeated names collapse; they are turned
    # into lists again before serialisation.
    merged_ocel = {
        "ocel:global-event": {},
        "ocel:global-object": {},
        "ocel:global-log": {
            "ocel:attribute-names": set(),
            "ocel:object-types": set(),
            "ocel:version": None,
            "ocel:ordering": None,
        },
        "ocel:events": {},
        "ocel:objects": {},
    }

    source_paths = [Path(additional_file)]
    source_paths.extend(
        Path(input_folder) / f"filtered_scene{scene}_video_ocel_{instance}.jsonocel"
        for scene in scenes
    )

    for file_path in source_paths:
        if not file_path.exists():
            print(f"Warnung: Datei {file_path} wurde nicht gefunden.")
            continue
        _merge_ocel_document(merged_ocel, _load_ocel_json(file_path))

    # Order events chronologically; sorted() is stable, so events sharing a
    # timestamp keep their merge order.
    merged_ocel["ocel:events"] = dict(
        sorted(merged_ocel["ocel:events"].items(), key=_event_timestamp_key)
    )

    # JSON cannot serialise sets; sorting makes the written file deterministic.
    merged_log = merged_ocel["ocel:global-log"]
    merged_log["ocel:attribute-names"] = sorted(merged_log["ocel:attribute-names"])
    merged_log["ocel:object-types"] = sorted(merged_log["ocel:object-types"])

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as output_f:
        json.dump(merged_ocel, output_f, indent=4)

    print(f"Kombinierte OCEL-Datei wurde gespeichert: {output_file}")

In [17]:
# Which process instance and which scene recordings go into the merged log.
instance = "ssaco"
scenes = ['03', '05', '06', '11', '14']

# Inputs: folder with the per-scene logs plus the additional ITAM log.
input_folder_path = "../dataProcessing/filteredSceneVideoOCEL/"
additional_file_path = "../dataProcessing/filtered_itam_ocel.jsonocel"

# Output: one combined file whose name carries the process instance.
output_file_path = f"../dataProcessing/itamSceneCombinedOCEL/itam_scene_combined_{instance}.jsonocel"

merge_ocel_files(
    input_folder=input_folder_path,
    additional_file=additional_file_path,
    output_file=output_file_path,
    scenes=scenes,
    instance=instance,
)

Kombinierte OCEL-Datei wurde gespeichert: ../dataProcessing/itamSceneCombinedOCEL/itam_scene_combined_ssaco.jsonocel


In [19]:
# Load a combined log with pm4py and show the ids of the objects it contains.
# NOTE(review): this reads the "adtc" file, while the cell that calls
# merge_ocel_files writes the "ssaco" instance — confirm this is intended.
combined_ocel_path = '../dataProcessing/itamSceneCombinedOCEL/itam_scene_combined_adtc.jsonocel'
read_ocel = pm4py.read_ocel_json(combined_ocel_path)
read_ocel.objects['ocel:oid']

0                    A1
1                    L1
2                    C1
3                    P2
4                    M1
5                    C2
6                    P3
7                    C6
8                    L2
9                    P1
11                   L5
12                   L3
13                   C5
14          Headset USB
15                   C4
17                   L6
18                   A2
19                   C3
20                   T1
23                   M2
24                   C7
25                   T3
26                   M3
27                   C8
29                   M4
30                   M6
31                 door
32      it_working_desk
33         laptop_shelf
34       mouse_cupboard
35      monitor_storage
36    keyboard_cupboard
37                  L16
38                   T2
Name: ocel:oid, dtype: string