In [2]:
import json
from pathlib import Path
import os
import pm4py

# COMBINE ITAM_SCENE WITH MOV - CATEGORIZED BY PROCESS INSTANCE

def _read_ocel(path):
    """Load one OCEL JSON document from ``path``."""
    # JSON is read as UTF-8 explicitly instead of relying on the platform's
    # locale default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _absorb_ocel(merged, source):
    """Fold one loaded OCEL document ``source`` into the ``merged`` accumulator (in place)."""
    source_log = source.get("ocel:global-log", {})
    merged_log = merged["ocel:global-log"]

    # Attribute names and object types are accumulated as sets (union of all inputs).
    merged_log["ocel:attribute-names"].update(source_log.get("ocel:attribute-names", []))
    merged_log["ocel:object-types"].update(source_log.get("ocel:object-types", []))

    # Version and ordering: the first input that provides a value wins.
    for key in ("ocel:version", "ocel:ordering"):
        if merged_log[key] is None:
            merged_log[key] = source_log.get(key)

    # Global event/object defaults: keep the first value seen for every key,
    # mirroring the first-wins rule used for version and ordering.
    for section in ("ocel:global-event", "ocel:global-object"):
        for key, value in source.get(section, {}).items():
            merged[section].setdefault(key, value)

    # Events and objects are keyed by id; an id that occurs in several inputs
    # is overwritten by the input merged last.
    merged["ocel:events"].update(source.get("ocel:events", {}))
    merged["ocel:objects"].update(source.get("ocel:objects", {}))


def _timestamp_sort_key(item):
    """Sort key for ``(event_id, event)`` pairs; events without a timestamp sort last."""
    timestamp = item[1].get("ocel:timestamp")
    return (timestamp is None, "" if timestamp is None else timestamp)


def merge_ocel_files(input_folder, additional_file, output_file, scenes, instance):
    """Merge an existing OCEL JSON file with the filtered MOV files of one process instance.

    The file ``additional_file`` (if it exists) is merged first, followed by every
    ``filtered_mov{scene}_{instance}.jsonocel`` file found in ``input_folder``.
    Missing files are reported with a warning and skipped. The merged events are
    ordered by their ``ocel:timestamp`` value and the result is written to
    ``output_file`` as indented JSON.

    Args:
        input_folder: Directory containing the ``filtered_mov*`` files.
        additional_file: Path of the already existing OCEL file to merge first.
        output_file: Path the merged OCEL JSON is written to. Missing parent
            directories are created.
        scenes: Iterable of scene identifiers used in the MOV file names.
        instance: Process-instance identifier used in the MOV file names.

    Returns:
        None. The result is written to ``output_file``.
    """
    # Skeleton of the combined OCEL; the two sets are turned into lists before saving.
    merged_ocel = {
        "ocel:global-event": {},
        "ocel:global-object": {},
        "ocel:global-log": {
            "ocel:attribute-names": set(),
            "ocel:object-types": set(),
            "ocel:version": None,
            "ocel:ordering": None,
        },
        "ocel:events": {},
        "ocel:objects": {},
    }

    # Existing combined file (merged first, so its version/ordering take precedence).
    additional_file_path = Path(additional_file)
    if additional_file_path.exists():
        _absorb_ocel(merged_ocel, _read_ocel(additional_file_path))
    else:
        # Same best-effort behaviour as for missing scene files, but no longer silent.
        print(f"Warnung: Datei {additional_file_path} wurde nicht gefunden.")

    # One MOV file per requested scene of the given process instance.
    for scene in scenes:
        file_path = Path(input_folder) / f"filtered_mov{scene}_{instance}.jsonocel"

        if not file_path.exists():
            print(f"Warnung: Datei {file_path} wurde nicht gefunden.")
            continue

        _absorb_ocel(merged_ocel, _read_ocel(file_path))

    # Order events by timestamp. The raw values are compared as stored
    # (assumes uniformly formatted timestamp strings -- TODO confirm);
    # events lacking a timestamp are placed at the end instead of raising.
    merged_ocel["ocel:events"] = dict(
        sorted(merged_ocel["ocel:events"].items(), key=_timestamp_sort_key)
    )

    # JSON cannot serialise sets; sorted lists keep the output reproducible.
    global_log = merged_ocel["ocel:global-log"]
    global_log["ocel:attribute-names"] = sorted(global_log["ocel:attribute-names"], key=str)
    global_log["ocel:object-types"] = sorted(global_log["ocel:object-types"], key=str)

    # Save the combined OCEL, creating the target directory when necessary.
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as output_f:
        json.dump(merged_ocel, output_f, indent=4)

    print(f"Kombinierte OCEL-Datei wurde gespeichert: {output_file}")

In [9]:
# Combine the ITAM scene log of process instance "ssaco" with the filtered
# MOV logs of the scenes listed below.
instance = "ssaco"
scenes = ['03', '05', '06', '11', '14']

input_folder_path = "../dataProcessing/filteredMovOCEL/"
additional_file_path = f"../dataProcessing/itamSceneCombinedOCEL/itam_scene_combined_{instance}.jsonocel"
output_file_path = f"../dataProcessing/itamSceneMovCombinedOCEL/itam_scene_mov_combined_{instance}.jsonocel"

merge_ocel_files(
    input_folder=input_folder_path,
    additional_file=additional_file_path,
    output_file=output_file_path,
    scenes=scenes,
    instance=instance,
)

Kombinierte OCEL-Datei wurde gespeichert: ../dataProcessing/itamSceneMovCombinedOCEL/itam_scene_mov_combined_ssaco.jsonocel


In [7]:
# Add scene 15 of instance "adtc2" to the already combined "adtc" log.
# NOTE(review): the MOV files are looked up under "adtc2" while the combined
# file is named "adtc" -- this looks intentional; confirm.
input_folder_path = "../dataProcessing/filteredMovOCEL/"
instance = "adtc2"
scenes = ['15']

# Plain string literal: the path has no placeholders, so no f-prefix is needed.
additional_file_path = "../dataProcessing/itamSceneMovCombinedOCEL/itam_scene_mov_combined_adtc.jsonocel"
# Input and output are the same file: the combined log is rewritten in place.
output_file_path = additional_file_path

merge_ocel_files(input_folder_path, additional_file_path, output_file_path, scenes, instance)

Kombinierte OCEL-Datei wurde gespeichert: ../dataProcessing/itamSceneMovCombinedOCEL/itam_scene_mov_combined_adtc.jsonocel
