### Setup and Import Section ###

In [1]:
# replicateExp.ipynb
import os

# --- Setup Section ---
# This part of the code takes care of preparing the environment, both on Colab and locally.

# Detect Google Colab.
# `get_ipython` only exists inside an IPython/Jupyter kernel, so it is probed
# directly: a NameError means this is a plain Python interpreter. Looking for
# a name in globals() instead would always report "not Colab".
try:
    IS_COLAB = 'google.colab' in str(get_ipython())
except NameError:
    IS_COLAB = False

if IS_COLAB:
    import subprocess

    print("Running on Google Colab. Setting up environment...")
    # Placeholders: replace with the real GitHub repository and user name.
    repo_name = "il_tuo_repo" # You might want to update this to your actual repo name
    repo_url = f"https://github.com/il_tuo_utente/{repo_name}.git" # And this with your username/repo

    # Clone the repository only once; check=True makes a failed clone raise
    # here instead of surfacing later as a confusing chdir error.
    if not os.path.exists(repo_name):
        subprocess.run(["git", "clone", repo_url], check=True)
        print(f"Repository '{repo_name}' cloned.")
    else:
        print(f"Repository '{repo_name}' already exists. Skipping clone.")

    # All relative paths below are resolved from inside the repository.
    os.chdir(repo_name)
    print(f"Changed directory to: {os.getcwd()}")

# --- End Setup Section ---

from vlmcProcessMining import *
import numpy as np
import argparse
import os
import subprocess
import sys
import json
import re
from scipy.io import savemat
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
import pathlib
import shutil
import tqdm
import time
import glob

import warnings
import xml.parsers.expat
import warnings
from pm4py.objects.log.importer.xes import importer as xes_importer
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.importer import importer as pnml_importer
from pm4py.algo.simulation.playout.petri_net import algorithm as simulator
from pm4py.statistics.variants.log import get as variants_module
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.util import dataframe_utils
import gzip
import tempfile
import atexit

import matplotlib.pyplot as plt
from scipy import stats


# Locations resolved against the directory the notebook is launched from.
noisydir = Path.cwd() / "data" / "noisy_data"
ldir = Path.cwd() / "likelyhood"
enjoySilentDir = Path.cwd() / "data" / "enjoythesilent"

# Layout of the experiments folder (kept relative to the working directory).
dataDir = Path("data") / "sldpn-reproducibility" / "experiments"
trainLogDir = dataDir / "2-splitlogs"
testLogDir = dataDir / "2-splitlogstest"
meausreDir = dataDir / "4-measures"
discoveredStochModel = dataDir / "3-discoveredstochasticmodels"

# Event logs considered by the experiments; other logs are skipped.
lognames = [
    'bpic12-a',
    'BPI_Challenge_2013_incidents',
    'BPI_Challenge_2013_open_problems',
    'BPI_Challenge_2013_closed_problems',
    'BPI Challenge 2017 - Offer log',
    'bpic2020-DomesticDeclarations',
    'bpic2020-InternationalDeclarations',
    'bpic2020-PrepaidTravelCost',
    'bpic2020-RequestForPayment',
    'Sepsis',
    'Road_Traffic_Fine_Management_Process',
]

def getVLMCName(inputFile):
    """Return the base name used for the VLMC artefacts of a log file.

    For a ``.gz`` file both the ``.gz`` suffix and the extension in front of
    it are removed (``a.xes.gz`` -> ``a``); for any other file only the last
    extension is removed (``a.xes`` -> ``a``).

    Args:
        inputFile: path of the log, as ``str`` or ``Path``.

    Returns:
        The derived name as a string.
    """
    path = Path(inputFile)
    if path.suffix != '.gz':
        return path.stem
    # Drop the extension that precedes ".gz"; yields "" when there is none.
    return path.stem.rpartition('.')[0]
    

### Mine The VLMC From an Event Log ###

In [2]:
def createVLMC(inputFile):
    """Mine a VLMC from an event log and report how long the mining took.

    The log is first handed to ``readInputFile``; ``mineProcess`` is then
    invoked on files under ``<cwd>/data`` whose names derive from
    ``getVLMCName(inputFile)``.
    NOTE(review): ``readInputFile`` presumably writes the
    ``data/converted/<name>.txt`` file consumed here - confirm in
    ``vlmcProcessMining``.

    Args:
        inputFile: path of the event log.

    Returns:
        Wall-clock seconds spent inside ``mineProcess`` (the conversion done
        by ``readInputFile`` is not included).
    """
    readInputFile(inputFile)
    name = getVLMCName(inputFile)

    started = time.time()
    root = os.getcwd()
    mineProcess(
        ecfFile=f"{root}/data/VLMC/{name}.ecf",
        infile=f"{root}/data/converted/{name}.txt",
        vlmcfile=f"{root}/data/VLMC/{name}.vlmc",
        nsim="1",
        ntime="1",
        alfa="1",
    )
    vlmcTime = time.time() - started

    print(f"VLMC Mined In {vlmcTime}")
    return vlmcTime

### Compute Likelihood With VLMC ###

In [3]:
def getVLMCLikelyhood(vlmcName=None,traceFile=None):
    """Run ``getLikelyhood`` for a set of traces against a mined VLMC.

    Args:
        vlmcName: base name of the VLMC artefacts under ``<cwd>/data``.
        traceFile: ``Path`` of the trace file to score; the part of its file
            name before the first dot selects the working directory under
            ``ldir`` (created if missing).

    Returns:
        Wall-clock seconds taken by the whole call.
    """
    started = time.time()
    root = os.getcwd()

    # Model artefacts, all addressed by absolute path.
    ecf_path = pathlib.Path(f"{root}/data/VLMC/{vlmcName}.ecf").absolute()
    training_path = pathlib.Path(f"{root}/data/converted/{vlmcName}.txt").absolute()
    output_vlmc_path = pathlib.Path(f"{root}/data/VLMC/{vlmcName}2.vlmc").absolute()
    input_vlmc_path = pathlib.Path(f"{root}/data/VLMC/{vlmcName}.vlmc").absolute()

    # One working directory per log.
    work_dir = ldir / pathlib.Path(traceFile.name.split(".")[0])
    work_dir.mkdir(parents=True, exist_ok=True)

    getLikelyhood(
        ecfFile=str(ecf_path),
        infile=str(training_path),       # dataset used to learn the VLMC
        vlmc=str(input_vlmc_path),       # input VLMC to use
        vlmcfile=str(output_vlmc_path),  # output VLMC
        traces=str(traceFile),           # traces whose likelihood is computed
        cwd=str(work_dir),
        outFile="out.mat",
        nsim="1",
        ntime="1",
        alfa="1",
    )
    return time.time() - started

### Preprocess Test Data ###

In [4]:
def prepareTestDate(inputTrace=None):
    """Return the test-log path matching a training log, renaming it if needed.

    The test log lives in ``testLogDir`` and carries ``_test`` in front of
    the last two extensions of the training log's file name
    (``x.xes.gz0.xes.gz`` -> ``x.xes.gz0_test.xes.gz``). If that file is not
    there yet, the file in ``testLogDir`` that still has the training log's
    name is renamed to it.

    Args:
        inputTrace: ``Path`` of the training log.

    Returns:
        ``Path`` of the test log inside ``testLogDir``.
    """
    pieces = inputTrace.name.split('.')
    head = '.'.join(pieces[0:-2])
    tail = '.'.join(pieces[-2:])
    testTraceFile = testLogDir / f"{head}_test.{tail}"

    if testTraceFile.is_file():
        return testTraceFile

    (testLogDir / inputTrace.name).rename(testTraceFile)
    return testTraceFile

### Compute uEMSC With VLMC Likelihood ###

In [5]:
def uEMSCVLMC(inputLan=None,vlmcName=None):
    """Compute uEMSC from the likelihood files of a trace language and a VLMC.

    Both ``.lik`` files are looked up in ``ldir/<first dot-separated part of
    the language file name>`` (created if missing) and passed to
    ``computeMuEMSC``.

    Args:
        inputLan: ``Path`` of the ``.lan`` file; its last extension is
            replaced by ``.lik`` to name the trace likelihood file.
        vlmcName: base name of the VLMC; ``<vlmcName>.vlmc.lik`` is the model
            likelihood file.

    Returns:
        ``(uEMSC, seconds)``: the value returned by ``computeMuEMSC`` and the
        wall-clock time of this call.
    """
    started = time.time()
    lan_name = inputLan.name

    work_dir = ldir / lan_name.split(".")[0]
    work_dir.mkdir(parents=True, exist_ok=True)

    # Swap the last extension of the language file for ".lik".
    trace_lik = work_dir / (lan_name.rpartition(".")[0] + ".lik")
    model_lik = work_dir / (vlmcName + ".vlmc.lik")

    uEMSC = computeMuEMSC(traceLik=trace_lik, modelLik=model_lik)
    return uEMSC, time.time() - started

In [6]:
def reproduceVLMVExp():
    """Run the VLMC experiment on every training split of the selected logs.

    For each ``*.xes.gz<digit>.xes.gz`` file under ``trainLogDir`` whose log
    name (the part before the first dot) is listed in ``lognames``:

    1. mine a VLMC from the training split (``createVLMC``);
    2. locate the matching test split and pass it to ``readInputFile``;
    3. compute the likelihood of the test language (``getVLMCLikelyhood``);
    4. compute uEMSC (``uEMSCVLMC``).

    Results are written to ``ldir/<log name>/`` as ``<vlmcName>.uemsc`` (the
    uEMSC value) and ``<vlmcName>.time`` (mining, likelihood and uEMSC times,
    in seconds). Logs not in ``lognames`` are reported and skipped.
    NOTE(review): the ``_test_trace.lan`` file is presumably produced by
    ``readInputFile`` - confirm in ``vlmcProcessMining``.
    """
    # Sorted so that splits are processed in the same order on every run.
    for inputTrace in sorted(trainLogDir.rglob("*.xes.gz[0-9].xes.gz")):
        logName = inputTrace.name.split('.')[0]
        if logName not in lognames:
            print(f"skipped {inputTrace.name}")
            continue

        # Create the model from the training log.
        mctime = createVLMC(inputFile=inputTrace)

        # Process the test log.
        testTrace = prepareTestDate(inputTrace=inputTrace)
        readInputFile(inputFile=testTrace)

        # Same helper createVLMC uses, so the model files are found again.
        vlmcName = getVLMCName(inputTrace)
        outDir = ldir / logName

        # Likelihood of the test language.
        testLan = outDir / f"{vlmcName}_test_trace.lan"
        liktime = getVLMCLikelyhood(vlmcName=vlmcName, traceFile=testLan)

        # uEMSC and timings.
        uEMSC, uemsctime = uEMSCVLMC(inputLan=testLan, vlmcName=vlmcName)
        np.savetxt(str(outDir / f"{vlmcName}.uemsc"), [uEMSC])
        np.savetxt(str(outDir / f"{vlmcName}.time"), [mctime, liktime, uemsctime])


In [20]:
def reproduceNoisyExp(train_log="/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/sldpn-reproducibility/experiments/2-splitlogs/Road_Traffic_Fine_Management_Process.xes.gz0.xes.gz",
                      noisy_pattern="Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_{n}.0pct.xes.gz"):
    """Measure uEMSC of a VLMC against increasingly noisy versions of a log.

    A VLMC is mined once from ``train_log``; each noisy log
    (5%, 10%, ..., 40%) is then scored against it.

    Args:
        train_log: path of the clean training log.
            NOTE(review): the default is an absolute path on the original
            author's machine, kept only for interface compatibility; pass an
            explicit path.
        noisy_pattern: path template relative to ``noisydir`` with an
            ``{n}`` placeholder for the noise percentage.

    Returns:
        DataFrame with one row per noise level and two unnamed columns: the
        path of the noisy ``.lan`` file and its uEMSC.
    """
    train_log = Path(train_log)
    noisy_logs = [noisydir / noisy_pattern.format(n=n) for n in [5,10,15,20,25,30,35,40]]

    # Create the model from the clean log.
    createVLMC(inputFile=train_log)

    # Same helper createVLMC uses, so the mined model files are found again.
    vlmcName = getVLMCName(train_log)
    # Every noisy language is stored in the training log's directory.
    outDir = ldir / train_log.name.split(".")[0]

    uEMSCnoisy = []
    for noisy_log in noisy_logs:
        # Process the noisy log, then score its language against the model.
        readInputFile(inputFile=noisy_log)
        noisyLan = outDir / f"{getVLMCName(noisy_log)}_trace.lan"

        getVLMCLikelyhood(vlmcName=vlmcName, traceFile=noisyLan)
        uEMSC, _ = uEMSCVLMC(inputLan=noisyLan, vlmcName=vlmcName)

        uEMSCnoisy.append([noisyLan, uEMSC])

    return pd.DataFrame(uEMSCnoisy)

In [31]:
def noisyres_to_latex_table(df):
    # Estrai %Noise usando regex
    df = df.copy()
    df.columns = ['path', 'uEMSC']  # Rinomina per chiarezza
    
    df['%Noise'] = df['path'].apply(lambda x: re.search(r'(\d+).0pct', str(x)).group(1) if re.search(r'(\d+).0pct', str(x)) else '')
    df['uEMSC'] = df['uEMSC'].round(3)

    # Seleziona e ordina le colonne
    latex_df = df[['%Noise', 'uEMSC']]

    # Converto in LaTeX ben formattato
    latex_table = latex_df.to_latex(index=False, column_format="cc", header=["\\%Noise", "uEMSC"], float_format="%.3f")

    return latex_table


### Collect The Results Of The Data-Aware Paper ###

In [7]:
def collectDAwareResults():
    """Collect the baseline uEMSC measures stored under ``meausreDir``.

    Every ``*-uemsc.txt`` file is read; a ``*-duemsc.txt`` file is read only
    when no ``-uemsc.txt`` file with the same prefix exists. The directory
    name supplies the log name (text before the first dot) and the discovery
    technique (text after the last dash); the file name supplies
    ``<stochastic technique>-<split index>-...``. The measure is the first
    value of the first space-separated column of the file.

    Logs whose name contains "Hospital", "testlogdata", "test log" or
    "TRANSFERS" are ignored, files that cannot be read are reported and
    skipped, and the techniques ``dsd2`` and ``dsd2Ohe`` are dropped.

    Returns:
        DataFrame with columns ``logname``, ``split``, ``dis-tech``,
        ``stoch-tech`` (renamed to DSDwe/DSD/ABE/FBE/BUC) and ``zemsc``.
    """
    uemsc_files = list(meausreDir.rglob("*-uemsc.txt"))

    # Paths of the duemsc files that duplicate an existing uemsc file.
    already_covered = {
        str(mf).replace(mf.name.split("-")[-1], "duemsc.txt") for mf in uemsc_files
    }
    duemsc_only = [
        mf2 for mf2 in meausreDir.rglob("*-duemsc.txt")
        if str(mf2) not in already_covered
    ]

    excluded_logs = ("Hospital", "testlogdata", "test log", "TRANSFERS")
    results = []
    for filePath in uemsc_files + duemsc_only:
        Logname = filePath.parent.name.split(".")[0]
        if any(tag in Logname for tag in excluded_logs):
            continue
        distech = filePath.parent.name.split("-")[-1]
        splitIdx = int(filePath.name.split("-")[1])
        stochdistech = filePath.name.split("-")[0]
        try:
            measures = pd.read_csv(filePath, names=["uemsc", "type"], header=None, delimiter=" ")
            zemsc = measures["uemsc"].values[0]
        except Exception as e:
            # Skip the file: appending here would reuse the previous file's
            # value (or fail on an unbound name for the very first file).
            print(f"{Logname}: skipping unreadable measure file {filePath} ({e})")
            continue
        results.append([Logname, splitIdx, distech, stochdistech, zemsc])

    df = pd.DataFrame(results, columns=["logname", "split", "dis-tech", "stoch-tech", "zemsc"])
    df = df[~df["stoch-tech"].isin(["dsd2Ohe", "dsd2"])]
    df = df.replace({"stoch-tech": {"dsd": "DSDwe", "dsdOhe": "DSD", "alig": "ABE", "freq": "FBE", "uni": "BUC"}})

    return df

In [8]:
def collectVLMCRes():
    """Collect every VLMC uEMSC value stored under ``ldir``.

    Each ``*.uemsc`` file contributes one row: the log name is the part of
    the file name before the first dot, the split index is the number
    between ``gz`` and ``.uemsc``, and the value is whatever ``np.loadtxt``
    reads from the file.

    Returns:
        DataFrame with columns ``logname``, ``split`` and ``uemsc``.
    """
    rows = []
    for filePath in map(Path, list(ldir.rglob("*.uemsc"))):
        fileName = filePath.name
        rows.append([
            fileName.split(".")[0],
            int(re.findall(r'gz(\d+)\.uemsc', fileName)[0]),
            np.loadtxt(filePath),
        ])
    return pd.DataFrame(rows, columns=["logname", "split", "uemsc"])

In [9]:
def collectVLMCTime():
    """Collect the VLMC timings stored under ``ldir``.

    Each ``*.time`` file contributes one row: the log name is the part of
    the file name before the first dot, the split index is the number
    between ``gz`` and ``.time``, and the first three values in the file are
    taken as mining, likelihood and uEMSC times.

    Returns:
        DataFrame with columns ``logname``, ``split``, ``mctime``,
        ``liktime`` and ``uemsctime``.
    """
    rows = []
    for filePath in map(Path, list(ldir.rglob("*.time"))):
        fileName = filePath.name
        logName = fileName.split(".")[0]
        splitIdx = int(re.findall(r'gz(\d+)\.time', fileName)[0])
        timings = np.loadtxt(filePath)
        rows.append([logName, splitIdx, timings[0], timings[1], timings[2]])
    return pd.DataFrame(rows, columns=["logname", "split", "mctime", "liktime", "uemsctime"])

In [10]:
def getBestPMTech(baselineRes):
    """Pick, per log and stochastic technique, the best discovery technique.

    For every (log, stochastic technique) pair the discovery technique with
    the highest mean ``zemsc`` is selected. When the pair has no data at
    all, the last discovery technique found in ``baselineRes`` is reported,
    so every pair still yields a row.

    Args:
        baselineRes: DataFrame with columns ``logname``, ``dis-tech``,
            ``stoch-tech`` and ``zemsc``.

    Returns:
        DataFrame with string columns ``logname``, ``dtech`` and ``stech``,
        one row per (log, stochastic technique) pair.
    """
    dis_techs = baselineRes["dis-tech"].unique()
    stoch_techs = baselineRes["stoch-tech"].unique()

    rows = []
    for l in baselineRes["logname"].unique():
        in_log = baselineRes["logname"] == l
        # For each stochastic technique, find the best discovery technique.
        for stech in stoch_techs:
            in_pair = in_log & (baselineRes["stoch-tech"] == stech)
            best_value = float("-inf")
            best_dtech = None
            for dtech in dis_techs:
                scores = baselineRes.loc[in_pair & (baselineRes["dis-tech"] == dtech), "zemsc"]
                try:
                    value = scores.mean()
                except Exception as e:
                    print(e, [l, dtech, stech])
                    print(scores)
                    raise
                # The mean of an empty selection is NaN and never wins.
                if value > best_value:
                    best_value = value
                    best_dtech = dtech
            if best_dtech is None:
                # No data for this pair: fall back to the last technique.
                best_dtech = dis_techs[-1]
            # Rows are kept as tuples, so names may contain any character.
            rows.append((str(l), str(best_dtech), str(stech)))
    return pd.DataFrame(rows, columns=["logname", "dtech", "stech"])

In [11]:
def collectEnjoyTheSilenceRes():
    """Load the "enjoy the silence" baseline results in the common format.

    Reads ``enjoysilent.csv`` from ``enjoySilentDir`` and renames its
    columns (``Log``, ``Discovery``, ``sto.dis.``, ``uEMSC``) to
    ``logname``, ``dis-tech``, ``stoch-tech`` and ``zemsc``. Every row is
    repeated ten times with ``split`` set to 0..9. Cells of ``zemsc`` that
    contain only dashes (hyphen, en dash or em dash) are replaced by the
    string ``"0"``; the caller converts the column to float.

    The short-name to canonical-name mapping that used to be declared here
    was never applied (its use was commented out), so it has been removed.

    Returns:
        The expanded DataFrame, including the new ``split`` column.
    """
    data = pd.read_csv(enjoySilentDir/"enjoysilent.csv")
    data = data.rename(columns={"Log": "logname", "Discovery": "dis-tech", "sto.dis.": "stoch-tech", "uEMSC": "zemsc"})

    split_values = list(range(10))
    data_expanded = data.loc[data.index.repeat(len(split_values))].copy()
    data_expanded['split'] = split_values * len(data)

    # Assign the result back: an in-place replace on the selected column
    # does not reach the frame under pandas Copy-on-Write. The pattern is
    # anchored so that only dash-only cells are rewritten and minus signs
    # inside numbers such as "1e-05" stay intact.
    data_expanded["zemsc"] = data_expanded["zemsc"].replace(
        to_replace=r"^\s*[–—-]+\s*$", value="0", regex=True
    )

    return data_expanded

### Reproduce All Paper Results ###

In [None]:
# Run the VLMC experiment; it writes the .uemsc and .time files under `ldir`
# that the collection cells read back.
reproduceVLMVExp()

In [None]:
# Results produced by this notebook.
vlmcRes = collectVLMCRes()
vlmcTime = collectVLMCTime()

# Baseline results: the data-aware measures plus the "enjoy the silence"
# table, whose uEMSC column is read as text and is converted to float here.
baselineRes = collectDAwareResults()
enjoydata = collectEnjoyTheSilenceRes()
enjoydata["zemsc"] = enjoydata["zemsc"].astype(float)
baselineRes = pd.concat(
    [baselineRes, enjoydata[baselineRes.columns]],
    ignore_index=True,
)

# Best discovery technique per log and stochastic technique.
btech = getBestPMTech(baselineRes)

In [None]:
# Mean mining time and mean uEMSC time per log, averaged over the splits.
mean_mctime = vlmcTime.groupby('logname')['mctime'].mean().reset_index()
mean_uemsctime = vlmcTime.groupby('logname')['uemsctime'].mean().reset_index()

# One row per entry of `lognames`, in that order; a log without timings
# raises IndexError at `.iloc[0]`.
times = []
for l in lognames:
    mc_of_log = mean_mctime.loc[mean_mctime["logname"] == l, "mctime"]
    uemsc_of_log = mean_uemsctime.loc[mean_uemsctime["logname"] == l, "uemsctime"]
    times.append([mc_of_log.iloc[0], uemsc_of_log.iloc[0]])

timeDF = pd.DataFrame(np.array(times), columns=["mctime", "uemsctime"])

### Reproduce the Paper's Plots ###

In [None]:
# One box plot per event log: the best baseline configuration of every
# stochastic technique next to the VLMC, saved as ./plots/<log name>.pdf.
plt.rcParams.update({'font.size': 18})

# The median line is hidden; the mean is drawn as an orange line instead.
medianprops = dict(linestyle=None, linewidth=0, color='firebrick')
meanprops = dict(linestyle="-", linewidth=1.5, color='orange')

# savefig does not create missing directories.
Path("./plots").mkdir(parents=True, exist_ok=True)

for lname in lognames:
    bestMethods = btech[(btech["logname"]==lname)]
    x = []
    names = []
    # Each row of `btech` is (logname, discovery technique, stochastic technique).
    for bm in bestMethods.to_numpy():
        names += [f"{bm[1]}_{bm[2]}"]
        values = baselineRes[(baselineRes["dis-tech"]==bm[1]) & (baselineRes["stoch-tech"]==bm[2]) & (baselineRes["logname"]==bm[0])]["zemsc"].values
        x += [values.tolist()]

    # The VLMC results always form the last box.
    x += [vlmcRes[vlmcRes["logname"]==lname]["uemsc"].values]
    names += ["VLMC"]

    fig = plt.figure(figsize=(10, 4))
    plt.boxplot(x, tick_labels=names, showmeans=True, meanline=True, medianprops=medianprops, meanprops=meanprops)
    plt.ylabel("uEMSC")
    plt.grid()
    plt.xticks(rotation=30)
    plt.savefig(f'./plots/{lname}.pdf', bbox_inches='tight', pad_inches=0)

### Reproduce Noisy Experiments ###

In [25]:
# Noise-robustness experiment on split 0 of the Road Traffic Fine log.
# The training log is located through `trainLogDir` instead of an absolute
# path on one machine; run the notebook from the project root (the directory
# that contains `data/`) so the path resolves.
train_log = str((trainLogDir / "Road_Traffic_Fine_Management_Process.xes.gz0.xes.gz").absolute())

# Templates relative to `noisydir`; {n} is the noise percentage.
noisy_pattern_add = "Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_{n}.0pct.xes.gz"
noisy_pattern_rem = "Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_{n}.0pct.xes.gz"

noisyAddRes = reproduceNoisyExp(train_log=train_log, noisy_pattern=noisy_pattern_add)
noisyRemRes = reproduceNoisyExp(train_log=train_log, noisy_pattern=noisy_pattern_rem)

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/sldpn-reproducibility/experiments/2-splitlogs/Road_Traffic_Fine_Management_Process.xes.gz0.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpcpqy6h5e/Road_Traffic_Fine_Management_Process.xes.gz0.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 35635.14it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf

JAR File Path: scripts/jfitVlmc.jar
VLMC Mined In 13.642143964767456
/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_5.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpekdlyc94/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_5.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 33371.83it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_5.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_5.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_10.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpo5z3_u4g/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_10.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 36533.34it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_10.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_10.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_15.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpll9y8923/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_15.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 36421.66it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_15.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_15.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 8ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_20.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpxc9tkaq7/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_20.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 35287.16it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_20.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_20.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_25.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmp3r8fs5ku/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_25.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 32456.35it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_25.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_25.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_30.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmptd77udg0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_30.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 34574.84it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_30.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_30.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_35.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpe_28of0z/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_35.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 34762.86it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_35.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_35.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 8ms
saving traces
out.mat

/Users/emilio-imt/Desktop/sima-andrea/second_submission/data/noisy_data/Road_Traffic_Fine_Management_Process.xes.gz0/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_40.0pct.xes.gz
.gz file extracted to: /var/folders/th/tzthw0195kn8qkczx48fv6440000gn/T/tmpqft_oki8/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_40.0pct.xes


parsing log, completed traces ::   0%|          | 0/75123 [00:00<?, ?it/s]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 75123/75123 [00:02<00:00, 33467.67it/s]


Number of cases:75123
convert trace data/converted/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_40.0pct.txt /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0_noisy_rem_40.0pct.ecf
Output: /Users/emilio-imt/Desktop/sima-andrea/second_submission/data/VLMC/Road_Traffic_Fine_Management_Process.xes.gz0.ecf
0.0
Order:=12
VLMC total nodes 652
VLMC simulation time 6ms
saving traces
out.mat



In [34]:
# LaTeX table for the `noisyRemRes` run (the `noisy_rem_*` logs).
print(noisyres_to_latex_table(noisyRemRes))

\begin{tabular}{cc}
\toprule
\%Noise & uEMSC \\
\midrule
5 & 0.949 \\
10 & 0.901 \\
15 & 0.854 \\
20 & 0.805 \\
25 & 0.757 \\
30 & 0.710 \\
35 & 0.662 \\
40 & 0.614 \\
\bottomrule
\end{tabular}

