In [None]:
# %pip install pm4py
# %pip install pyemd

import os
import time
import pickle
import pandas as pd
import numpy as np
import subprocess
import multiprocessing as mp
import argparse
import pathlib
from tqdm import tqdm
import warnings
import xml.parsers.expat

# from google.colab import drive

import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.objects.petri_net.importer import importer as pnml_importer
from pm4py.algo.simulation.playout.petri_net import algorithm as simulator
from pm4py.statistics.variants.log import get as variants_module
from pm4py.algo.evaluation.earth_mover_distance import algorithm as emd_evaluator
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.util import dataframe_utils
import random

# Suppress UserWarning messages attributed to the pm4py modules named below.
warnings.filterwarnings("ignore", category=UserWarning, module='pm4py.utils')
warnings.filterwarnings("ignore", category=UserWarning, module='pm4py.objects.stochastic_petri')
warnings.filterwarnings("ignore", category=UserWarning, module='pm4py.algo.simulation.montecarlo')

# Machine-specific working directory: relative file names used later in this
# file (e.g. the simulated-log output of simSPN) resolve against it.
os.chdir(r'/users/sima/Desktop/stochastic')


# ***Received from Emilio (I made some changes)***

In [None]:

# Shared state for the expat callbacks defined below; the handlers declare
# these names `global` and update them while a PNML document is parsed.
xml_tranition=None  # dict of the <transition> currently being read, else None
xml_tranitions=[]  # transition dicts appended by end_element
xml_weight=False  # True once a <property key="weight"> start tag was seen
######################################################################################################################################


def start_element(name, attrs):
    """Expat start-tag handler tracking transitions and weight properties.

    Args:
        name: Tag name of the element that was just opened.
        attrs: Mapping of the element's attribute names to their values.

    Side effects:
        * On a ``transition`` element, ``xml_tranition`` is replaced with a new
          dict holding the element's ``id``.
        * On a ``property`` element whose ``key`` attribute equals
          ``"weight"``, ``xml_weight`` is set to True so that the following
          character data is interpreted as the weight.

    Raises:
        KeyError: If a ``transition`` element carries no ``id`` attribute.
    """
    global xml_tranition, xml_weight
    if name == "transition":
        xml_tranition = {"id": attrs["id"]}
    elif name == "property" and attrs.get("key") == "weight":
        # .get(): a <property> without a "key" attribute is simply not a
        # weight, it must not abort the whole parse with a KeyError.
        xml_weight = True
######################################################################################################################################

def end_element(name):
    """Expat end-tag handler that finalises the transition being read.

    When a ``transition`` element closes, the current ``xml_tranition`` value
    is appended to ``xml_tranitions`` and ``xml_tranition`` is reset to None.
    Closing tags of any other element are ignored.

    Args:
        name: Tag name of the element that was just closed.
    """
    global xml_tranition, xml_tranitions
    if name != "transition":
        return
    finished = xml_tranition
    xml_tranition = None
    xml_tranitions.append(finished)
######################################################################################################################################

def char_data(data):
    """Expat character-data handler that stores a pending transition weight.

    Nothing happens unless ``xml_weight`` is set (i.e. a weight property was
    just opened). Otherwise the text is converted with ``float`` and stored
    under ``"weight"`` in the transition currently being read, and the flag is
    cleared.

    Args:
        data: A chunk of character data reported by the parser.

    Raises:
        ValueError: If the non-blank text is not a valid float literal.
    """
    global xml_weight, xml_tranition
    if not xml_weight:
        return

    text = data.strip()
    if not text:
        # The parser may report layout whitespace (e.g. the newline after the
        # opening tag) as its own chunk; keep waiting for the actual number
        # instead of crashing in float().
        return

    # A weight property that is not nested in a <transition> has nowhere to be
    # stored; it is dropped rather than failing on ``None[...]``.
    if xml_tranition is not None:
        xml_tranition["weight"] = float(text)
    xml_weight = False
######################################################################################################################################

def getxmlTransition_byname(name):
    """Look up a parsed transition by its identifier.

    Args:
        name: Value to compare with the ``"id"`` entry of each parsed
            transition.

    Returns:
        The first dict in ``xml_tranitions`` whose ``"id"`` equals ``name``,
        or None when there is no such entry.
    """
    matches = (entry for entry in xml_tranitions if entry["id"] == name)
    return next(matches, None)
######################################################################################################################################

def get_stoch_map(pn, pnmlfile):
    """Build the transition -> StochTrans map from the weights in a PNML file.

    The file is parsed with expat using the module-level handlers
    (``start_element``, ``end_element``, ``char_data``); every transition of
    ``pn`` is then matched by ``name`` against the ``id`` of the parsed
    transitions.

    Args:
        pn: Net object exposing ``transitions``, each with a ``name``.
        pnmlfile: Path of the PNML/XML file to read the weights from.

    Returns:
        Dict mapping each matched transition of ``pn`` to a ``StochTrans``
        carrying its weight. Transitions that are missing from the file, or
        that have no weight there, are reported with a printed warning and
        left out of the map.

    Raises:
        OSError: If ``pnmlfile`` cannot be opened.
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
    """
    global xml_tranition, xml_tranitions, xml_weight

    # The handlers accumulate into module-level state. Reset it so that a
    # second call (or a call for a different file) does not see transitions
    # left over from an earlier parse.
    xml_tranition = None
    xml_tranitions = []
    xml_weight = False

    p = xml.parsers.expat.ParserCreate()
    # Ask expat to coalesce adjacent text so a weight arrives in one callback
    # whenever possible.
    p.buffer_text = True
    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data

    # Binary mode lets expat honour the encoding declared in the XML prolog
    # instead of relying on the platform's default text encoding; ParseFile
    # also signals end-of-document, so a truncated file is reported.
    with open(pnmlfile, "rb") as f:
        p.ParseFile(f)

    smap = {}
    for t in pn.transitions:
        xml_t = getxmlTransition_byname(t.name)
        if xml_t is None:
            print(f"Warning: Transition {t.name} not found in the XML.")
        elif "weight" not in xml_t:
            print(f"Warning: Transition {t.name} has no weight in the XML.")
        else:
            smap[t] = StochTrans(xml_t["weight"])

    return smap


######################################################################################################################################


def printProress(value,total):
    """Print a percentage once per second until the counter nears ``total``.

    Loops while ``value.value`` is below ``total - 1``; on each pass the
    current ``value.value * 100 / total`` is printed on the same console line
    (carriage return instead of newline) and the function sleeps one second.

    Args:
        value: Object exposing the current count through a ``value``
            attribute; it is re-read on every pass.
        total: Count that corresponds to 100%.
    """
    while value.value < total - 1:
        percent = value.value * 100 / total
        print(f"   {percent!s}%", end="\r")
        time.sleep(1.0)
######################################################################################################################################

def saveSimLog(log=None,outlogFile=None):
    """Write a log to a text file, one trace per line.

    The output path is printed first. Each trace becomes one line made of the
    ``"concept:name"`` values of its events joined by ``",,"``; events named
    ``"tau"`` are skipped. A trace with no remaining events yields an empty
    line.

    Args:
        log: Iterable of traces, each an iterable of mappings that contain a
            ``"concept:name"`` entry.
        outlogFile: Path of the file to create or overwrite.
    """
    print(outlogFile)
    # Context manager: the handle is closed even if a trace fails to serialise.
    with open(outlogFile, "w+") as outtraces:
        for trace in log:
            visible = [
                event["concept:name"]
                for event in trace
                if event["concept:name"] != "tau"
            ]
            outtraces.write(",,".join(visible) + "\n")

######################################################################################################################################



class StochTrans:
    """Minimal holder for the weight attached to one transition."""

    # Class-level default; __init__ always shadows it on the instance.
    weight = None

    def __init__(self, weight):
        """Store ``weight`` unchanged on the instance."""
        self.weight = weight

    def get_weight(self):
        """Return the weight given at construction (or assigned since)."""
        stored = self.weight
        return stored

    def __str__(self):
        """Return ``StochTrans(weight=<weight>)``."""
        shown = self.weight
        return f"StochTrans(weight={shown})"



######################################################################################################################################


def get_place_for_event(pn, event_name):
    """Find a place whose name contains ``event_name``.

    Args:
        pn: Net object exposing an iterable ``places``; each place has a
            ``name``.
        event_name: Text searched for inside each place name (substring
            test).

    Returns:
        The first place, in the iteration order of ``pn.places``, whose name
        contains ``event_name``; None if no place matches.
    """
    candidates = (place for place in pn.places if event_name in place.name)
    return next(candidates, None)


######################################################################################################################################
def simSPN(pnfile=None,queue=None,nrun=1,lock=None,value=None):
    """Simulate a stochastic Petri net and save the resulting traces.

    Args:
        pnfile: Path (``str`` or ``os.PathLike``) of a PNML file, or an
            already loaded net object.
        queue: Optional queue-like object. When given, the first simulated
            trace is ``put`` on it and the function returns None.
        nrun: Number of traces requested from the stochastic playout.
        lock: Optional lock guarding ``value``; when given, ``value.value`` is
            incremented by one after the simulation.
        value: Shared counter exposing a ``value`` attribute; only used
            together with ``lock``.

    Returns:
        The simulated log when ``queue`` is None, otherwise None.

    Side effects:
        Writes the simulated traces with ``saveSimLog`` to
        ``simulated_log_nrun{nrun}.xes`` in the current working directory.
        NOTE(review): despite the ``.xes`` suffix, ``saveSimLog`` writes plain
        comma-separated text, not XES.
    """
    if isinstance(pnfile, (str, os.PathLike)):
        pnfile = os.fspath(pnfile)
        # The markings stored in the file are deliberately not used; the
        # initial marking is rebuilt below.
        pn, _, _ = pm4py.read_pnml(pnfile)
    else:
        pn = pnfile

    # Initial marking is hard-coded to one token on the place named 'pI'.
    im = pm4py.generate_marking(pn, {'pI': 1})

    # NOTE(review): when ``pnfile`` is a net object rather than a path,
    # ``str(pnfile)`` is not a file name and get_stoch_map will presumably
    # fail to open it -- confirm whether that call pattern is ever used.
    smap = get_stoch_map(pn, str(pnfile))

    playout = simulator.Variants.STOCHASTIC_PLAYOUT
    simulated_log = simulator.apply(
        pn,
        im,
        variant=playout,
        parameters={
            playout.value.Parameters.NO_TRACES: nrun,
            playout.value.Parameters.STOCHASTIC_MAP: smap,
        },
    )

    filename = f'simulated_log_nrun{nrun}.xes'
    saveSimLog(simulated_log, filename)

    if lock is not None:
        # ``with`` releases the lock even if the increment raises.
        with lock:
            value.value += 1

    if queue is not None:
        # Only the first trace is handed to the queue, whatever ``nrun`` is.
        queue.put(simulated_log[0])
        return None
    return simulated_log

# def simSPN(pn,pnfile, start_transition_id, end_transition_id, nrun=1):
    
#     start_transition = next((trans for trans in pn.transitions if trans.name == start_transition_id), None)
#     end_transition   = next((trans for trans in pn.transitions if trans.name == end_transition_id), None)
#     if not start_transition or not end_transition:
#         raise ValueError("Start or end transition not found in the Petri net")

#     initial_marking, final_marking = set_markings(pn, start_transition, end_transition)

#     smap = get_stoch_map(pn, str(pnfile))

#     # Perform the simulation
#     simulated_log = simulator.apply(pn, initial_marking, final_marking, variant=simulator.Variants.STOCHASTIC_PLAYOUT,
#                                     parameters={simulator.Variants.STOCHASTIC_PLAYOUT.value.Parameters.NO_TRACES: nrun,
#                                                 simulator.Variants.STOCHASTIC_PLAYOUT.value.Parameters.STOCHASTIC_MAP: smap})
#     return simulated_log

######################################################################################################################################

def set_markings(pn, start_transition, end_transition):
    """Derive initial and final markings from a start and an end transition.

    Args:
        pn: Net object exposing an iterable ``arcs``; each arc has ``source``
            and ``target``.
        start_transition: Transition whose first incoming arc (in the
            iteration order of ``pn.arcs``) supplies the initially marked
            place.
        end_transition: Transition whose first outgoing arc supplies the
            finally marked place.

    Returns:
        Tuple ``(initial_marking, final_marking)`` of ``Marking`` objects,
        each holding at most one entry set to 1. A marking stays empty when
        no matching arc exists.
    """
    initial_marking, final_marking = Marking(), Marking()

    incoming = next((a for a in pn.arcs if a.target == start_transition), None)
    if incoming is not None:
        initial_marking[incoming.source] = 1

    outgoing = next((a for a in pn.arcs if a.source == end_transition), None)
    if outgoing is not None:
        final_marking[outgoing.target] = 1

    return initial_marking, final_marking


# Number of traces simulated at once

In [None]:
#########################################################################################################


def perform_analysis(num_groups,simulated_language, test_log,seed=None):
    """Compute one EMD value per group of cases of ``test_log``.

    The distinct case ids are split into ``num_groups`` nearly equal parts
    with ``np.array_split``. For every non-empty part, the matching events
    are converted to a pm4py event log, its language is extracted, and the
    earth mover's distance to ``simulated_language`` is evaluated.

    Args:
        num_groups: Number of parts the case ids are split into.
        simulated_language: Language passed as first argument to the EMD
            evaluator.
        test_log: DataFrame with the columns ``case:concept:name``,
            ``concept:name`` and ``time:timestamp``. It is copied, so the
            caller's frame is not modified.
        seed: When not None, the case ids are shuffled reproducibly with this
            seed before splitting; when None they keep their order of first
            appearance.

    Returns:
        DataFrame with a single column ``'EMD Value'``, one row per non-empty
        group.
    """
    test_log = test_log.copy()

    test_log['case:concept:name'] = test_log['case:concept:name'].astype(str)
    test_log['concept:name']      = test_log['concept:name'].astype(str)
    test_log['time:timestamp']    = pd.to_datetime(test_log['time:timestamp'], utc=True)

    case_ids = test_log['case:concept:name'].unique()
    if seed is not None:
        # A dedicated NumPy generator makes the shuffle depend on ``seed``.
        # Seeding Python's ``random`` module has no effect on NumPy's RNG, so
        # the earlier random.seed(...) + np.random.shuffle(...) combination
        # was not reproducible.
        case_ids = np.random.default_rng(seed).permutation(case_ids)

    grouped_case_ids = np.array_split(case_ids, num_groups)

    emd_results = []
    for group_case_ids in grouped_case_ids:
        group_df = test_log[test_log['case:concept:name'].isin(group_case_ids)]
        if group_df.empty:
            # More groups than cases: nothing to evaluate for this part.
            continue

        traditional_log = pm4py.convert_to_event_log(group_df)
        case_language = variants_module.get_language(traditional_log)
        emd_results.append(emd_evaluator.apply(simulated_language, case_language))

    return pd.DataFrame({'EMD Value': emd_results})




#########################################################################################################

def main(nrun, full_log1, num_groups,seed):
    """Run one simulation plus grouped EMD analysis and time both phases.

    Args:
        nrun: Number of traces to simulate.
        full_log1: Event-log DataFrame handed to ``perform_analysis``.
        num_groups: Number of case groups used by ``perform_analysis``.
        seed: Seed forwarded to ``perform_analysis`` for shuffling the cases.

    Returns:
        Tuple ``(simulation_time, emd_df, emd_estimation_time_group)``:
        seconds spent simulating and building the simulated language, the
        DataFrame returned by ``perform_analysis``, and the seconds that
        analysis took.

    Note:
        ``pnml_path`` and ``get_language_model`` are read from module scope
        and are not defined in this part of the file -- TODO confirm they are
        provided elsewhere before this function runs.
    """
    start_time = time.time()
    # Forward the requested trace count; it used to be hard-coded to 1, so
    # every run simulated a single trace regardless of ``nrun``.
    simulated_log = simSPN(pnfile=pnml_path, queue=None, nrun=nrun, lock=None, value=None)
    simulated_language = get_language_model(simulated_log)
    simulation_time = time.time() - start_time

    emd_estimation_start_time_group = time.time()
    # ``seed`` was accepted but never passed on.
    emd_df = perform_analysis(num_groups, simulated_language, full_log1, seed=seed)
    emd_estimation_time_group = time.time() - emd_estimation_start_time_group

    # The value printed here is the simulation time, not the analysis time.
    print(f"Time spent for analysis with nrun= {nrun}", simulation_time)

    return simulation_time, emd_df,emd_estimation_time_group

# Experiment grid: every trace count in ``nruns`` is combined with every
# group count in ``group_counts``.
group_counts = [1, 2,10 ,50] 
# nruns = [1, 10, 50, 100, 500, 1000, 5000]

nruns = [5000]
seed = 117

# Machine-specific output location; created up front so the CSV writes below
# do not fail on a fresh checkout.
output_dir = "/users/sima/Desktop/stochastic/groups"
os.makedirs(output_dir, exist_ok=True)

tracking_columns = ['nrun', 'Num Groups', 'Simulation Time', 'EMD Estimation Time', 'EMD CSV File']
# Rows are collected in a list and turned into a frame once per ``nrun``;
# repeatedly concatenating onto an empty DataFrame is deprecated in recent
# pandas and forces object dtypes.
tracking_rows = []
tracking_df_group = pd.DataFrame(columns=tracking_columns)

for nrun in nruns:
    for num_groups in group_counts:
        print(f'emd_df for nrun= {nrun}, num_groups= {num_groups} is started')

        simulation_time, emd_df, emd_estimation_time_group = main(nrun, full_log1, num_groups, seed)
        csv_file_path = f"{output_dir}/emd_result_nrun{nrun}_nGroup{num_groups}.csv"

        emd_df.to_csv(csv_file_path, index=False)
        print(f'emd_df for nrun= {nrun}, num_groups= {num_groups} is produced')

        tracking_rows.append({
            'nrun': nrun,
            'Num Groups': num_groups,
            'Simulation Time': simulation_time,
            'EMD Estimation Time': emd_estimation_time_group,
            'EMD CSV File': csv_file_path,
        })

        print("-------------------------------------------------------------------------------------------------")

    # Rewrite the tracking file after each ``nrun`` so partial results survive
    # an interrupted run.
    tracking_df_group = pd.DataFrame(tracking_rows, columns=tracking_columns)
    tracking_df_group.to_csv(f"{output_dir}/nrun_tracking_groups.csv", index=False)
    print(f'tracking_df updated for nrun= {nrun}')

print('Analysis finished')



