# Experiment to check discoverability and conformance of different models

We run the following experiment with different random initial Models $M$:

1. Generate random model $M$
2. Simulate a log of 100k traces $L$ of model $M$
3. For $i$ in $[1,3,10,30,100,300,...,100000]$, use a growing subset $L'_i$ of $L$ with $|L'_i| = i$
4. Discover a model $M_d$ from $L'_{i}$ with an algorithm of your choice
5. Report precision and recall for:
  * $M$ vs. $L'_i$
  * $L'_i$ vs. $M_d$
  * $M$ vs. $M_d$

## Prepare classpath with maven repository, maven local, and some more jars

In [1]:
%maven dk.brics:automaton:1.12-1
%maven commons-logging:commons-logging:1.2
%maven org.apache.commons:commons-collections4:4.1
%maven org.apache.commons:commons-lang3:3.7
%maven org.apache.commons:commons-math3:3.6.1
%maven colt:colt:1.2.0
%maven jgraph:jgraph:5.13.0.0
%maven net.sf.trove4j:trove4j:3.0.3
%maven org.simpleframework:simple-xml:2.7.1
%maven io.github.andreas-solti.matrix-toolkits-java:mtj:1.0.8
%maven net.sourceforge.f2j:arpack_combined_all:0.1
%maven com.github.fommil.netlib:all:1.1.2

In [2]:
%%loadFromPOM
<repository>
    <id>openxes-repo</id>
    <url>file:////home/prom/openxes</url>
</repository>

<!-- Not available on Maven, local copy -->
<dependency>
    <groupId>org.deckfour</groupId>
    <artifactId>openxes</artifactId>
    <version>2.16</version>
</dependency>

<dependency>
    <groupId>io.github.andreas-solti.xeslite</groupId>
    <artifactId>xeslite</artifactId>
    <version>0.0.1</version>
</dependency>

In [3]:
List<String> addedJars = %jars /home/prom/lib/plugins/*.jar
List<String> addedJars2 = %jars /home/prom/lib/*.jar

In [4]:
addedJars

[/home/prom/lib/plugins/StochasticPetriNets.jar, /home/prom/lib/plugins/OpenXES.jar, /home/prom/lib/plugins/AntiAlignments.jar, /home/prom/lib/plugins/EfficientStorage.jar, /home/prom/lib/plugins/Properties.jar, /home/prom/lib/plugins/PTConversions.jar, /home/prom/lib/plugins/ProM-Models.jar, /home/prom/lib/plugins/XESStandard.jar, /home/prom/lib/plugins/AcceptingPetriNet.jar, /home/prom/lib/plugins/Widgets.jar, /home/prom/lib/plugins/ProM-Framework.jar, /home/prom/lib/plugins/PetriNets.jar, /home/prom/lib/plugins/openxes-2.16.jar, /home/prom/lib/plugins/ProjectedRecallAndPrecision.jar, /home/prom/lib/plugins/ProM-Contexts.jar, /home/prom/lib/plugins/ProcessTree.jar, /home/prom/lib/plugins/InductiveMiner.jar, /home/prom/lib/plugins/EvolutionaryTreeMiner.jar, /home/prom/lib/plugins/ProM-Plugins.jar, /home/prom/lib/plugins/PNetReplayer.jar]

## Handle imports 

In [5]:
import java.util.stream.IntStream;
import org.deckfour.xes.info.XLogInfo;
import org.deckfour.xes.info.impl.XLogInfoImpl;
import org.deckfour.xes.info.XLogInfoFactory;
import org.deckfour.xes.model.XLog;
import org.deckfour.xes.classification.XEventClassifier;
import org.deckfour.xes.classification.XEventClasses;

import org.processmining.acceptingpetrinet.models.AcceptingPetriNet;
import org.processmining.acceptingpetrinet.models.impl.AcceptingPetriNetImpl;
import org.processmining.eigenvalue.Utils;
import org.processmining.eigenvalue.automata.PrecisionRecallComputer;
import org.processmining.eigenvalue.data.EntropyPrecisionRecall;
import org.processmining.eigenvalue.generator.GenerateLogAndModel;
import org.processmining.eigenvalue.generator.NAryTreeGenerator;
import org.processmining.eigenvalue.tree.TreeUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.processmining.plugins.etm.model.narytree.NAryTree;
import org.processmining.plugins.stochasticpetrinet.StochasticNetUtils;

import org.processmining.projectedrecallandprecision.helperclasses.ProjectPetriNetOntoActivities;
import org.processmining.projectedrecallandprecision.helperclasses.AcceptingPetriNet2automaton;
import org.processmining.projectedrecallandprecision.helperclasses.AutomatonFailedException;
import org.processmining.projectedrecallandprecision.helperclasses.EfficientLog;
import com.google.common.base.Stopwatch;

import org.processmining.eigenvalue.test.TestUtils;

import dk.brics.automaton2.Automaton;
import org.processmining.plugins.etm.model.narytree.conversion.NAryTreeToProcessTree;
import org.processmining.processtree.ProcessTree;
import org.processmining.ptconversions.pn.ProcessTree2Petrinet;
import org.processmining.ptconversions.pn.ProcessTree2Petrinet.NotYetImplementedException;
import org.processmining.ptconversions.pn.ProcessTree2Petrinet.InvalidProcessTreeException;

import org.processmining.plugins.InductiveMiner.efficienttree.EfficientTree;
import org.processmining.plugins.InductiveMiner.efficienttree.EfficientTree2processTree;
import org.processmining.plugins.InductiveMiner.mining.MiningParameters;
import org.processmining.plugins.inductiveminer2.mining.InductiveMiner;
import org.processmining.plugins.inductiveminer2.variants.MiningParametersIMInfrequent;
import org.processmining.plugins.InductiveMiner.mining.logs.LifeCycleClassifier;
import org.processmining.framework.packages.PackageManager;

# Set Experiment PARAMETERS

In [6]:
// Root folder for the result files; runExperiment creates one sub-folder per run below it.
String OUTPUT_FOLDER = "output";
// Size argument handed to the random model generator (how big shall the model be?).
int NUM_ACTIVITIES = 15;

// Number of repetitions of the experiment, each with a differently seeded random model.
int EXPERIMENT_RUNS = 30;
// Sub-log sizes to evaluate: gradual increments following a rough exponential pattern for plotting.
int[] sublogSizes = {1, 3, 10, 30, 100, 300, 1000, 3000, 10000, 30000, 100000};

// Default noise threshold for the inductive miner (infrequent variant).
float INDUCTIVE_MINER_THRESHOLD = 0.2f;

# 1. and 2. Generate random model $M$ and simulate log $L$
The random seed of the log generation is set to 1 by default. This way, the log will be the same if GenerateLogAndModel is used twice with the same model tree.

In [7]:
// Event classifier shared by the conversion helpers: events are classified by name
// (XLogInfoImpl.NAME_CLASSIFIER).
public static final XEventClassifier CLASSIFIER = XLogInfoImpl.NAME_CLASSIFIER;

In [8]:
/**
 * Seeds the given generator, lets it produce a random tree and simulates a log from that tree.
 *
 * @param traces     number of traces requested from {@code GenerateLogAndModel.generateLog}
 * @param activities size argument passed to {@code generator.generate}
 * @param generator  tree generator to use; its seed is overwritten with {@code seed}
 * @param seed       seed applied to the generator before the tree is generated
 * @return a pair holding the simulated log (left) and the generated tree (right)
 */
public Pair<XLog, NAryTree> generate(int traces, int activities, NAryTreeGenerator generator, long seed){
    generator.setSeed(seed);
    final NAryTree model = generator.generate(activities);
    final XLog simulatedLog = GenerateLogAndModel.generateLog(model, traces);

    final Pair<XLog, NAryTree> logAndModel = new MutablePair<>(simulatedLog, model);
    return logAndModel;
}
/**
 * Convenience overload that uses a freshly created {@link NAryTreeGenerator}.
 *
 * @param traces     number of traces to simulate
 * @param activities size argument passed on to the generator
 * @param seed       seed applied to the new generator
 * @return a pair holding the simulated log (left) and the generated tree (right)
 */
public Pair<XLog, NAryTree> generate(int traces, int activities, long seed){
    return generate(traces, activities, new NAryTreeGenerator(), seed);
}
/**
 * Convenience overload that uses a fresh generator and the fixed seed 42.
 *
 * @param traces     number of traces to simulate
 * @param activities size argument passed on to the generator
 * @return a pair holding the simulated log (left) and the generated tree (right)
 */
public Pair<XLog, NAryTree> generate(int traces, int activities){
    // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1 (i.e. "421").
    return generate(traces, activities, 42L);
}

## Oddly, the next cell fails on its first run... <br> On the second try, however, it succeeds o_O

In [10]:
// Tree generator shared by all experiment runs; generate(...) re-seeds it before every use.
NAryTreeGenerator generator = new NAryTreeGenerator();

In [11]:
/**
 * Helper that computes the entropy-based precision/recall measures between two models.
 * <p>
 * Transition names are collected from both nets, each net is converted to an automaton via
 * {@code getAutomaton(net, names)}, and the two automata plus their intersection are handed
 * to {@link PrecisionRecallComputer}. The fraction passed along is the number of states of
 * the intersection automaton divided by the number of states of the first automaton.
 * <p>
 * NOTE(review): the first net is labelled with "Md" and the second with "M" (presumably
 * fallback names used by {@code Utils.getName}), while runExperiment calls this method with
 * (original model, discovered model) -- confirm that the labels match the intended roles.
 *
 * @param firstNet  first accepting Petri net
 * @param secondNet second accepting Petri net
 * @return the result object produced by {@link PrecisionRecallComputer}
 */
public EntropyPrecisionRecall getPrecisionAndRecall(AcceptingPetriNet firstNet, AcceptingPetriNet secondNet){
    final String firstLabel = Utils.getName(firstNet.getNet(),"Md");
    final String secondLabel = Utils.getName(secondNet.getNet(),"M");

    // Collect names from the first net, then extend them with those of the second net.
    final String[] namesOfFirst = PrecisionRecallComputer.getTransitionNames(firstNet, new String[]{});
    final String[] sharedNames = PrecisionRecallComputer.getTransitionNames(secondNet, namesOfFirst);

    final Automaton firstAutomaton = getAutomaton(firstNet, sharedNames);
    final Automaton secondAutomaton = getAutomaton(secondNet, sharedNames);
    final Automaton common = firstAutomaton.intersection(secondAutomaton, Utils.NOT_CANCELLER);

    final double stateRatio = common.getNumberOfStates() / (double) firstAutomaton.getNumberOfStates();
    return PrecisionRecallComputer.getPrecisionAndRecall(
            firstAutomaton, firstLabel,
            secondAutomaton, secondLabel,
            common, "MdM",
            stateRatio, Utils.NOT_CANCELLER);
}

In [12]:
/**
 * Converts an {@link AcceptingPetriNet} to an {@link Automaton}.
 * <p>
 * The net is first projected onto the transition names obtained from
 * {@code PrecisionRecallComputer.getTransitionNames(net, activities)} and the projected
 * net is then converted to an automaton.
 *
 * @param net {@link AcceptingPetriNet} to convert.
 * @param activities {@link String}[] array that captures the names in the other part, if names should be converted.
 * @return the automaton of the projected net, or {@code null} if the conversion failed with
 *         an {@link AutomatonFailedException} (the error is printed, not rethrown)
 */
public Automaton getAutomaton(AcceptingPetriNet net, String[] activities){
    String[] names = PrecisionRecallComputer.getTransitionNames(net, activities);
    // Print the array contents; concatenating the array itself only yields "[Ljava.lang.String;@<hash>".
    System.out.println(java.util.Arrays.toString(names));
    AcceptingPetriNet projectedNet = ProjectPetriNetOntoActivities.project(net, Utils.NOT_CANCELLER, names);
    Automaton a = null;
    try {
        a = AcceptingPetriNet2automaton.convert(projectedNet, Integer.MAX_VALUE, Utils.NOT_CANCELLER);
    } catch (AutomatonFailedException e){
        e.printStackTrace();
        System.out.println("Error getting Automaton!");
    }
    return a;
}

/**
 * Converts an {@link AcceptingPetriNet} to an {@link Automaton} without any additional
 * activity names (delegates to {@code getAutomaton(net, activities)} with an empty array).
 *
 * @param net {@link AcceptingPetriNet} to convert.
 * @return whatever the two-argument overload returns for an empty name array
 */
public Automaton getAutomaton(AcceptingPetriNet net){
    final String[] noExtraNames = new String[0];
    return getAutomaton(net, noExtraNames);
}



/**
 * Discovers a process tree from the given log with the inductive miner (infrequent variant).
 * <p>
 * Events are classified with {@code MiningParameters.getDefaultClassifier()}; debug output of
 * the miner is switched off and mining is never cancelled.
 *
 * @param xLog           the event log to mine
 * @param noiseThreshold noise threshold applied to the miner's parameters
 * @return the discovered model converted to a {@link ProcessTree}
 */
public ProcessTree mineTree(XLog xLog, float noiseThreshold){
    XEventClassifier classifier = MiningParameters.getDefaultClassifier();
    org.processmining.plugins.inductiveminer2.logs.IMLog log = new org.processmining.plugins.inductiveminer2.logs.IMLogImpl(xLog, classifier, new LifeCycleClassifier());
    MiningParametersIMInfrequent miningParameters = new MiningParametersIMInfrequent();
    miningParameters.setDebug(false);
    // Previously the parameter was silently ignored and the miner always ran with its built-in default.
    miningParameters.setNoiseThreshold(noiseThreshold);
    EfficientTree eTree = InductiveMiner.mineEfficientTree(log, miningParameters, new PackageManager.Canceller() {
        @Override
        public boolean isCancelled() {
            return false;
        }
    });

    return EfficientTree2processTree.convert(eTree);
}

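# 3. Select growing number of traces from the log

The next cell only defines the helpers that convert trees to accepting Petri nets; the growing sub-logs themselves are cut inside `runExperiment` (via `Utils.cloneLog`).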

In [13]:
/**
 * Converts a {@link ProcessTree} into an {@link AcceptingPetriNet} built from the Petri net,
 * initial marking and final marking produced by {@link ProcessTree2Petrinet}.
 *
 * @param processTree   the process tree to convert
 * @param numActivities not used by the conversion; kept so that existing callers keep compiling
 * @return the accepting Petri net, or {@code null} if the conversion threw a
 *         {@code NotYetImplementedException} or {@code InvalidProcessTreeException}
 *         (the error is printed, not rethrown)
 */
public AcceptingPetriNet convertProcessTreeToNet(ProcessTree processTree, int numActivities) {
    try{
        // The event classes that used to be created here were never read, so they are no longer built.
        ProcessTree2Petrinet.PetrinetWithMarkings petrinetWithMarkings = ProcessTree2Petrinet.convert(processTree, true);
        return new AcceptingPetriNetImpl(petrinetWithMarkings.petrinet, petrinetWithMarkings.initialMarking, petrinetWithMarkings.finalMarking);
    } catch (NotYetImplementedException | InvalidProcessTreeException e){
        e.printStackTrace();
        System.err.println("Error!");
        return null;
    }
}

/**
 * Converts an {@link NAryTree} into an {@link AcceptingPetriNet}.
 * <p>
 * The tree is first turned into a {@link ProcessTree} using event classes created for as many
 * activities as the tree has leaves, then passed to {@code convertProcessTreeToNet}.
 *
 * @param tree the n-ary tree to convert
 * @return the result of {@code convertProcessTreeToNet} for the converted tree
 */
public AcceptingPetriNet convertToNet(NAryTree tree){
    final int leafCount = tree.numLeafs();
    final XEventClasses classesForLeaves = TestUtils.getxEventClasses(CLASSIFIER, leafCount);
    return convertProcessTreeToNet(NAryTreeToProcessTree.convert(tree, classesForLeaves), leafCount);
}

# Try loading a model from a PNML file

In [27]:
import org.simpleframework.xml.Serializer;
import org.simpleframework.xml.core.Persister;
import org.processmining.plugins.pnml.simple.PNMLRoot;
import org.processmining.plugins.pnml.importing.StochasticNetDeserializer;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.processmining.projectedrecallandprecision.helperclasses.ProjectPetriNetOntoActivities;
import org.processmining.models.graphbased.directed.petrinet.StochasticNet;
import org.processmining.models.semantics.petrinet.Marking;

In [25]:
/**
 * Reads a PNML file, converts it to a stochastic net and returns the automaton that
 * {@code PrecisionRecallComputer.getAutomaton} builds for the resulting accepting Petri net.
 *
 * @param filename path of the PNML file to load
 * @return the automaton computed for the loaded net
 * @throws Exception if the file cannot be opened or the PNML content cannot be deserialized
 */
public Automaton openNetAsAutomaton(String filename) throws Exception{
    File file = new File(filename);
    Serializer serializer = new Persister();
    PNMLRoot pnml;
    // Close the stream deterministically; it was previously left open after parsing.
    try (FileInputStream pnmlStream = new FileInputStream(file)) {
        pnml = serializer.read(PNMLRoot.class, pnmlStream);
    }

    StochasticNetDeserializer converter = new StochasticNetDeserializer();
    Object[] result = converter.convertToNet(null, pnml, filename, false);

    // The converter's result array carries the net at index 0 and its initial marking at index 1.
    StochasticNet sNet = (StochasticNet) result[0];
    Marking initMarking = (Marking) result[1];
    AcceptingPetriNet acceptingPetriNet = new AcceptingPetriNetImpl(sNet, initMarking, StochasticNetUtils.getFinalMarking(null, sNet));

    return PrecisionRecallComputer.getAutomaton(acceptingPetriNet);
}

In [28]:
// Example: load the net stored in ./data/Figure5.pnml and convert it to an automaton.
String filename = "./data/Figure5.pnml";
Automaton autom = openNetAsAutomaton(filename);

Assuming race enabling memory for net noID imported from (./data/Figure5.pnml)
Assuming 'minutes' as the time unit in net noID imported from (./data/Figure5.pnml)
[Ljava.lang.String;@47d87bb8


In [30]:
autom.toDot()

digraph Automaton {
  rankdir = LR;
  0 [shape=circle,label=""];
  initial [shape=plaintext,label=""];
  initial -> 0
  0 -> 1 [label="\u0005"]
  1 [shape=circle,label=""];
  1 -> 2 [label="\u0004"]
  1 -> 3 [label="\u0000"]
  2 [shape=circle,label=""];
  2 -> 2 [label="\u0003"]
  2 -> 4 [label="\u0002"]
  2 -> 2 [label="\u0001"]
  3 [shape=doublecircle,label=""];
  4 [shape=circle,label=""];
  4 -> 3 [label="\u0000"]
}


In [None]:
/**
 * Runs one experiment repetition: for every requested sub-log size it writes a CSV file
 * {@code exp_results_<size>.csv} into {@code <outputFolder>/<runNumber>} containing, in order,
 * the precision/recall results for
 * <ol>
 *   <li>the sub-log vs. the given model,</li>
 *   <li>the sub-log vs. the model discovered from that sub-log,</li>
 *   <li>the given model vs. the discovered model.</li>
 * </ol>
 * Timing information is printed to standard output. I/O errors for one sub-log size are
 * printed and do not stop the remaining sizes.
 *
 * @param runNumber         index of this repetition; used as the name of the output sub-folder
 * @param log               the full log from which the sub-logs are taken
 * @param acceptingPetriNet the model the log was generated from
 * @param outputFolder      root folder for all result files
 * @param sublogSizes       sizes of the sub-logs to evaluate
 */
public static void runExperiment(int runNumber, XLog log, AcceptingPetriNet acceptingPetriNet, String outputFolder,int[] sublogSizes){
    File outFolder = new File(outputFolder + File.separator + runNumber);
    // Fail once with a clear message instead of one stack trace per sub-log size.
    if (!outFolder.isDirectory() && !outFolder.mkdirs()){
        System.err.println("Could not create output folder: " + outFolder.getAbsolutePath());
        return;
    }

    for (int i : sublogSizes){
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outFolder, "exp_results_"+i+".csv")))) {
            writer.write(EntropyPrecisionRecall.getHeader()+"\n");

            XLog subLog = Utils.cloneLog(log, i);

            System.out.println("Running with log size: "+subLog.size());

            // (1) sub-log vs. original model
            Stopwatch timer = Stopwatch.createStarted();
            EntropyPrecisionRecall resModelLog = PrecisionRecallComputer.getPrecisionAndRecall(null, Utils.NOT_CANCELLER, subLog,  acceptingPetriNet);
            writer.write(resModelLog.getCSVString()+"\n");
            // Flush early so this row survives a failure in the later, more expensive steps.
            writer.flush();

            System.out.println("Computing recall/precision of sublog/model took: " + timer.stop()); timer.reset(); timer.start();

            // Discover a model from the sub-log.
            ProcessTree modelDiscovered = mineTree(subLog, INDUCTIVE_MINER_THRESHOLD);
            System.out.println("Discovery of m_discov from sublog took: " + timer.stop()); timer.reset(); timer.start();

            AcceptingPetriNet petriNetDiscovered = convertProcessTreeToNet(modelDiscovered, modelDiscovered.size());
            if (petriNetDiscovered == null){
                // convertProcessTreeToNet reports failures by returning null; skip the dependent measurements.
                System.err.println("Skipping measurements on the discovered model for log size " + i + ": conversion to a Petri net failed.");
                continue;
            }

            // (2) sub-log vs. discovered model
            EntropyPrecisionRecall resLogDiscModel = PrecisionRecallComputer.getPrecisionAndRecall(null, Utils.NOT_CANCELLER, subLog,  petriNetDiscovered);
            writer.write(resLogDiscModel.getCSVString()+"\n");

            System.out.println("Computing recall/precision of sublog/m_discov: " + timer.stop()); timer.reset(); timer.start();

            // (3) original model vs. discovered model
            EntropyPrecisionRecall resModelDiscModel = getPrecisionAndRecall(acceptingPetriNet, petriNetDiscovered);
            writer.write(resModelDiscModel.getCSVString()+"\n");
            System.out.println("Computing recall/precision of m_discov/model: " + timer.stop());

            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

In [None]:
// Driver: repeat the experiment EXPERIMENT_RUNS times, each time with a differently seeded model.
for (int n = 0; n < EXPERIMENT_RUNS; n++) {
    System.out.println("**** Running Experiment "+n+" ****");

    // The full log only needs as many traces as the largest requested sub-log
    // (100000 with the default sublogSizes), so derive it instead of hard-coding it.
    int fullLogSize = IntStream.of(sublogSizes).max().orElse(0);

    // generate Log and Model (seed differs per run: 42, 43, ...)
    Pair<XLog,NAryTree> pair = generate(fullLogSize, NUM_ACTIVITIES, generator, 42+n);
    XLog log = pair.getLeft();
    NAryTree tree = pair.getRight();
    System.out.println("Generated log and model with sizes: " + log.size()+" - "+tree.numLeafs()+" (nodes: "+tree.size()+")");

    // convert tree to accepting Petri net
    AcceptingPetriNet acceptingPetriNet = convertToNet(tree);

    // run experiment with different sublog sizes:
    runExperiment(n, log, acceptingPetriNet, OUTPUT_FOLDER, sublogSizes);
}