In [1]:
#All the imports here
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.objects.conversion.process_tree import converter as pt_converter
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization
from pm4py.objects.conversion.dfg import converter as dfg_mining
from pm4py.visualization.petrinet import factory as pn_vis_factory
from collections import defaultdict 
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.adapters.pandas import csv_import_adapter
from pm4py.objects.conversion.log import factory as conversion_factory
from pm4py.util import constants


In [2]:
%config IPCompleter.greedy=True

In [3]:
def explore_log(log):
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))

In [4]:
def find_duplicate_events(x): 
    _size = len(x) 
    duplicate_list = [] 
    for i in range(_size): 
        k = i + 1
        for j in range(k, _size): 
            if x[i] == x[j] and x[i] not in duplicate_list: 
                duplicate_list.append(x[i]) 
    return duplicate_list

In [5]:
#This class represents a directed graph using dfg 
class Graph: 

    def __init__(self,vertices): 
        self.V= vertices #No. of vertices 
        self.graph = defaultdict(list) # default dictionary to store graph 

    # function to add an edge to graph 
    def addEdge(self,u,v): 
        self.graph[u].append(v) 
    
    # Use BFS to check path between s and d 
    def isReachable(self, s, d): 
        # Mark all the vertices as not visited 
        visited =[False]*(self.V) 

        # Create a queue for BFS 
        queue=[]

        # Mark the source node as visited and enqueue it 
        queue.append(s) 
        visited[s] = True

        while queue: 

            #Dequeue a vertex from queue 
            n = queue.pop(0) 

            # If this adjacent node is the destination node, 
            # then return true 
            if n == d: 
                return True

            # Else, continue to do BFS 
            for i in self.graph[n]: 
                if visited[i] == False: 
                    queue.append(i) 
                    visited[i] = True
        # If BFS is complete without visited d 
        return False



In [6]:
def find_weakness(log, forbidden_sequence):
    #Weakness 1: Duplicate or loop-> Same event repeating twice in the log
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            event_list.append(event["concept:name"])
            
        print ("The events which got repeated in the trace are",find_duplicate_events(event_list))
        



    #Weakness 2: Find out if the forbidden sequence of events exists in the log
    #applying the Directly follows graph discovery to get the sequence which are directly following each other
    dfg_simple = dfg_discovery.apply(log)
    violated_restrictions=[]#for directly following each other or indirectly following
    for r in forbidden_sequence:
        count=0
        for d in dfg_simple.elements():
            if(r==d):
                count+=1
                violated_restrictions.append((r,count))
            #else if(r[0]==d[0]):
            
              
                
    print("Violated restrictions, Number of times violated: ",violated_restrictions)

In [7]:

# Create a graph for the given dfg

log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

    
g = Graph(len(list(dfg_simple.elements())))
l=[]
for t in dfg_simple.elements(): 
    for x in t: 
        l.append(x) 
l=list(set(l))#list mapping every element to a number
for d in dfg_simple.elements():
    g.addEdge(l.index(d[0]),l.index(d[1]))
    

u =l.index("register request"); v = l.index("decide")

if g.isReachable(u, v): 
    print("There is a path from %s to %s" % (l[u],l[v])) 
else : 
    print("There is no path from %s to %s" % (l[u],l[v])) 



HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


There is a path from register request to decide


In [8]:
log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

for case_index, case in enumerate(log):
    print(type(case))
    #dfg_simple1 = dfg_discovery.apply(case)
    

print(type(log))

HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.EventLog'>


In [9]:
from pm4py.objects.dfg.utils import dfg_utils
from pm4py.objects.petri.petrinet import PetriNet, Marking
from pm4py.objects.petri import utils as pn_util
from enum import Enum
from pm4py.util import exec_utils


class Parameters(Enum):
    START_ACTIVITIES = 'start_activities'
    END_ACTIVITIES = 'end_activities'



PARAM_KEY_START_ACTIVITIES = Parameters.START_ACTIVITIES
PARAM_KEY_END_ACTIVITIES = Parameters.END_ACTIVITIES

#obtain petrinet from dfg
def obtain_petrinet_from_dfg(dfg, parameters=None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a log, the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log, the DFG is calculated)
    parameters
        Parameters
    """
    if parameters is None:
        parameters = {}

    dfg = dfg
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters,
                                                  dfg_utils.infer_start_activities(
                                                      dfg))
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters,
                                                dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    places_corr = {}
    index = 0

    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])

    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            pn_util.add_arc_from_to(source, trans, net)
            pn_util.add_arc_from_to(trans, places_corr[act], net)

    for act in end_activities:
        if act in places_corr:
            index = index + 1
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
            pn_util.add_arc_from_to(inv_trans, sink, net)

    for el in dfg.keys():
        act1 = el[0]
        act2 = el[1]

        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)

        pn_util.add_arc_from_to(places_corr[act1], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act2], net)

    return net, im, fm



In [10]:
def Unwanted_Activity(log, blacklist):
    print("Unwanted activity function")
    for case_index, case in enumerate(log):
        for event_index, event in enumerate(case):
            if(event["concept:name"] in blacklist):
                print("Duplicate activity=> activity: %s -> case: %d  " % (event["concept:name"], case_index))
    print()
            

In [11]:
def Backloop(log):
    print("Backloop function")
    
    #for trace in event_log:
    #    print(trace)

In [12]:
def Redundant_Activity(log):
    print("Redundant_Activity function")
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            event_list.append(event["concept:name"])
            
        print ("The events which got repeated in the trace are",find_duplicate_events(event_list))

In [13]:
def Interface():
    print("Interface function")

In [14]:
def Switch_of_media():
    print("Switch_of_media function")

In [15]:
def Idle_time():
    print("Idle_time function")

In [16]:
def Variance_of_process_times():
    print("Variance_of_process_times function")

In [17]:
def Bottleneck():
    print("Bottleneck function")
    

In [18]:
def Parallelizable_tasks():
    print("Parallelizable_tasks function")

In [23]:
# Defining main function 
def main(): 
    print("Welcome to Joint Master thesis:\nModelling of production expertise to extend the data-driven analysis of process models") 
    
    '''#Import a log
    log = xes_importer.apply('running-example.xes')
    print("Log imported")
    
    #Explore the log
    #explore_log(log)
    
    #Define the forbidden sequence of events
    #simple restriction which says you cannot decide without examining thoroughly 
    forbidden_sequence=[( 'decide','examine thoroughly')]
    
    #Find different kinds of weakness in the log
    find_weakness(log, forbidden_sequence)
    
    #obtain_petrinet_from_dfg
    dfg_simple = dfg_discovery.apply(log)
    net, im, fm = obtain_petrinet_from_dfg(dfg_simple)

    #Visualise the petrinet obtained
    gviz = pn_vis_factory.apply(net, im, fm)
    pn_vis_factory.view(gviz)'''
    
    #log = xes_importer.apply('running-example.xes')
    log_csv = pd.read_csv('Production_Data.csv', sep=',')
    log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                   constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                    constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                    constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                    constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                                   })
    print("Log imported\n\n\n")
    
  
    
    '''print("Menu:\n 1.Unwanted_Activity\n 2.Backloop\n 3.Redundant_Activity\n 4.Interface\n 5.Switch_of_media\n 6.Idle_time\n 7.Variance_of_process_times\n 8.Bottleneck\n 9.Parallelizable_tasks\n 0.Exit ")
    menuIndex = input("Enter the menu index:")


    if menuIndex==1:
        blacklist=[ 'decide','examine thoroughly']
        Unwanted_Activity(log, blacklist)
        print("index is 1")
    elif menuIndex== 2:    
        Backloop()

    elif menuIndex== 3:
        Redundant_Activity(log)

    elif menuIndex== 4:
        Interface()

    elif menuIndex== 5:
        Switch_of_media()

    elif menuIndex== 6:
        Idle_time()

    elif menuIndex== 7:
        Variance_of_process_times()

    elif menuIndex== 8:
        Bottleneck()

    elif menuIndex== 9:
        Parallelizable_tasks()
    '''
    
    blacklist=[ 'decide','examine thoroughly']
    Unwanted_Activity(log, blacklist)
    
    #Backloop(log)
    
    #Redundant_Activity(log) 
    
    #Interface()
    
    #Switch_of_media()
    
    #Idle_time()
    
    #Variance_of_process_times()
    
    #Bottleneck()
    
    #Parallelizable_tasks()
main()


Welcome to Joint Master thesis:
Modelling of production expertise to extend the data-driven analysis of process models
Log imported



Unwanted activity function




KeyError: 'concept:name'

In [20]:
#Calling main function
if __name__=="__main__": 
    main() 

Welcome to Joint Master thesis:
Modelling of production expertise to extend the data-driven analysis of process models


HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


Log imported



Backloop function


In [21]:
log_csv = pd.read_csv('Production_Data.csv', sep=',')
log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                   constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                    constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                    constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                    constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                                   })
#log_csv.rename(columns={'clientID': 'case:clientID'}, inplace=True
#parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'case'}parameters=parameters
#event_log = log_converter.apply(log_csv, variant=log_converter.Variants.TO_EVENT_LOG)
print(log)

[{'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Turning & Milling - Machine 4', 'Resource': 'Machine 4 - Turning & Milling', 'Start Timestamp': '1/29/2012 23:24:00', 'Complete Timestamp': '1/30/2012 5:43:00', 'Span': '006:19', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4932', 'Report Type': 'S', 'Qty Completed': 1, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '19:00.0'}, '..', {'Case ID': 'Case 1', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '2/17/2012 0:00:00', 'Complete Timestamp': '2/17/2012 1:00:00', 'Span': '000:00', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4820', 'Report Type': 'D', 'Qty Completed': 9, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}]}, '....', {'attributes': {'concept:name': 'Case 99'}, 'events': [{'Case ID': 'Case 99', 'Activity': 'Turning & Milling Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': 

  
