In [35]:
#All the imports here
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.objects.conversion.process_tree import converter as pt_converter
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization
from pm4py.objects.conversion.dfg import converter as dfg_mining
from pm4py.visualization.petrinet import factory as pn_vis_factory
from collections import defaultdict 
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.adapters.pandas import csv_import_adapter
from pm4py.objects.conversion.log import factory as conversion_factory
from pm4py.util import constants
import math
from datetime import date
import numpy as np
from pm4py.objects.log.util import sorting

In [36]:
%config IPCompleter.greedy=True

In [46]:
def explore_log(log):
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            

In [4]:
#This class represents a directed graph using dfg 
class Graph: 

    def __init__(self,vertices): 
        self.V= vertices #No. of vertices 
        self.graph = defaultdict(list) # default dictionary to store graph 

    # function to add an edge to graph 
    def addEdge(self,u,v): 
        self.graph[u].append(v) 
    
    # Use BFS to check path between s and d 
    def isReachable(self, s, d): 
        # Mark all the vertices as not visited 
        visited =[False]*(self.V) 

        # Create a queue for BFS 
        queue=[]

        # Mark the source node as visited and enqueue it 
        queue.append(s) 
        visited[s] = True

        while queue: 

            #Dequeue a vertex from queue 
            n = queue.pop(0) 

            # If this adjacent node is the destination node, 
            # then return true 
            if n == d: 
                return True

            # Else, continue to do BFS 
            for i in self.graph[n]: 
                if visited[i] == False: 
                    queue.append(i) 
                    visited[i] = True
        # If BFS is complete without visited d 
        return False



In [5]:
def find_weakness(log, forbidden_sequence):
    #Weakness 1: Duplicate or loop-> Same event repeating twice in the log
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            event_list.append(event["concept:name"])
            
        print ("The events which got repeated in the trace are",find_duplicate_events(event_list))
        



    #Weakness 2: Find out if the forbidden sequence of events exists in the log
    #applying the Directly follows graph discovery to get the sequence which are directly following each other
    dfg_simple = dfg_discovery.apply(log)
    violated_restrictions=[]#for directly following each other or indirectly following
    for r in forbidden_sequence:
        count=0
        for d in dfg_simple.elements():
            if(r==d):
                count+=1
                violated_restrictions.append((r,count))
            #else if(r[0]==d[0]):
            
              
                
    print("Violated restrictions, Number of times violated: ",violated_restrictions)

In [6]:

# Create a graph for the given dfg

log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

    
g = Graph(len(list(dfg_simple.elements())))
l=[]
for t in dfg_simple.elements(): 
    for x in t: 
        l.append(x) 
l=list(set(l))#list mapping every element to a number
for d in dfg_simple.elements():
    g.addEdge(l.index(d[0]),l.index(d[1]))
    

u =l.index("register request"); v = l.index("decide")

if g.isReachable(u, v): 
    print("There is a path from %s to %s" % (l[u],l[v])) 
else : 
    print("There is no path from %s to %s" % (l[u],l[v])) 



HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


There is a path from register request to decide


In [7]:
log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

for case_index, case in enumerate(log):
    print(type(case))
    #dfg_simple1 = dfg_discovery.apply(case)
    

print(type(log))

HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.EventLog'>


In [8]:
from pm4py.objects.dfg.utils import dfg_utils
from pm4py.objects.petri.petrinet import PetriNet, Marking
from pm4py.objects.petri import utils as pn_util
from enum import Enum
from pm4py.util import exec_utils


class Parameters(Enum):
    START_ACTIVITIES = 'start_activities'
    END_ACTIVITIES = 'end_activities'



PARAM_KEY_START_ACTIVITIES = Parameters.START_ACTIVITIES
PARAM_KEY_END_ACTIVITIES = Parameters.END_ACTIVITIES

#obtain petrinet from dfg
def obtain_petrinet_from_dfg(dfg, parameters=None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a log, the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log, the DFG is calculated)
    parameters
        Parameters
    """
    if parameters is None:
        parameters = {}

    dfg = dfg
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters,
                                                  dfg_utils.infer_start_activities(
                                                      dfg))
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters,
                                                dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    places_corr = {}
    index = 0

    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])

    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            pn_util.add_arc_from_to(source, trans, net)
            pn_util.add_arc_from_to(trans, places_corr[act], net)

    for act in end_activities:
        if act in places_corr:
            index = index + 1
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
            pn_util.add_arc_from_to(inv_trans, sink, net)

    for el in dfg.keys():
        act1 = el[0]
        act2 = el[1]

        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)

        pn_util.add_arc_from_to(places_corr[act1], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act2], net)

    return net, im, fm



In [9]:
def Unwanted_Activity(log, blacklist):
    print("Unwanted activity function")
    for case_index, case in enumerate(log):
        for event_index, event in enumerate(case):
            if(event["Activity"] in blacklist):
                print("Unwanted activity=> activity: %s -> case: %s that started @ %s " % (event["Activity"], event["Case ID"], event["Start Timestamp"]))
            

In [66]:
#Checks for the largest common prefix  
def lcp(s, t):  
  n = min(len(s),len(t));  
  for i in range(0,n):  
    if(s[i] != t[i]):  
      return s[0:i];  
  else:  
    return s[0:n];  

def Find_sequence(eventList):
    lrs="";  
    n = len(eventList);  
    for i in range(0,n):  
      for j in range(i+1,n):  
        #Checks for the largest common factors in every substring  
        x = lcp(eventList[i:n],eventList[j:n]);  
            #If the current prefix is greater than previous one   
            #then it takes the current one as longest repeating sequence  
        if(len(x) > len(lrs)):  
          lrs=x;    
    if(len(set(lrs))>1):
        print("Longest repeating sequence: ",lrs);  
    

In [11]:
def Backloop(log):
    print("Backloop function")
    for case_index, case in enumerate(log):
        eventList=[]
        for event_index, event in enumerate(case):
            eventList.append(event["Activity"])
        print("Repeating sequence for events in case:",case.attributes['concept:name'])   
        Find_sequence(eventList)
        
    #for trace in event_log:
    #    print(trace)

In [12]:
def find_duplicate_events(x): 
    _size = len(x) 
    duplicate_list = [] 
    for i in range(_size): 
        k = i + 1
        for j in range(k, _size): 
            if x[i] == x[j] and x[i] not in duplicate_list: 
                duplicate_list.append(x[i]) 
    return duplicate_list

In [69]:
def Redundant_Activity(log):
    print("Redundant_Activity function")
    for case_index, case in enumerate(log):
        print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            print("event start time: %s  event activity: %s" % (event["Start Timestamp"], event["Activity"]))
            event_list.append(event["Activity"])
            
        print ("The events which got repeated in the trace are",find_duplicate_events(event_list))

In [14]:
def Interface(log):
    print("Interface function")
    for case_index, case in enumerate(log):
        d={}
        l=""
        print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        
        for event_index, event in enumerate(case):
            if( len(d)!=0 and event["Activity"] in d.keys() and event["Resource"]!= d[event["Activity"]]):
                print("The resource has changed for the activity: %s from %s to %s"%(event["Activity"], d[event["Activity"]], event["Resource"]))
            d[event["Activity"]]=event["Resource"]
            
            
   

In [15]:
def Switch_of_media(log):
    print("Switch_of_media function")
    print("Logic is same as Interface function as there is no column for media in the given CSV ")

In [16]:
def Idle_time(log):
    print("Idle_time function")
    for case_index, case in enumerate(log):
        print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        prev_end_timestamp=0
        idle_time=0
        prev_activity=""
        for event_index, event in enumerate(case):
            if(prev_end_timestamp!=0):
                idle_time=pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S")-prev_end_timestamp
            print("Idle time between previous activity:%s and current activity:%s is %s"%(prev_activity, event["Activity"], idle_time))
            prev_end_timestamp=pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S") 
            prev_activity=event["Activity"]

In [17]:
def mean1(log):
    total_events=0
    avg_dict={}
    for case_index, case in enumerate(log): 
        for event_index, event in enumerate(case):
            total_events=+1
            if event["Activity"] not in avg_dict.keys():
                avg_dict[event["Activity"]]=((pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1),1)
            else:
                avg_dict[event["Activity"]]=(avg_dict[event["Activity"]][0] + (pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1),(avg_dict[event["Activity"]][1])+1)
    
    avg_dict2={}
    for k, v in avg_dict.items():
        avg_dict2[k]=v[0]/v[1]
    print(avg_dict2)
        

In [18]:
def Variance_of_process_times(log):
    print("Variance_of_process_times function")
    d={}#mean1(log)
    l=[]
    for case_index, case in enumerate(log): 
        for event_index, event in enumerate(case):
            if event["Activity"] not in d.keys():
                l=[]
            else:
                l=d[event["Activity"]]
            l.append( (pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1))
            d[event["Activity"]]=(l)
    variance_dict={}
    for k,v in d.items():
        variance_dict[k]= np.var(v)
    print("Variance for each activity:")
    print(variance_dict)
        
            
            

In [19]:
def Bottleneck(log):
    print("Bottleneck function")
    for case_index, case in enumerate(log):
        print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        duration=0 
        a=""
        max_duration=0
        for event_index, event in enumerate(case):
            duration=pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S")
            if(max_duration==0 or duration>max_duration):
                max_duration=duration
                a=event["Activity"]
        print("Bottleneck Activity:%s took maximum time of %s to complete"%(a,max_duration ))
           
    

In [20]:
def Parallelizable_tasks():
    print("Parallelizable_tasks function\n\n")
    log_csv2 = pd.read_csv('Production_Data.csv', sep=',')
    log_csv2.rename(columns={'Activity': 'concept:name'}, inplace=True)
    parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'Case ID'}
    event_log2 = log_converter.apply(log_csv2, parameters=parameters, variant=log_converter.Variants.TO_EVENT_LOG)
    #print(event_log)
    dfg_simple2 = dfg_discovery.apply(event_log2)
    #('Turning & Milling Q.C.', 'Turning & Milling - Machine 8'): 27,
    for k in dfg_simple2.keys():
        if(k[0]!=k[1]):
            if (k[1],k[0]) in dfg_simple2.keys():
                print(k," : are Parallelizable activities")

In [70]:
# Defining main function 
def main(): 
    print("Welcome to Joint Master thesis:\nModelling of production expertise to extend the data-driven analysis of process models") 
    
    '''#Import a log
    log = xes_importer.apply('running-example.xes')
    print("Log imported")
    
    #Explore the log
    #explore_log(log)
    
    #Define the forbidden sequence of events
    #simple restriction which says you cannot decide without examining thoroughly 
    forbidden_sequence=[( 'decide','examine thoroughly')]
    
    #Find different kinds of weakness in the log
    find_weakness(log, forbidden_sequence)
    
    #obtain_petrinet_from_dfg
    dfg_simple = dfg_discovery.apply(log)
    net, im, fm = obtain_petrinet_from_dfg(dfg_simple)

    #Visualise the petrinet obtained
    gviz = pn_vis_factory.apply(net, im, fm)
    pn_vis_factory.view(gviz)'''
    
    #log = xes_importer.apply('running-example.xes')
    log_csv = pd.read_csv('Production_Data.csv', sep=',')
    log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                   constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                    constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                    constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                    constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                                   })
    print("Log imported\n\n\n")
    
    logSorted = sorting.sort_timestamp(log,"Start Timestamp", False)
   
    #print(log)'sorted_log' x.attributes["concept:name"], x.events["Start Timestamp"]
    #for i in log:
    #    print(sorted(i),events['Start Timestamp'])
    
    blacklist=[ 'Lapping - Machine 1','Turning & Milling - Machine 8']
    #Unwanted_Activity(logSorted, blacklist)
    
    #Backloop(logSorted)
    
    Redundant_Activity(logSorted) 
    
    #Interface(logSorted)
    
    #Switch_of_media(logSorted) Same as Interface as there is no column for media in the given CSV 
    
    #Idle_time(logSorted)
    
    #Variance_of_process_times(logSorted)
    
    #Bottleneck(logSorted)
    
    #Parallelizable_tasks()
main()


Welcome to Joint Master thesis:
Modelling of production expertise to extend the data-driven analysis of process models
Log imported







Redundant_Activity function

 Case Id: Case 190


KeyError: 'Staet Timestamp'

In [57]:
log_csv = pd.read_csv('Production_Data.csv', sep=',')
log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                               constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                               })
print("Log imported\n\n\n")


#print(log)
for case_index, case in enumerate(log):
    print("Caseeeeee:",case)
    #print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
    for event_index, event in enumerate(case):
        print("Eventtttttt:",event)

  


Log imported



Caseeeeee: {'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Turning & Milling - Machine 4', 'Resource': 'Machine 4 - Turning & Milling', 'Start Timestamp': '1/29/2012 23:24:00', 'Complete Timestamp': '1/30/2012 5:43:00', 'Span': '006:19', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4932', 'Report Type': 'S', 'Qty Completed': 1, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '19:00.0'}, '..', {'Case ID': 'Case 1', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '2/17/2012 0:00:00', 'Complete Timestamp': '2/17/2012 1:00:00', 'Span': '000:00', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4820', 'Report Type': 'D', 'Qty Completed': 9, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}]}
Eventtttttt: {'Case ID': 'Case 1', 'Activity': 'Turning & Milling - Machine 4', 'Resource': 'Machine 4 - Turning & Milling', 'Start Timestamp': '1/29

Eventtttttt: {'Case ID': 'Case 17', 'Activity': 'Turning - Machine 8', 'Resource': 'Machine 15 - Turning', 'Start Timestamp': '2/28/2012 16:32:00', 'Complete Timestamp': '2/28/2012 19:22:00', 'Span': '002:50', 'Work Order  Qty': 100, 'Part Desc.': 'Clamp', 'Worker ID': 'ID4219', 'Report Type': 'S', 'Qty Completed': 1, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '50:00.0'}
Eventtttttt: {'Case ID': 'Case 17', 'Activity': 'Turning - Machine 8', 'Resource': 'Machine 15 - Turning', 'Start Timestamp': '2/28/2012 20:00:00', 'Complete Timestamp': '2/29/2012 6:40:00', 'Span': '010:40', 'Work Order  Qty': 100, 'Part Desc.': 'Clamp', 'Worker ID': 'ID4799', 'Report Type': 'D', 'Qty Completed': 59, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '40:00.0'}
Eventtttttt: {'Case ID': 'Case 17', 'Activity': 'Turning - Machine 8', 'Resource': 'Machine 15 - Turning', 'Start Timestamp': '2/29/2012 7:20:00', 'Complete Timestamp': '2/29/2012 15:40:00', 'Span': '008:20', '

Eventtttttt: {'Case ID': 'Case 194', 'Activity': 'Laser Marking - Machine 7', 'Resource': 'Machine 7- Laser Marking', 'Start Timestamp': '1/23/2012 15:30:00', 'Complete Timestamp': '1/23/2012 15:56:00', 'Span': '000:26', 'Work Order  Qty': 40, 'Part Desc.': 'Socket', 'Worker ID': 'ID0998', 'Report Type': 'D', 'Qty Completed': 38, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '26:00.0'}
Eventtttttt: {'Case ID': 'Case 194', 'Activity': 'Lapping - Machine 1', 'Resource': 'Machine 1 - Lapping', 'Start Timestamp': '1/24/2012 0:00:00', 'Complete Timestamp': '1/24/2012 1:15:00', 'Span': '000:00', 'Work Order  Qty': 40, 'Part Desc.': 'Socket', 'Worker ID': 'ID4882', 'Report Type': 'D', 'Qty Completed': 0, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '15:00.0'}
Eventtttttt: {'Case ID': 'Case 194', 'Activity': 'Lapping - Machine 1', 'Resource': 'Machine 1 - Lapping', 'Start Timestamp': '1/24/2012 8:14:00', 'Complete Timestamp': '1/24/2012 11:23:00', 'Span': '

Eventtttttt: {'Case ID': 'Case 23', 'Activity': 'Turning - Machine 5', 'Resource': 'Machine 5 - Turning & Milling', 'Start Timestamp': '1/23/2012 16:10:00', 'Complete Timestamp': '1/23/2012 20:13:00', 'Span': '004:03', 'Work Order  Qty': 13, 'Part Desc.': 'Plug', 'Worker ID': 'ID4167', 'Report Type': 'D', 'Qty Completed': 13, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '03:00.0'}
Eventtttttt: {'Case ID': 'Case 23', 'Activity': 'Turning & Milling - Machine 5', 'Resource': 'Machine 5 - Turning & Milling', 'Start Timestamp': '1/23/2012 20:43:00', 'Complete Timestamp': '1/24/2012 2:10:00', 'Span': '005:27', 'Work Order  Qty': 13, 'Part Desc.': 'Plug', 'Worker ID': 'ID4529', 'Report Type': 'S', 'Qty Completed': 0, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '27:00.0'}
Eventtttttt: {'Case ID': 'Case 23', 'Activity': 'Turning Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '1/24/2012 7:00:00', 'Complete Timestamp': '1/24/2012 8:00:00', 'Span': 

Eventtttttt: {'Case ID': 'Case 256', 'Activity': 'Lapping - Machine 1', 'Resource': 'Machine 1 - Lapping', 'Start Timestamp': '2/6/2012 13:47:00', 'Complete Timestamp': '2/6/2012 15:20:00', 'Span': '001:33', 'Work Order  Qty': 100, 'Part Desc.': 'Spinner', 'Worker ID': 'ID0998', 'Report Type': 'D', 'Qty Completed': 32, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '33:00.0'}
Eventtttttt: {'Case ID': 'Case 256', 'Activity': 'Turning & Milling Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '2/6/2012 15:00:00', 'Complete Timestamp': '2/6/2012 16:00:00', 'Span': '001:00', 'Work Order  Qty': 100, 'Part Desc.': 'Spinner', 'Worker ID': 'ID4287', 'Report Type': 'D', 'Qty Completed': 16, 'Qty Rejected': 1, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}
Eventtttttt: {'Case ID': 'Case 256', 'Activity': 'Round Grinding - Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '2/6/2012 16:00:00', 'Complete Timestamp': '2/6/2012 18:00:00', 'Span': '002:00', 'Wo

Eventtttttt: {'Case ID': 'Case 268', 'Activity': 'Turning & Milling Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '1/30/2012 6:59:00', 'Complete Timestamp': '1/30/2012 7:29:00', 'Span': '000:30', 'Work Order  Qty': 35, 'Part Desc.': 'Locknut', 'Worker ID': 'ID4618', 'Report Type': 'D', 'Qty Completed': 0, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '30:00.0'}
Eventtttttt: {'Case ID': 'Case 268', 'Activity': 'Turning & Milling Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '1/30/2012 8:10:00', 'Complete Timestamp': '1/30/2012 9:25:00', 'Span': '000:00', 'Work Order  Qty': 35, 'Part Desc.': 'Locknut', 'Worker ID': 'ID4618', 'Report Type': 'D', 'Qty Completed': 34, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '15:00.0'}
Eventtttttt: {'Case ID': 'Case 268', 'Activity': 'Turning & Milling Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '2/6/2012 9:00:00', 'Complete Timestamp': '2/6/2012 10:21:00', 'Span': '001:21', 'Work O

Eventtttttt: {'Case ID': 'Case 52', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '2/29/2012 0:00:00', 'Complete Timestamp': '2/29/2012 1:00:00', 'Span': '000:00', 'Work Order  Qty': 75, 'Part Desc.': 'Ballnut', 'Worker ID': 'ID4820', 'Report Type': 'D', 'Qty Completed': 33, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}
Caseeeeee: {'attributes': {'concept:name': 'Case 53'}, 'events': [{'Case ID': 'Case 53', 'Activity': 'Turning & Milling - Machine 8', 'Resource': 'Machine 8 - Turning & Milling', 'Start Timestamp': '3/14/2012 13:50:00', 'Complete Timestamp': '3/14/2012 15:56:00', 'Span': '002:06', 'Work Order  Qty': 10, 'Part Desc.': 'Wheel', 'Worker ID': 'ID0997', 'Report Type': 'S', 'Qty Completed': 0, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '06:00.0'}, '..', {'Case ID': 'Case 53', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '3/25/2012 0:00:00', 'Complete Timestamp': '3/25/2012 1:00:00', 'Span

Eventtttttt: {'Case ID': 'Case 89', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '2/27/2012 0:00:00', 'Complete Timestamp': '2/27/2012 1:00:00', 'Span': '000:00', 'Work Order  Qty': 200, 'Part Desc.': 'Drum', 'Worker ID': 'ID4491', 'Report Type': 'D', 'Qty Completed': 200, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}
Eventtttttt: {'Case ID': 'Case 89', 'Activity': 'Laser Marking - Machine 7', 'Resource': 'Machine 7- Laser Marking', 'Start Timestamp': '2/27/2012 11:08:00', 'Complete Timestamp': '2/27/2012 12:23:00', 'Span': '000:00', 'Work Order  Qty': 200, 'Part Desc.': 'Drum', 'Worker ID': 'ID0998', 'Report Type': 'D', 'Qty Completed': 200, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '15:00.0'}
Eventtttttt: {'Case ID': 'Case 89', 'Activity': 'Final Inspection Q.C.', 'Resource': 'Quality Check 1', 'Start Timestamp': '2/27/2012 11:09:00', 'Complete Timestamp': '2/27/2012 11:10:00', 'Span': '000:01', 'Work Order  Qty':

In [22]:
#Calling main function
#if __name__=="__main__": 
#    main() 

Welcome to Joint Master thesis:
Modelling of production expertise to extend the data-driven analysis of process models
Log imported



Unwanted activity function
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 1 that started @ 2/14/2012 0:00:00 
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 1 that started @ 2/14/2012 0:00:00 
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 1 that started @ 2/14/2012 9:05:00 
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 1 that started @ 2/14/2012 9:05:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 100 that started @ 2/20/2012 17:09:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 100 that started @ 2/20/2012 22:57:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 100 that started @ 2/21/2012 6:45:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 100 that started @ 2/21/20




Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 260 that started @ 2/16/2012 9:15:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 261 that started @ 2/6/2012 20:38:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 261 that started @ 2/7/2012 2:05:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 261 that started @ 2/7/2012 8:16:00 
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 262 that started @ 2/7/2012 12:23:00 
Unwanted activity=> activity: Lapping - Machine 1 -> case: Case 262 that started @ 2/7/2012 13:40:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 263 that started @ 1/2/2012 9:00:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 263 that started @ 1/3/2012 11:52:00 
Unwanted activity=> activity: Turning & Milling - Machine 8 -> case: Case 263 that started @ 1/4/2012 6:59:00 
Unwanted activity=> activ