In [1]:
#All the imports here
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.visualization.process_tree import visualizer as pt_visualizer
from pm4py.objects.conversion.process_tree import converter as pt_converter
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.visualization.dfg import visualizer as dfg_visualization
from pm4py.objects.conversion.dfg import converter as dfg_mining
from pm4py.visualization.petrinet import factory as pn_vis_factory
from collections import defaultdict 
import pandas as pd
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.adapters.pandas import csv_import_adapter
from pm4py.objects.conversion.log import factory as conversion_factory
from pm4py.util import constants
import math
from datetime import date
import numpy as np
from pm4py.objects.log.util import sorting
from pm4py.algo.filtering.log.attributes import attributes_filter

In [2]:
%config IPCompleter.greedy=True

In [3]:
def explore_log(log):
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            

In [4]:
#This class represents a directed graph using dfg 
class Graph: 

    def __init__(self,vertices): 
        self.V= vertices #No. of vertices 
        self.graph = defaultdict(list) # default dictionary to store graph 

    # function to add an edge to graph 
    def addEdge(self,u,v): 
        self.graph[u].append(v) 
    
    # Use BFS to check path between s and d 
    def isReachable(self, s, d): 
        # Mark all the vertices as not visited 
        visited =[False]*(self.V) 

        # Create a queue for BFS 
        queue=[]

        # Mark the source node as visited and enqueue it 
        queue.append(s) 
        visited[s] = True

        while queue: 

            #Dequeue a vertex from queue 
            n = queue.pop(0) 

            # If this adjacent node is the destination node, 
            # then return true 
            if n == d: 
                return True

            # Else, continue to do BFS 
            for i in self.graph[n]: 
                if visited[i] == False: 
                    queue.append(i) 
                    visited[i] = True
        # If BFS is complete without visited d 
        return False



In [5]:
def find_weakness(log, forbidden_sequence):
    #Weakness 1: Duplicate or loop-> Same event repeating twice in the log
    for case_index, case in enumerate(log):
        print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            print("event index: %d  event activity: %s" % (event_index, event["concept:name"]))
            event_list.append(event["concept:name"])
            
        print ("The events which got repeated in the trace are",find_duplicate_events(event_list))
        



    #Weakness 2: Find out if the forbidden sequence of events exists in the log
    #applying the Directly follows graph discovery to get the sequence which are directly following each other
    dfg_simple = dfg_discovery.apply(log)
    violated_restrictions=[]#for directly following each other or indirectly following
    for r in forbidden_sequence:
        count=0
        for d in dfg_simple.elements():
            if(r==d):
                count+=1
                violated_restrictions.append((r,count))
            #else if(r[0]==d[0]):
            
              
                
    print("Violated restrictions, Number of times violated: ",violated_restrictions)

In [6]:

# Create a graph for the given dfg

log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

    
g = Graph(len(list(dfg_simple.elements())))
l=[]
for t in dfg_simple.elements(): 
    for x in t: 
        l.append(x) 
l=list(set(l))#list mapping every element to a number
for d in dfg_simple.elements():
    g.addEdge(l.index(d[0]),l.index(d[1]))
    

u =l.index("register request"); v = l.index("decide")

if g.isReachable(u, v): 
    print("There is a path from %s to %s" % (l[u],l[v])) 
else : 
    print("There is no path from %s to %s" % (l[u],l[v])) 



HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


There is a path from register request to decide


In [7]:
log = xes_importer.apply('running-example.xes')
dfg_simple = dfg_discovery.apply(log)

for case_index, case in enumerate(log):
    print(type(case))
    #dfg_simple1 = dfg_discovery.apply(case)
    

print(type(log))

HBox(children=(FloatProgress(value=0.0, description='parsing log, completed traces :: ', max=6.0, style=Progre…


<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.Trace'>
<class 'pm4py.objects.log.log.EventLog'>


In [8]:
from pm4py.objects.dfg.utils import dfg_utils
from pm4py.objects.petri.petrinet import PetriNet, Marking
from pm4py.objects.petri import utils as pn_util
from enum import Enum
from pm4py.util import exec_utils


class Parameters(Enum):
    START_ACTIVITIES = 'start_activities'
    END_ACTIVITIES = 'end_activities'



PARAM_KEY_START_ACTIVITIES = Parameters.START_ACTIVITIES
PARAM_KEY_END_ACTIVITIES = Parameters.END_ACTIVITIES

#obtain petrinet from dfg
def obtain_petrinet_from_dfg(dfg, parameters=None):
    """
    Applies the DFG mining on a given object (if it is a Pandas dataframe or a log, the DFG is calculated)

    Parameters
    -------------
    dfg
        Object (DFG) (if it is a Pandas dataframe or a log, the DFG is calculated)
    parameters
        Parameters
    """
    if parameters is None:
        parameters = {}

    dfg = dfg
    start_activities = exec_utils.get_param_value(Parameters.START_ACTIVITIES, parameters,
                                                  dfg_utils.infer_start_activities(
                                                      dfg))
    end_activities = exec_utils.get_param_value(Parameters.END_ACTIVITIES, parameters,
                                                dfg_utils.infer_end_activities(dfg))
    activities = dfg_utils.get_activities_from_dfg(dfg)

    net = PetriNet("")
    im = Marking()
    fm = Marking()

    source = PetriNet.Place("source")
    net.places.add(source)
    im[source] = 1
    sink = PetriNet.Place("sink")
    net.places.add(sink)
    fm[sink] = 1

    places_corr = {}
    index = 0

    for act in activities:
        places_corr[act] = PetriNet.Place(act)
        net.places.add(places_corr[act])

    for act in start_activities:
        if act in places_corr:
            index = index + 1
            trans = PetriNet.Transition(act + "_" + str(index), act)
            net.transitions.add(trans)
            pn_util.add_arc_from_to(source, trans, net)
            pn_util.add_arc_from_to(trans, places_corr[act], net)

    for act in end_activities:
        if act in places_corr:
            index = index + 1
            inv_trans = PetriNet.Transition(act + "_" + str(index), None)
            net.transitions.add(inv_trans)
            pn_util.add_arc_from_to(places_corr[act], inv_trans, net)
            pn_util.add_arc_from_to(inv_trans, sink, net)

    for el in dfg.keys():
        act1 = el[0]
        act2 = el[1]

        index = index + 1
        trans = PetriNet.Transition(act2 + "_" + str(index), act2)
        net.transitions.add(trans)

        pn_util.add_arc_from_to(places_corr[act1], trans, net)
        pn_util.add_arc_from_to(trans, places_corr[act2], net)

    return net, im, fm



In [9]:
cols=['Detected Weakness Row','Case ID','Weakness Type (AF/PA)','Weakness ID','Weakness Origin', 'Weakness Time','Weakness Information','Weakness Measurement', 'Weakness Level']
df=pd.DataFrame(columns=cols)
#len(cols)

'''row={cols[0]:1, cols[1]: 'Case 10', cols[2]:'FA', cols[3]:'1',cols[4]:'User',cols[5]:'10/02/2020 12:20',cols[6]:'Unwanted activity “a"',cols[7]:'Happened in the case',cols[8]:''}
#cols[3]:'1'
df = df.append(row, ignore_index=True)'''



'row={cols[0]:1, cols[1]: \'Case 10\', cols[2]:\'FA\', cols[3]:\'1\',cols[4]:\'User\',cols[5]:\'10/02/2020 12:20\',cols[6]:\'Unwanted activity “a"\',cols[7]:\'Happened in the case\',cols[8]:\'\'}\n#cols[3]:\'1\'\ndf = df.append(row, ignore_index=True)'

In [10]:
def Unwanted_Activity(log, blacklist):
    global df, cols
    print("Unwanted activity function")
    for case_index, case in enumerate(log):
        for event_index, event in enumerate(case):
            if(event["Activity"] in blacklist):
                #print("Unwanted activity=> activity: %s -> case: %s that started @ %s " % (event["Activity"], event["Case ID"], event["Start Timestamp"]))
                row={cols[0]:event["Case ID"]+"-> Event "+str(event_index), cols[1]: event["Case ID"], cols[2]:'AF', cols[3]:'1',cols[4]:'Expert',cols[5]:event["Start Timestamp"],cols[6]:'Unwanted activity \"'+event["Activity"]+'\"',cols[7]:'In the case', cols[8]:'Event level'}
                df=df.append(row, ignore_index=True)


In [11]:
#Checks for the largest common prefix  
def lcp(s, t):  
  n = min(len(s),len(t));  
  for i in range(0,n):  
    if(s[i] != t[i]):  
      return s[0:i];  
  else:  
    return s[0:n];  

def Find_sequence(eventList):
    lrs="";  
    n = len(eventList);  
    for i in range(0,n):  
      for j in range(i+1,n):  
        #Checks for the largest common factors in every substring  
        x = lcp(eventList[i:n],eventList[j:n]);  
            #If the current prefix is greater than previous one   
            #then it takes the current one as longest repeating sequence  
        if(len(x) > len(lrs)):
            lrs=x;
    
          
            
    if(len(set(lrs))>1):
        return (lrs);  
    

In [12]:
def Backloop(log):
    print("Backloop function")
    global df
    for case_index, case in enumerate(log):
        eventList=[]
        lrs=""
        indexList=[]
        for event_index, event in enumerate(case):
            eventList.append(event["Activity"])  
        if(Find_sequence(eventList) is not None ):
            lrs=Find_sequence(eventList)
            #print("Repeating sequence for events in case:",case.attributes['concept:name']," is: ", lrs)  
            row={cols[0]:case.attributes['concept:name'], cols[1]: case.attributes['concept:name'], cols[2]:'AF', cols[3]:'2',cols[4]:'Automatic detection',cols[5]:'',cols[6]:'Backloop {'+''.join(lrs)+'}',cols[7]:'In the case',cols[8]:'Case level'}
            df=df.append(row, ignore_index=True)
    #for trace in event_log:
    #    print(trace)

In [13]:
def find_duplicate_events(x): 
    _size = len(x) 
    duplicate_list = [] 
    for i in range(_size): 
        k = i + 1
        for j in range(k, _size): 
            if x[i] == x[j] and x[i] not in duplicate_list: 
                duplicate_list.append(x[i]) 
    return duplicate_list

In [14]:
def Redundant_Activity(log):
    global df
    print("Redundant_Activity function")
    for case_index, case in enumerate(log):
        #print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        event_list=[]
        
        for event_index, event in enumerate(case):
            #print("event start time: %s  event activity: %s" % (event["Start Timestamp"], event["Activity"]))
            event_list.append(event["Activity"])  
        duplicateEventList=[]
        duplicateEventList=find_duplicate_events(event_list)
        #print ("The events which got repeated in the trace are",duplicateEventList)
        if(len(duplicateEventList)>0):
            row={cols[0]:case.attributes["concept:name"], cols[1]: event["Case ID"], cols[2]:'AF', cols[3]:'3',cols[4]:'Automatic detection',cols[5]:"",cols[6]:'Redundant Activities list: \"'+''.join(duplicateEventList)+'\"',cols[7]:'In the case', cols[8]:'Case Level'}
            df=df.append(row, ignore_index=True)
            

In [15]:
def Interface(log):
    print("Interface function")
    global df
    for case_index, case in enumerate(log):
        d={}
        l=""
        #print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        
        '''for event_index, event in enumerate(case):
            if( len(d)!=0 and event["Activity"] in d.keys() and event["Resource"]!= d[event["Activity"]]):
                print("The resource has changed for the activity: %s from %s to %s"%(event["Activity"], d[event["Activity"]], event["Resource"]))
            d[event["Activity"]]=event["Resource"]'''
        prev=""
        for event_index, event in enumerate(case):
            if( prev!="" and  event["Resource"]!= prev):
                #print("The resource has changed for the activity: \"%s\" from \"%s\" to \"%s\""%(event["Activity"], prev, event["Resource"]))
                row={cols[0]:event["Case ID"]+"-> Event "+str(event_index), cols[1]: event["Case ID"], cols[2]:'AF', cols[3]:'4',cols[4]:'Automatic detection',cols[5]:event["Start Timestamp"],cols[6]:'Change of interface for activity '+event["Activity"]+' from ' +prev+' to '+ event["Resource"],cols[7]:'In the case',cols[8]:'Event Level'}
                df=df.append(row, ignore_index=True)


            prev=event["Resource"]
            
            
   

In [16]:
def Switch_of_media(log):
    print("Switch_of_media function")
    print("Logic is same as Interface function as there is no column for media in the given CSV ")

In [17]:
def Idle_time(log,maxTime):
    global df
    print("Idle_time function")
    for case_index, case in enumerate(log):
        #print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        prev_end_timestamp=0
        idle_time=0
        prev_activity=""
        for event_index, event in enumerate(case):
            if(prev_end_timestamp!=0):
                idle_time=pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S")-prev_end_timestamp
            #print("Idle time between previous activity:%s and current activity:%s is %s"%(prev_activity, event["Activity"], idle_time))
            #if(type(idle_time)!= int):
            #    print(idle_time.total_seconds  )#idle_time/np.timedelta64(1,'s'))
            if(type(idle_time)!= int and idle_time.total_seconds()>maxTime):
                #.total_seconds()>7200) :
                row={cols[0]:event["Case ID"]+"-> Event "+str(event_index), cols[1]: event["Case ID"], cols[2]:'AF', cols[3]:'6',cols[4]:'Expert',cols[5]:event["Start Timestamp"],cols[6]:'Idletime between '+prev_activity+' to ' +event["Activity"]+' is '+ str(idle_time),cols[7]:'In the case',cols[8]:'Event level'}
                df=df.append(row, ignore_index=True)
            prev_end_timestamp=pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S") 
            prev_activity=event["Activity"]
            

In [18]:
def mean1(log):
    total_events=0
    avg_dict={}
    for case_index, case in enumerate(log): 
        for event_index, event in enumerate(case):
            total_events=+1
            if event["Activity"] not in avg_dict.keys():
                avg_dict[event["Activity"]]=((pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1),1)
            else:
                avg_dict[event["Activity"]]=(avg_dict[event["Activity"]][0] + (pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1),(avg_dict[event["Activity"]][1])+1)
    
    avg_dict2={}
    for k, v in avg_dict.items():
        avg_dict2[k]=v[0]/v[1]
    print(avg_dict2)
        

In [19]:
def Variance_of_process_times(log):
    global df
    print("Variance_of_process_times function")
    d={}#mean1(log)
    l=[]
    for case_index, case in enumerate(log): 
        for event_index, event in enumerate(case):
            if event["Activity"] not in d.keys():
                l=[]
            else:
                l=d[event["Activity"]]
            l.append( (pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S"))/ pd.Timedelta(hours=1))
            d[event["Activity"]]=(l)
    variance_dict={}
    for k,v in d.items():
        variance_dict[k]= (min(v), max(v),np.mean(v) ,np.var(v))
        row={cols[0]:"All Activities", cols[1]: k, cols[2]:'AF', cols[3]:'7',cols[4]:'Automatic detection',cols[5]:'',cols[6]:'(Min, Max, Average, Variance) for current activity:'+str((min(v), max(v),np.mean(v) ,np.var(v))),cols[7]:'In the Activity',cols[8]:'Activity Level'}
        df=df.append(row, ignore_index=True)
    #print("(Min, Max, Average, Variance) for each activity:")
    #print(variance_dict)
    
    
   
        
            
            

In [20]:
def Bottleneck(log):
    global df
    print("Bottleneck function")
    '''for case_index, case in enumerate(log):
        print("\n Case Id: %s" % ( case.attributes["concept:name"]))
        duration=0 
        a=""
        max_duration=0
        for event_index, event in enumerate(case):
            duration=pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S")
            if(max_duration==0 or duration>max_duration):
                max_duration=duration
                a=event["Activity"]
        print("Bottleneck Activity at case level:%s took maximum time of %s to complete"%(a,max_duration ))'''
    duration=0 
    a=""
    max_duration=0
    for case_index, case in enumerate(log):
        for event_index, event in enumerate(case):
            duration=pd.to_datetime(event["Complete Timestamp"], format = "%m/%d/%Y %H:%M:%S")-pd.to_datetime(event["Start Timestamp"], format = "%m/%d/%Y %H:%M:%S")
            if(max_duration==0 or duration>max_duration):
                max_duration=duration
                a=event["Activity"]
    
    print("Bottleneck Activity on log level:%s took maximum time of %s to complete"%(a,max_duration ))
    row={cols[0]:"All Activities", cols[1]: a, cols[2]:'AF', cols[3]:'8',cols[4]:'Automatic detection',cols[5]:'',cols[6]:'Activity took maximum time of '+str(max_duration),cols[7]:'In the Activity',cols[8]:'Log Level'}
    df=df.append(row, ignore_index=True)
    

In [21]:
def Parallelizable_tasks_loglevel():
    print("Parallelizable_tasks function\n\n")
    log_csv2 = pd.read_csv('Production_Data.csv', sep=',')
    log_csv2.rename(columns={'Activity': 'concept:name'}, inplace=True)
    parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'Case ID'}
    event_log2 = log_converter.apply(log_csv2, parameters=parameters, variant=log_converter.Variants.TO_EVENT_LOG)
    #print(event_log)
    dfg_simple2 = dfg_discovery.apply(event_log2)
    #print(dfg_simple2)
    #('Turning & Milling Q.C.', 'Turning & Milling - Machine 8'): 27,
    for k in dfg_simple2.keys():
        if(k[0]!=k[1]):
            if (k[1],k[0]) in dfg_simple2.keys():
                print(k," : are Parallelizable activities")
         
                

In [22]:
def Parallelizable_tasks_CaseLevel():
    global df
    print("Parallelizable_tasks_CaseLevel function\n\n")
    log_csv3 = pd.read_csv('Production_Data.csv', sep=',')
    log_csv3.rename(columns={'Activity': 'concept:name'}, inplace=True)
    parameters = {log_converter.Variants.TO_EVENT_LOG.value.Parameters.CASE_ID_KEY: 'Case ID'}
    event_log3 = log_converter.apply(log_csv3, parameters=parameters, variant=log_converter.Variants.TO_EVENT_LOG)
    event_log3 = sorting.sort_timestamp(event_log3,"Start Timestamp", False)
    
    for case_index, case in enumerate(event_log3):
        
        tracefilter_log_pos = attributes_filter.apply(event_log3, [case.attributes["concept:name"]],
                                          parameters={attributes_filter.Parameters.ATTRIBUTE_KEY : "Case ID", attributes_filter.Parameters.POSITIVE: True})
        
        dfg_simple3 = dfg_discovery.apply(tracefilter_log_pos)
        l=[]
        for k in dfg_simple3.keys():
            if(k[0]!=k[1]):
                if (k[1],k[0]) in dfg_simple3.keys():
                    l.append((k[1],k[0]))
        l1=[]
        for i in l:
            if (i[1],i[0]) in l:
                l1.append((i[0],i[1]))
                l.remove((i[1],i[0]))
                l.remove((i[0],i[1]))
        if(len(l)>0)   :  
            row={cols[0]:case.attributes['concept:name'], cols[1]: case.attributes['concept:name'], cols[2]:'AF', cols[3]:'9',cols[4]:'Automatic detection',cols[5]:'',cols[6]:'Parallelizable tasks :'+''.join(str(l1)),cols[7]:'In the case',cols[8]:'Case level'}
            df=df.append(row, ignore_index=True)
            #print("\n\nParallelizable tasks for Case:",case.attributes["concept:name"]," are => ", end=" ")
            #print(l1)
        

            

In [23]:
# Defining main function 
def main(): 
    print("Welcome to Joint Master thesis:\nModelling of production expertise to extend the data-driven analysis of process models") 
    
    '''#Import a log
    log = xes_importer.apply('running-example.xes')
    print("Log imported")
    
    #Explore the log
    #explore_log(log)
    
    #Define the forbidden sequence of events
    #simple restriction which says you cannot decide without examining thoroughly 
    forbidden_sequence=[( 'decide','examine thoroughly')]
    
    #Find different kinds of weakness in the log
    find_weakness(log, forbidden_sequence)
    
    #obtain_petrinet_from_dfg
    dfg_simple = dfg_discovery.apply(log)
    net, im, fm = obtain_petrinet_from_dfg(dfg_simple)

    #Visualise the petrinet obtained
    gviz = pn_vis_factory.apply(net, im, fm)
    pn_vis_factory.view(gviz)'''
    
    #log = xes_importer.apply('running-example.xes')
    log_csv = pd.read_csv('Production_Data.csv', sep=',')
    log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                                   constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                    constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                    constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                    constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                                   })
    print("Log imported\n\n\n")
    
    logSorted = sorting.sort_timestamp(log,"Start Timestamp", False)
    #cols=['Detected Weakness Row','Case ID','Weakness Type (AF/PA)','Weakness ID','Weakness Origin', 'Weakness Time','Weakness Information','Weakness Measurement']
    #df=pd.DataFrame(columns=cols)
    #print(log)'sorted_log' x.attributes["concept:name"], x.events["Start Timestamp"]
    #for i in log:
    #    print(sorted(i),events['Start Timestamp'])
    
    blacklist=[ 'Lapping - Machine 1','Turning & Milling - Machine 8']
    #Unwanted_Activity(logSorted, blacklist)
    
    #Backloop(logSorted)
    
    #Redundant_Activity(logSorted) 
    
    #Interface(logSorted)
    
    #Switch_of_media(logSorted) Same as Interface as there is no column for media in the given CSV 
    maxTime=86400
    #Idle_time(logSorted, maxTime)
    
    #Variance_of_process_times(logSorted)
    
    #Bottleneck(logSorted)
    
    ##Parallelizable_tasks_loglevel()
    Parallelizable_tasks_CaseLevel()
    print(df)
main()


Welcome to Joint Master thesis:
Modelling of production expertise to extend the data-driven analysis of process models




Log imported



Parallelizable_tasks_CaseLevel function


   Detected Weakness Row   Case ID Weakness Type (AF/PA) Weakness ID  \
0               Case 207  Case 207                    AF           9   
1               Case 225  Case 225                    AF           9   
2               Case 233  Case 233                    AF           9   
3               Case 194  Case 194                    AF           9   
4               Case 253  Case 253                    AF           9   
5               Case 267  Case 267                    AF           9   
6               Case 199  Case 199                    AF           9   
7               Case 187  Case 187                    AF           9   
8               Case 198  Case 198                    AF           9   
9               Case 210  Case 210                    AF           9   
10              Case 192  Case 192                    AF           9   
11              Case 257  Case 257                    AF           9   
12    

In [24]:
log_csv = pd.read_csv('Production_Data.csv', sep=',')
log = conversion_factory.apply(log_csv, parameters={constants.PARAMETER_CONSTANT_CASEID_KEY: "Case ID",
                                               constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "Activity",
                                                constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY:"Start Timestamp",
                                                constants.PARAMETER_CONSTANT_RESOURCE_KEY:"Resource",
                                                constants.PARAMETER_CONSTANT_TIMESTAMP_KEY:"Complete Timestamp"
                                               })
print("Log imported\n\n\n")


#print(log)
'''for case_index, case in enumerate(log):
    print("Caseeeeee:",case)
    #print("\n case index: %d  case id: %s" % (case_index, case.attributes["concept:name"]))
    for event_index, event in enumerate(case):
        print("Eventtttttt:",event)'''

activities = attributes_filter.get_attribute_values(log, "Case ID")
tracefilter_log_pos = attributes_filter.apply(log, ["Case 1"],
                                          parameters={attributes_filter.Parameters.ATTRIBUTE_KEY : "Case ID", attributes_filter.Parameters.POSITIVE: True})
print("###########################\n",tracefilter_log_pos)

Log imported



###########################
 [{'attributes': {'concept:name': 'Case 1'}, 'events': [{'Case ID': 'Case 1', 'Activity': 'Turning & Milling - Machine 4', 'Resource': 'Machine 4 - Turning & Milling', 'Start Timestamp': '1/29/2012 23:24:00', 'Complete Timestamp': '1/30/2012 5:43:00', 'Span': '006:19', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4932', 'Report Type': 'S', 'Qty Completed': 1, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '19:00.0'}, '..', {'Case ID': 'Case 1', 'Activity': 'Packing', 'Resource': 'Packing', 'Start Timestamp': '2/17/2012 0:00:00', 'Complete Timestamp': '2/17/2012 1:00:00', 'Span': '000:00', 'Work Order  Qty': 10, 'Part Desc.': 'Cable Head', 'Worker ID': 'ID4820', 'Report Type': 'D', 'Qty Completed': 9, 'Qty Rejected': 0, 'Qty for MRB': 0, 'Rework': nan, 'duration': '00:00.0'}]}]


  


In [25]:
#Calling main function
#if __name__=="__main__": 
#    main() 

In [26]:
df

Unnamed: 0,Detected Weakness Row,Case ID,Weakness Type (AF/PA),Weakness ID,Weakness Origin,Weakness Time,Weakness Information,Weakness Measurement,Weakness Level
0,Case 207,Case 207,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling - M...,In the case,Case level
1,Case 225,Case 225,AF,9,Automatic detection,,Parallelizable tasks :[('Round Grinding - Mach...,In the case,Case level
2,Case 233,Case 233,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling Q.C...,In the case,Case level
3,Case 194,Case 194,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling Q.C...,In the case,Case level
4,Case 253,Case 253,AF,9,Automatic detection,,"Parallelizable tasks :[('Lapping - Machine 1',...",In the case,Case level
5,Case 267,Case 267,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling Q.C...,In the case,Case level
6,Case 199,Case 199,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling - M...,In the case,Case level
7,Case 187,Case 187,AF,9,Automatic detection,,"Parallelizable tasks :[('Lapping - Machine 1',...",In the case,Case level
8,Case 198,Case 198,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling Q.C...,In the case,Case level
9,Case 210,Case 210,AF,9,Automatic detection,,Parallelizable tasks :[('Turning & Milling Q.C...,In the case,Case level


In [27]:
df['Weakness Information'][0]

"Parallelizable tasks :[('Turning & Milling - Machine 9', 'Turning & Milling - Machine 6'), ('Turning & Milling Q.C.', 'Turning & Milling - Machine 9')]"