In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install pm4py owlready2 pandas

State Machine model definition

In [3]:
from collections import Counter
from copy import deepcopy
import time
import networkx as nx
class StateMachine(object):
    class state(object):
        def __init__(self, name, type, Resourcerequirements=None):
            self.__name = name
            self.__type = type
            self.__Resourcerequirements = set() if Resourcerequirements is None else Resourcerequirements

        def __set_name(self, name):
            self.__name = name

        def __get_name(self):
            return self.__name

        def __set_type(self, type):
            self.__type = type
        
        def set_type(self, type):
            self.__type = type

        def __get_type(self):
            return self.__type

        def __get_out_transitions(self):
            return self.__out_transitions

        def __get_in_transitions(self):
            return self.__in_transitions

        def __get_Resourcerequirements(self):
            return self.__Resourcerequirements

        def __repr__(self):
            return str("("+self.name+", "+ self.type +", "+ repr(self.Resourcerequirements) +")" )

        def __str__(self):
            return self.__repr__()

        def __eq__(self, other):
            # keep the ID for now in states
            return id(self) == id(other)

        def __hash__(self):
            # keep the ID for now in states
            return id(self)

        def __deepcopy__(self, memodict={}):
            if id(self) in memodict:
                return memodict[id(self)]
            new_state = StateMachine.state(self.name, Resourcerequirements=self.Resourcerequirements)
            memodict[id(self)] = new_state
            for transition in self.in_transitions:
                new_transition = deepcopy(transition, memo=memodict)
                new_state.in_transitions.add(new_transition)
            for transition in self.out_transitions:
                new_transition = deepcopy(transition, memo=memodict)
                new_state.out_transitions.add(new_transition)
            return new_state

        name = property(__get_name, __set_name)
        type = property(__get_type, __set_type)
        Resourcerequirements = property(__get_Resourcerequirements)

    class transition(object):
        def __init__(self, name, source, target, events=None, actions=None):
            self.__name = name
            self.__source = source
            self.__target = target
            self.__actions = set() if actions is None else actions
            self.__events = set() if events is None else events

        def __get_name(self):
            return self.__name

        def __get_source(self):
            return self.__source
        
        def __get_actions(self):
            return self.__actions

        def __get_events(self):
            return self.__events

        def __get_target(self):
            return self.__target

        def __get_properties(self):
            return self.__properties

        def __repr__(self):
            name_rep = repr(self.name)
            source_rep = repr(self.source)
            target_rep = repr(self.target)
            events_rep = repr(self.events)
            actions_rep = repr(self.actions)
            return "("+name_rep+":"+source_rep+"->"+target_rep+","+events_rep+","+actions_rep+")"

        def __str__(self):
            return self.__repr__()

        def __hash__(self):
            return id(self)

        def __eq__(self, other):
            return self.source == other.source and self.target == other.target

        def __deepcopy__(self, memodict={}):
            if id(self) in memodict:
                return memodict[id(self)]
            new_source = memodict[id(self.source)] if id(self.source) in memodict else deepcopy(self.source,
                                                                                                memo=memodict)
            new_target = memodict[id(self.target)] if id(self.target) in memodict else deepcopy(self.target,
                                                                                                memo=memodict)
            memodict[id(self.source)] = new_source
            memodict[id(self.target)] = new_target
            new_transition = StateMachine.transition(new_source, new_target, weight=self.weight, properties=self.properties)
            memodict[id(self)] = new_transition
            return new_transition

        name = property(__get_name)
        source = property(__get_source)
        target = property(__get_target)
        events = property(__get_events)
        actions = property(__get_actions)
        properties = property(__get_properties)

    class event(object):
        def __init__(self, id, type, predicate):
            self.__id = id
            self.__type = type
            self.__predicate = predicate
        
        def __get_id(self):
            return self.__id
        
        def __get_type(self):
            return self.__type
        
        def __get_predicate(self):
            return self.__predicate
        
        id = property(__get_id)
        type = property(__get_type)
        predicate = property(__get_predicate)

    class action(object):
        def __init__(self, id, type, attributes):
            self.__id = id
            self.__type = type
            self.__attributes = attributes
        
        def __get_id(self):
            return self.__id
        
        def __get_type(self):
            return self.__type

    def add_state(self, new_state):
        self.states.add(new_state)
        self.graph.add_node(new_state.name)

    def add_transition(self, new_transition):
        self.transitions.add(new_transition)
        self.graph.add_edge(new_transition.source, new_transition.target)

    def __init__(self, name=None, states=None, transitions=None):
        self.__name = "" if name is None else name
        self.__states = set() if states is None else states
        self.__transitions = set() if transitions is None else transitions
        self.__graph = nx.DiGraph()

    def __get_name(self):
        return self.__name

    def __set_name(self, name):
        self.__name = name

    def __get_states(self):
        return self.__states

    def __get_transitions(self):
        return self.__transitions

    def __get_graph(self):
        return self.__graph

    def __hash__(self):
        ret = 0
        for p in self.states:
            ret += hash(p)
            ret = ret % 479001599
        for t in self.transitions:
            ret += hash(t)
            ret = ret % 479001599
        return ret

    def __eq__(self, other):
        # for the Petri net equality keep the ID for now
        return id(self) == id(other)

    def __deepcopy__(self, memodict={}):
        from pm4py.objects.petri_net.utils.petri_utils import add_transition_from_to
        this_copy = StateMachine(self.name)
        memodict[id(self)] = this_copy
        for state in self.states:
            state_copy = StateMachine.state(state.name, properties=state.properties)
            this_copy.states.add(state_copy)
            memodict[id(state)] = state_copy
        for trans in self.transitions:
            trans_copy = StateMachine.Transition(trans.name, trans.label, properties=trans.properties)
            this_copy.transitions.add(trans_copy)
            memodict[id(trans)] = trans_copy
        for transition in self.transitions:
            add_transition_from_to(memodict[id(transition.source)], memodict[id(transition.target)], this_copy, weight=transition.weight)
        return this_copy

    def __repr__(self):
        ret = ["states: ["]
        states_rep = []
        for state in self.states:
            states_rep.append(repr(state))
        states_rep.sort()
        ret.append(" " + ", ".join(states_rep) + " ")
        ret.append("]\ntransitions: [")
        trans_rep = []
        for trans in self.transitions:
            trans_rep.append(repr(trans))
        trans_rep.sort()
        ret.append(" " + ", ".join(trans_rep) + " ")
        ret.append("]")
        return "".join(ret)

    def __str__(self):
        return self.__repr__()

    name = property(__get_name, __set_name)
    states = property(__get_states)
    transitions = property(__get_transitions)
    graph = nx.DiGraph()


SM_defined = StateMachine(name='UI')
SM_defined.states.add(
    StateMachine.state(
        name = 'S1',
        type = 'isInitial',
        Resourcerequirements = {
            'replicas': 1
        }
    ))
SM_defined.states.add(
    StateMachine.state(
        name = 'S2',
        type = 'isNormal',
        Resourcerequirements = {
            'replicas': 2
        }
    ))
SM_defined.states.add(
    StateMachine.state(
        name = 'S3',
        type = 'isNormal',
        Resourcerequirements = {
            'replicas': 4
        }
    ))
SM_defined.states.add(
    StateMachine.state(
        name = 'S4',
        type = 'isFinal',
        Resourcerequirements = {
            'replicas': 0
        }
    ))
SM_defined.transitions.add(
    StateMachine.transition(
    'T1', 'S1', 'S4', 
        events={
            'id': "E1",
            'type': "TemporalEvent",
            'predicate': "2021-08-04 15:00:00"}, 
        actions={
            'id': "A1",
            'type': "Scale-in",
            'attributes': {
                'resource': "UI",
                'replicas': "1"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T2', 'S2', 'S4', 
        events={
            'id': "E1",
            'type': "TemporalEvent",
            'predicate': "2021-08-04 15:00:00"}, 
        actions={
            'id': "A2",
            'type': "Scale-in",
            'attributes': {
                'resource': "UI",
                'replicas': "2"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T3', 'S3', 'S4', 
        events={
            'id': "E1",
            'type': "TemporalEvent",
            'predicate': "2021-08-04 15:00:00"}, 
        actions={
            'id': "A2",
            'type': "Scale-in",
            'attributes': {
                'resource': "UI",
                'replicas': "2"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T4', 'S1', 'S2', 
        events={
            'id': "E3",
            'type': "ResourceRelatedEvent",
            'predicate': {
                'metric': "cpuusage",
                'operator': ">=",
                'refValue': 80,
                'time': "60s"
            }}, 
        actions={
            'id': "A3",
            'type': "Scale-out",
            'attributes': {
                'resource': "UI",
                'replicas': "1"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T5', 'S2', 'S3', 
        events={
            'id': "E2",
            'type': "ResourceRelatedEvent",
            'predicate': {
                'metric': "cpuusage",
                'operator': "<=",
                'refValue': 20,
                'time': "60s"
            }}, 
        actions={
            'id': "A3",
            'type': "Scale-out",
            'attributes': {
                'resource': "UI",
                'replicas': "1"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T6', 'S3', 'S2', 
        events={
            'id': "E2",
            'type': "ResourceRelatedEvent",
            'predicate': {
                'metric': "cpuusage",
                'operator': "<=",
                'refValue': 20,
                'time': "60s"
            }}, 
        actions={
            'id': "A4",
            'type': "Scale-in",
            'attributes': {
                'resource': "UI",
                'replicas': "1"}
        })
    )
SM_defined.transitions.add(
    StateMachine.transition(
    'T7', 'S2', 'S1', 
        events={
            'id': "E2",
            'type': "ResourceRelatedEvent",
            'predicate': {
                'metric': "cpuusage",
                'operator': "<=",
                'refValue': 20,
                'time': "60s"
            }}, 
        actions={
            'id': "A4",
            'type': "Scale-in",
            'attributes': {
                'resource': "UI",
                'replicas': "1"}
        })
    )

<h1>Experiments Leveraging Conformance Checking Techniques for Multi-Cloud SLA Compliance </h1>

The main objective of this notebook is to present the implementation part of the paper submited to SAC 2023. This implementation is decomposed in two steps: Log pre-processing and conformance checking. For this example, we will work on the following example of event logs collected from an execution on Docker Swarm and the following state machine.

In [4]:
import pandas as pd
pd.set_option('display.width',1000)

df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/logs.csv')
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

print(df)

             Timestamp     Source Resource Name         Event-Type     Metric Value
0  2022-11-15 00:00:03   Provider            UI     Service_Create   replicas     2
1  2022-11-15 00:00:04   Provider            UI   Container_Create          /     /
2  2022-11-15 00:00:05   Provider            UI    Container_Start          /     /
3  2022-11-15 00:00:05  Ressource            UI    Ressource_Usage  Cpu Usage   15%
4  2022-11-15 00:01:05  Ressource            UI    Ressource_Usage  Cpu Usage   15%
..                 ...        ...           ...                ...        ...   ...
73 2022-11-15 00:02:25  Ressource          Stor    Ressource_Usage  Cpu Usage   15%
74 2022-11-15 00:02:30  Ressource          Stor    Ressource_Usage  Cpu Usage   15%
75 2022-11-15 00:02:45   Provider          Stor     Service_Update   replicas     0
76 2022-11-15 00:02:46   Provider          Stor     Container_Stop          /     /
77 2022-11-15 00:02:47   Provider          Stor  Container_Destroy          

<h2>Pre-processing the collected logs</h2>
<h3>Annotation</h3>

We begin the pre-processing by the annotation of this later collected event logs based on the domain Knowledge formulated as an ontology with protègè. This domain knowledge represents the correlation between event type and state-machine elements. 

In [5]:
# Importing the owlready2 library and load the ontology 
from owlready2 import *
onto = get_ontology("/content/drive/MyDrive/Colab Notebooks/eventLog.owl").load()

Then, we perform the high-level activity Identification using the ontology which returns the enriched Event logs with High-Level Activity. 

In [6]:
def get_ancestor(onto, value):
    """
        Return the ancestor of a eventType to identify if it's related to an Event or a Transition
        Input Ontology 'owlready', value: name of classes
    """
    ancestor = {}
    # Search the value in the ontology
    search = onto.search(iri = f"*{value}")[0]
    if search != None:
        # Identify ancestor which is not the root node or the class itself
        ancestor[0] = search.is_a[0]
        if ancestor[0].name != 'Transition':
            ancestor[1] = ancestor[0].is_a[0].name
            ancestor[0] = search.is_a[0].name
        else:
            ancestor[1] = ancestor[0].name
            ancestor[0] = ''
        return ancestor
    else:
        return 'N/A'

# StateMachine Element
smElement = []
# Lifecycle Step
lcStep = []
for index, row in df.iterrows():
    anc = get_ancestor(onto, row['Event-Type'])
    smElement.append(anc[1])
    lcStep.append(anc[0])

df['smElement'] = smElement
df['lcStep'] = lcStep

print(df)

             Timestamp     Source Resource Name         Event-Type     Metric Value   smElement    lcStep
0  2022-11-15 00:00:03   Provider            UI     Service_Create   replicas     2       State     Start
1  2022-11-15 00:00:04   Provider            UI   Container_Create          /     /       State   Execute
2  2022-11-15 00:00:05   Provider            UI    Container_Start          /     /       State  Complete
3  2022-11-15 00:00:05  Ressource            UI    Ressource_Usage  Cpu Usage   15%  Transition          
4  2022-11-15 00:01:05  Ressource            UI    Ressource_Usage  Cpu Usage   15%  Transition          
..                 ...        ...           ...                ...        ...   ...         ...       ...
73 2022-11-15 00:02:25  Ressource          Stor    Ressource_Usage  Cpu Usage   15%  Transition          
74 2022-11-15 00:02:30  Ressource          Stor    Ressource_Usage  Cpu Usage   15%  Transition          
75 2022-11-15 00:02:45   Provider          Sto

<h3>Abstraction</h3>
Then, we perform the abstraction of Annotated logs in order to discover a state-machine representing the "real" observed behavior using defined patterns.

In [7]:
from numpy import empty

Discovered_SM = {}
#Patterns implementation
## State Abstraction
df_group = df.groupby(['Resource Name'])
for Resource in df_group.groups:
    Discovered_SM[Resource] = {}
    Discovered_SM[Resource]['States'] = []
    Discovered_SM[Resource]['Transitions'] = []
    df_resource = df[df_group.groups[Resource][0]:df_group.groups[Resource][-1]]
    state_pattern = (df_resource['lcStep'] == "Start") & (df_resource['lcStep'].shift(-1) == 'Execute') & (df_resource['lcStep'].shift(-2) == 'Complete')
    indice_state = df_resource.index[state_pattern]
    if indice_state is not empty:
        for i in indice_state:
            Discovered_SM[Resource]['States'].append(StateMachine.state(name = 'S_disc_'+str((len(Discovered_SM[Resource]['States'])+1)), type = 'notDefined',Resourcerequirements = {'replicas': df['Value'][i]}))
    else:
        print("No state has been discovered")

## State-Type Abstraction
for SM in Discovered_SM:
    max = len(Discovered_SM[SM]['States'])
    Discovered_SM[SM]['States'][0].type = 'isInitial'
    Discovered_SM[SM]['States'][max-1].type = 'isFinal'
    for i in range(1,(max-1)):
        Discovered_SM[SM]['States'][i].type = 'isNormal'

## Reconfiguration Actions Abstraction
for SM in Discovered_SM:
    nb_states = len(Discovered_SM[SM]['States'])
    for state in range(nb_states - 1):
        if Discovered_SM[SM]['States'][state].Resourcerequirements['replicas'] > Discovered_SM[SM]['States'][state+1].Resourcerequirements['replicas'] :
            value_action = '-' + str(int(Discovered_SM[SM]['States'][state+1].Resourcerequirements['replicas']) - int(Discovered_SM[SM]['States'][state].Resourcerequirements['replicas']))
        else:
            value_action = '+' + str(int(Discovered_SM[SM]['States'][state+1].Resourcerequirements['replicas']) - int(Discovered_SM[SM]['States'][state].Resourcerequirements['replicas']))
        Discovered_SM[SM]['Transitions'].append({'lt':'T_disc_'+str((len(Discovered_SM[SM]['Transitions'])+1)),'ss': Discovered_SM[SM]['States'][state].name,'st': Discovered_SM[SM]['States'][state+1].name,'E':'','A':value_action})

print("Discovered State-Machines")
print(Discovered_SM)

Discovered State-Machines
{'Auth': {'States': [(S_disc_1, isInitial, {'replicas': '2'}), (S_disc_2, isNormal, {'replicas': '4'}), (S_disc_3, isNormal, {'replicas': '6'}), (S_disc_4, isFinal, {'replicas': '8'})], 'Transitions': [{'lt': 'T_disc_1', 'ss': 'S_disc_1', 'st': 'S_disc_2', 'E': '', 'A': '+2'}, {'lt': 'T_disc_2', 'ss': 'S_disc_2', 'st': 'S_disc_3', 'E': '', 'A': '+2'}, {'lt': 'T_disc_3', 'ss': 'S_disc_3', 'st': 'S_disc_4', 'E': '', 'A': '+2'}]}, 'Stor': {'States': [(S_disc_1, isInitial, {'replicas': '2'}), (S_disc_2, isNormal, {'replicas': '4'}), (S_disc_3, isNormal, {'replicas': '6'}), (S_disc_4, isNormal, {'replicas': '8'}), (S_disc_5, isFinal, {'replicas': '12'})], 'Transitions': [{'lt': 'T_disc_1', 'ss': 'S_disc_1', 'st': 'S_disc_2', 'E': '', 'A': '+2'}, {'lt': 'T_disc_2', 'ss': 'S_disc_2', 'st': 'S_disc_3', 'E': '', 'A': '+2'}, {'lt': 'T_disc_3', 'ss': 'S_disc_3', 'st': 'S_disc_4', 'E': '', 'A': '+2'}, {'lt': 'T_disc_4', 'ss': 'S_disc_4', 'st': 'S_disc_5', 'E': '', 'A': '-

<h2>Checker</h2>
In this last step, we implement the checker component. We construct the search space as defined in the paper. 

In [None]:
import pm4py
import networkx as nx

# For each discovered State-Machine
for SM in Discovered_SM:
    SS = nx.Graph()
    for eltx, elty in zip(Discovered_SM[SM]['States'], SM_defined.states):
        if eltx.Resourcerequirements == elty.Resourcerequirements:
            SS.add_node(str([eltx.name,elty.name]), weight=1)
        else :
            SS.add_node(str([eltx.name,'>>']), weight=5)
            SS.add_node(str(['>>',elty.name]), weight=5)
    print(SS)

From this space, we search the optimal alignment using an A* algorithm.

In [None]:
y_optimal = nx.shortest_path(SS, 'isInitial', 'isFinal')

Finally, we compute the fitness value of the identified alignment and return the report with the alignment.

In [None]:
y_worst_sum = sum(y_worst)
y_optimal_sum = sum(y_optimal)
fitnessValue = 1 - y_optimal/y_worst

print("Report : ")
print(f"Y_Optimal : {y_optimal}")
print(f"FitnessValue : {fitnessValue}")