## Libraries

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import warnings
warnings.filterwarnings('ignore')
from neo4j import GraphDatabase

## Connect to Neo4J

In [None]:
# Open a Bolt connection to the local Neo4j instance and start the session
# that the query cells below reuse.
# NOTE(review): the credentials are hard-coded here; consider reading them
# from the environment before sharing this notebook.
bolt_uri = "bolt://localhost:7687"
credentials = ("neo4j", "Neo4J1234")
driver = GraphDatabase.driver(bolt_uri, auth=credentials)
session = driver.session()

## Create entities and relate them to events

In [None]:
# Create one Entity node of EntityType "Kit" for every distinct KitID value
# found on the Event nodes (MERGE keeps the step idempotent).
query = """MATCH (e:Event) UNWIND e.KitID AS id_val
WITH DISTINCT id_val
MERGE (:Entity {ID:id_val, EntityType:"Kit"})"""
session.run(query).single()

In [None]:
# Attach each event to the Kit entity whose ID equals the event's KitID
# through a CORR relationship.
query = """MATCH (e:Event) UNWIND e.KitID AS kitID WITH e,kitID
MATCH (n:Entity {EntityType: "Kit"}) WHERE kitID = n.ID
MERGE (e)-[:CORR]->(n)"""
session.run(query).single()

## Create directly follows relationships between events related to the same entity

In [None]:
# For every Kit entity, order its correlated events by timestamp (node id as
# tie-breaker) and connect each consecutive pair with a DF relationship that
# carries the entity type, the entity ID and the elapsed time in seconds.
#
# Duration is computed with duration.inSeconds(): duration.between() splits
# the result into months/days/seconds groups, and reading `.seconds` from it
# silently drops every whole day or month between the two events.
query = """MATCH (n:Entity {EntityType:"Kit"})
MATCH (n)<-[:CORR]-(e)
WITH n, e AS nodes ORDER BY e.timestamp, ID(e)
WITH n, collect(nodes) AS event_node_list
UNWIND range(0, size(event_node_list)-2) AS i
WITH n, event_node_list[i] AS e1, event_node_list[i+1] AS e2
MERGE (e1)-[df:DF {EntityType:n.EntityType, ID:n.ID, Duration: duration.inSeconds(e1.timestamp , e2.timestamp).seconds}]->(e2)
"""
session.run(query)

## Create sterilization cycles as runs for each entity

In [None]:
# Tag every event whose Activity is "Entrada Material Sucio" as NewKitEvent
# and, when one exists, the event that precedes it through DF as LastKitEvent.
query = """MATCH (e:Event)
WHERE e.Activity =  "Entrada Material Sucio"
OPTIONAL MATCH (f:Event) - [:DF] -> (e)
SET e:NewKitEvent, f:LastKitEvent
 """
session.run(query).single()

In [None]:
# Tag every event whose Activity is "Comisionado" as LastKitEvent.
query = """MATCH (e:Event)
WHERE e.Activity =  "Comisionado"
SET e:LastKitEvent
 """
session.run(query).single()

In [None]:
# Tag as NewKitEvent every event that directly follows (DF) a LastKitEvent,
# unless that following event is itself a "Comisionado" event.
query = """MATCH (e:Event:LastKitEvent) - [:DF] -> (f:Event)
where f.Activity <> "Comisionado"
set f:NewKitEvent
 """
session.run(query).single()

In [None]:
# Find paths from a start event to an end event and create Run nodes.
# Starting at every NewKitEvent, apoc.path.expandConfig follows outgoing DF
# relationships; the label filter "+Event|/LastKitEvent" allows Event nodes
# and, per APOC's filter syntax, treats LastKitEvent as the termination label.
# For each yielded path the query:
#   1. looks up the Entity the start event is correlated to (CORR),
#   2. clones that entity, adds the Run label and removes the Entity label,
#   3. links the clone to the original entity with HAS_RUN,
#   4. in a subquery, deletes the CORR relationship from every event on the
#      path to the original entity and recreates it towards the new Run node.
query = f'''MATCH (start_event:Event:NewKitEvent)
CALL apoc.path.expandConfig(start_event, {{relationshipFilter: "DF>", 
labelFilter:"+Event|/LastKitEvent"}})
YIELD path
MATCH (start_event) - [:CORR] -> (k:Entity)
CALL apoc.refactor.cloneNodes([k])
YIELD input, output as new_k, error
SET new_k:Run
remove new_k:Entity
CREATE (new_k) - [:HAS_RUN] -> (k)
WITH nodes(path) as events, k, new_k
            CALL {{WITH events, k, new_k
             UNWIND events as  kit_event
            MATCH (kit_event) - [r:CORR] -> (k)
            DELETE r
            CREATE (kit_event) - [:CORR] -> (new_k)}}
 '''
session.run(query).single()

In [None]:
# Handle events that carry both the NewKitEvent and LastKitEvent labels,
# i.e. events that start and end a cycle on their own.
# For each such event still correlated (CORR) to an Entity, the query clones
# the entity, relabels the clone from Entity to Run, links it to the original
# entity with HAS_RUN, and then tries to move the event's ACTS_ON relationship
# from the entity to the new Run node.
# NOTE(review): the subquery matches and recreates ACTS_ON, while every other
# cell in this notebook uses CORR for the event-to-entity link. If ACTS_ON is
# never created elsewhere, the subquery matches nothing and the event keeps
# its CORR to the original entity -- confirm whether CORR was intended.
query = f'''MATCH (start_event:Event:NewKitEvent:LastKitEvent)
            MATCH (start_event) - [:CORR] -> (k:Entity)
            CALL apoc.refactor.cloneNodes([k])
            YIELD input, output as new_k, error
            SET new_k:Run
            remove new_k:Entity
            CREATE (new_k) - [:HAS_RUN] -> (k)
            WITH start_event, k, new_k
            CALL {{WITH start_event, k, new_k
            MATCH (start_event) - [r:ACTS_ON] -> (k)
            DELETE r
            CREATE (start_event) - [:ACTS_ON] -> (new_k)}}
'''
session.run(query).single()

In [None]:
# For every Run, take its NewKitEvent and LastKitEvent, follow the DF path
# between them, and record on the Run the start/end timestamps plus a
# comma-separated list of the activities along that path.
query = """MATCH (startEvent:NewKitEvent)-[:CORR]->(c:Run) 
MATCH (endEvent:LastKitEvent)-[:CORR]->(c) 
Match path = (startEvent) - [:DF*] -> (endEvent)
set c.start_timestamp = startEvent.timestamp, 
c.end_timestamp=endEvent.timestamp, 
c.activities = startEvent.Activity
with tail(nodes(path)) as pathNodes, c
foreach (event in pathNodes | 
                set c.activities = c.activities + ", " + event.Activity)
"""
session.run(query).single()

In [None]:
# Number the runs in start-time order and stamp a CaseID of the form
# "<kit ID>-CN<index>" on each Run and on every event correlated to it.
#
# The match targets :Run rather than :Entity: start_timestamp is only ever
# set on Run nodes, Run nodes have their Entity label removed when they are
# cloned, and the following cell reads r.CaseID from (r:Run). Matching
# :Entity left every Run without a CaseID.
query = """
MATCH (e:Run)
WITH  e AS cases ORDER BY e.start_timestamp, ID(e)
WITH collect(cases) AS cases_list
UNWIND range(0, size(cases_list)-1) AS i
with cases_list[i] as node, i
Match (ev:Event) - [:CORR] -> (node)
set node.CaseID = node.ID + "-CN" + i,
ev.CaseID = node.ID + "-CN" + i
"""
session.run(query).single()

In [None]:
# Pull the start/end timestamps of every Run into pandas, derive the run
# duration in days and minutes, and write the minutes back to each Run node.
query = """match (r:Run)
return r.CaseID, r.start_timestamp, r.end_timestamp, r.start_timestamp.month
"""
frame = pd.DataFrame(session.run(query).data())

# An empty result yields a frame without columns; skip the computation
# instead of failing with a KeyError on the timestamp columns.
if not frame.empty:
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
    for column in ('r.start_timestamp', 'r.end_timestamp'):
        frame[column] = pd.to_datetime(frame[column], format=timestamp_format)

    elapsed_seconds = (frame['r.end_timestamp'] - frame['r.start_timestamp']).dt.total_seconds()
    frame['durationDays'] = elapsed_seconds / 60 / 60 / 24
    frame['durationMinutes'] = elapsed_seconds / 60

    # Values are passed as query parameters rather than interpolated into the
    # Cypher text: this avoids quoting problems in CaseID values and lets the
    # server reuse one query plan for every row.
    update_query = """match (r:Run {CaseID: $case_id})
                set r.durationInMinutes = $minutes
            """
    for case_id, minutes in zip(frame['r.CaseID'], frame['durationMinutes']):
        if pd.isna(minutes):
            # Runs without both timestamps have no duration to store.
            continue
        session.run(update_query, {"case_id": case_id, "minutes": float(minutes)})

In [None]:
# Store on every Entity the number of Run nodes linked to it via HAS_RUN.
query = """match (e:Entity)<-[:HAS_RUN]-(r:Run)
with count(r) as numberOfCycles, e
set e.numberOfCycles = numberOfCycles
return e.ID, numberOfCycles
"""
session.run(query).single()

## Make Kit nodes

In [None]:
# Create one Kit node per distinct `Código` value seen on events, storing the
# number of distinct NS values observed for that code as numberOfUnits.
query = """match (e:Event)
UNWIND e.`Código` AS id
with distinct (id), count(distinct(e.NS)) as num
merge (k:Kit {ID :id, numberOfUnits :num})
return id, num"""
session.run(query).single()

In [None]:
# Copy the event's `Código` onto every Entity that has at least one Run and
# whose ID equals the event's KitID (stored temporarily as n.KitID).
query = """MATCH (e:Event)
MATCH (n:Entity)<- [:HAS_RUN] - (r:Run) 
where e.KitID = n.ID
set n.KitID = e.`Código`"""
session.run(query).single()

In [None]:
# Link each Kit node to the entities carrying its code through HAS_UNIT, then
# drop the temporary KitID property from those entities.
query = """MATCH (n:Entity) <- [:HAS_RUN] - (r:Run) 
Match (k:Kit)
where n.KitID = k.ID
merge (n) <- [:HAS_UNIT] - (k)
REMOVE n.KitID """
session.run(query).single()

## Extract the process model

In [None]:
# Create one Class node of Type "Activity" per distinct event Activity name.
query = """MATCH ( e : Event ) WITH distinct e.Activity AS actName
MERGE ( c : Class { Name:actName, Type:"Activity", ID: actName})"""
session.run(query).single()

In [None]:
# Connect every event to the activity Class that shares its Activity name.
query = """MATCH ( c : Class ) WHERE c.Type = "Activity"
MATCH ( e : Event ) WHERE c.Name = e.Activity
MERGE ( e ) -[:OBSERVED]-> ( c )"""
session.run(query).single()

In [None]:
# Connect every Entity to each activity Class observed by one of the events
# correlated to that entity.
query = """MATCH ( c : Class ) WHERE c.Type = "Activity"
MATCH ( e : Event ) WHERE c.Name = e.Activity
match (en:Entity) <-[:CORR]- (e)
MERGE ( en) -[:LOGGED_IN]-> ( c )"""
session.run(query).single()

In [None]:
# Lift event-level DF relationships to the class level: for event pairs that
# are correlated to the same Run and whose DF EntityType equals the Run's,
# count the DF edges per (class, class, EntityType) and store that count on
# the DF_C relationship when it is first created.
query = """match (e:Entity)<-[:HAS_RUN]-(r:Run)
MATCH ( c1 : Class ) <-[:OBSERVED]- ( e1 : Event ) -[df:DF]-> ( e2 : Event ) -[:OBSERVED]-> ( c2 : Class )
MATCH (e1) -[:CORR] -> (r) <-[:CORR]- (e2)
WHERE c1.Type = c2.Type AND r.EntityType = df.EntityType
WITH r.EntityType as EType,c1,count(df) AS df_freq,c2
MERGE ( c1 ) -[rel2:DF_C {EntityType:EType}]-> ( c2 ) ON CREATE SET rel2.count=df_freq"""
session.run(query).single()

## Add employees

In [None]:
# Create one Employee node of Type "Usuario" per distinct Usuario value found
# on the events.
query = """MATCH ( e : Event ) UNWIND e.Usuario AS employee 
WITH distinct employee
MERGE ( :Employee { Name:employee, Type:"Usuario", ID: employee})"""
session.run(query).single()

In [None]:
# Link each employee to the events that list them as Usuario.
# CREATE (not MERGE) is used, so re-running this cell adds duplicate
# WORKED_ON relationships.
query = """MATCH ( m : Employee ) WHERE m.Type = "Usuario"
MATCH ( e : Event ) WHERE m.Name = e.Usuario
CREATE ( e ) <-[:WORKED_ON]- ( m )"""
session.run(query).single()

In [None]:
# For every employee, order the events they worked on by timestamp (node id
# as tie-breaker) and connect consecutive events with a DF_EMP relationship
# carrying the employee ID and both event timestamps.
query = """MATCH (emp:Employee)
MATCH (emp)-[:WORKED_ON]->(e)
WITH emp, e AS nodes ORDER BY e.timestamp, ID(e)
WITH emp, collect(nodes) AS event_node_list
UNWIND range(0, size(event_node_list)-2) AS i
WITH emp, event_node_list[i] AS e1, event_node_list[i+1] AS e2
MERGE (e1)-[df:DF_EMP {ID:emp.ID, startTimestamp: e1.timestamp, endTimestamp: e2.timestamp}]->(e2)
"""
session.run(query).single()

## Add washing machines

In [None]:
# Create one Rack node per distinct additionalInfo5 value. The value is split
# on ' - ' into a name and an activity, and the first run of digits in the
# name is stored as the rack number.
query = """
MATCH (e:Event) UNWIND e.additionalInfo5 AS rack
WITH DISTINCT rack, SPLIT(rack, ' - ') AS result
with rack, result, apoc.text.regexGroups(result[0], '([0-9]+)')[0][0] AS rackNumber
MERGE (:Rack {ID:rack, name: result[0], number:rackNumber, activity: result[1], EntityType:"Rack"})
"""
session.run(query).single()

In [None]:
# Create WashingMachine nodes from the additionalInfo1 field of the washing
# load events ("Carga L+D iniciada" / "Carga L+D liberada"). The field is
# split on ' - ' into activity and machine; when the machine part is missing
# the event's additionalInfo2 is used instead. Both parts are right-trimmed.
query = """
MATCH (e:Event) 
where e.Activity = "Carga L+D iniciada" or e.Activity = "Carga L+D liberada"
UNWIND e.additionalInfo1 AS wm
WITH DISTINCT wm, SPLIT(wm, ' - ') AS result, e
MERGE (:WashingMachine {activity: rtrim(result[0]), machine: COALESCE(rtrim(result[1]), rtrim(e.additionalInfo2)), EntityType:"Washing Machine"})
"""
session.run(query).single()

In [None]:
# Relate each Rack to the WashingMachine it was used with, based on washing
# load events that mention both (rack in additionalInfo5, washing activity in
# the first ' - ' segment of additionalInfo1).
#
# WashingMachine.activity is stored right-trimmed when the machine nodes are
# created, so the event-side segment is trimmed here as well; comparing the
# raw segment missed every event whose value had trailing whitespace before
# the ' - ' separator.
query = """
Match (r:Rack) 
Match (w:WashingMachine)
MATCH (e:Event) 
where e.Activity = "Carga L+D iniciada" or e.Activity = "Carga L+D liberada"
with SPLIT(e.additionalInfo1, ' - ') AS wm, e.additionalInfo5 as rack, r, w
where rack = r.ID and w.activity = rtrim(wm[0])
merge (r) -[:RELATED_TO] -> (w)
"""
session.run(query).single()

In [None]:
# Link every washing load event to its WashingMachine with a WASHED_IN
# relationship stamped with the event timestamp.
#
# WashingMachine.activity is stored right-trimmed when the machine nodes are
# created, so the first ' - ' segment of additionalInfo1 is trimmed before
# comparing; otherwise events with trailing whitespace in that segment were
# never linked.
query = """Match (w:WashingMachine)
MATCH (e:Event) 
where e.Activity = "Carga L+D iniciada" or e.Activity = "Carga L+D liberada"
with SPLIT(e.additionalInfo1, ' - ') AS wm, w, e
where w.activity = rtrim(wm[0])
create (e) -[:WASHED_IN {timestamp: e.timestamp}] -> (w)
"""
session.run(query).single()

In [None]:
# Link every washing load event to the Rack named in its additionalInfo5
# field with a WASHED_ON relationship stamped with the event timestamp.
query = """Match (r:Rack)
MATCH (e:Event) 
where e.Activity = "Carga L+D iniciada" or e.Activity = "Carga L+D liberada"
with e.additionalInfo5 as rack, r, e
where rack = r.ID
create (e) -[:WASHED_ON {timestamp: e.timestamp}] -> (r)"""
session.run(query).single()

## Add sterilization machines

In [None]:
# Create SterilizationMachine nodes from the additionalInfo1 field of the
# sterilizer load events. The field is split on ' - ' into activity and
# machine; machines named 'amsco vpro' or 'amsco eagle' (case-insensitive)
# are typed as low-temperature, all others as high-temperature.
query = """MATCH (e:Event) 
where e.Activity = "Composición de cargas" or e.Activity = "Carga de esterilizador liberada"
UNWIND e.additionalInfo1 AS sm
WITH DISTINCT sm, SPLIT(sm, ' - ') AS result, e
MERGE (:SterilizationMachine {activity: rtrim(result[0]), machine: rtrim(result[1]), EntityType:"Sterilization Machine",
 type: 
 case 
 when rtrim(toLower(result[1])) in ['amsco vpro', 'amsco eagle'] then 'Low Temperature Sterilization'
     else 'High Temperature Sterilization' end})
     """
session.run(query).single()

In [None]:
# Link every sterilizer load event to the SterilizationMachine whose machine
# name equals the trimmed second ' - ' segment of the event's additionalInfo1,
# with a STERILIZED_IN relationship stamped with the event timestamp.
query = """Match (s:SterilizationMachine)
MATCH (e:Event) 
where e.Activity = "Composición de cargas" or e.Activity = "Carga de esterilizador liberada"
with SPLIT(e.additionalInfo1, ' - ') as sm, s, e
where rtrim(sm[1]) = s.machine
create (e) -[:STERILIZED_IN {timestamp: e.timestamp}] -> (s)
"""
session.run(query).single()

## Batch activities

In [None]:
# Mark batched work: two events of the same activity class that are adjacent
# in an employee's DF_EMP chain and share the same timestamp get a DF_BATCH
# relationship carrying the activity name.
# TODO: check batch timestamps against the entry time for "Entrada" events.
# TODO: make separate batching relations for washing machines and
#       sterilization machines.
# NOTE(review): the DF_EMP match is undirected and the rest of the pattern is
# symmetric, so each pair appears to match twice and get a DF_BATCH in both
# directions -- confirm whether that is intended.
query = """
Match (e1:Event) - [:OBSERVED] -> (c:Class) <- [:OBSERVED] - (e2)
match (e1) -[:DF_EMP] - (e2)
where e1.timestamp = e2.timestamp
create (e1) - [:DF_BATCH {Activity: c.Name}] -> (e2)
"""
session.run(query).single()