## Libraries

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt 
import warnings
warnings.filterwarnings('ignore')
from neo4j import GraphDatabase

## Connect to the Neo4j database

In [None]:
# Open the Bolt driver and the session that the query cells below run against.
# The connection settings can be overridden with the NEO4J_URI, NEO4J_USER and
# NEO4J_PASSWORD environment variables so the notebook can target another
# instance without editing credentials into the file; the defaults reproduce
# the values that were previously hard-coded here.
import os

_neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
_neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
_neo4j_password = os.environ.get("NEO4J_PASSWORD", "Neo4J1234")

driver = GraphDatabase.driver(_neo4j_uri, auth=(_neo4j_user, _neo4j_password))
# NOTE(review): no session.close() / driver.close() is visible in this part of
# the notebook — close both once the analysis is finished.
session = driver.session()

## Retrieve data from database

In [None]:
# Fetch every "Entrada Material Sucio" event and load the rows into a DataFrame.
query = '''match (e:Event {Activity: "Entrada Material Sucio"})
            return e.KitID, e.Código, e.Activity, e.timestamp, e.Usuario'''
result = session.run(query)
# Column names come from the RETURN clause of the query.
columns = result.keys()
records = [row.values() for row in result]
df = pd.DataFrame(records, columns=columns)

# Parse the timestamp column into pandas datetimes using one fixed format.
timestamp_format = '%Y-%m-%dT%H:%M:%S.%f%z'
df['e.timestamp'] = pd.to_datetime(df['e.timestamp'], format=timestamp_format)
df.head()

In [None]:
# Build a per-event view: drop the kit/code columns, collapse the rows that
# become identical once those columns are gone, and order by timestamp.
# Each step returns a new frame, so df itself is left untouched.
df_time = (
    df.drop(columns=['e.KitID', 'e.Código'])
    .drop_duplicates()
    .sort_values(by='e.timestamp')
)
df_time.head()

In [None]:

# Gap between each row of df_time and the row before it (first row is NaT).
time_diff = df_time.loc[:, 'e.timestamp'].diff(periods=1)
# Despite the name, this holds the full describe() summary, not only the mean.
average_time_diff = time_diff.describe()
average_time_diff

In [None]:
# Same gap summary, computed over the full df instead of df_time.
# df is sorted in place, so later cells see it in timestamp order.
df.sort_values('e.timestamp', inplace=True)
time_diff = df.loc[:, 'e.timestamp'].diff(periods=1)
average_time_diff = time_diff.describe()
average_time_diff

In [None]:
# Order rows by user, then by time, so that consecutive rows of one user are
# adjacent before the per-user gaps are taken.
df.sort_values(by=['e.Usuario', 'e.timestamp'], inplace=True)

# Gap to the previous row of the same user; the first row of each user is NaT.
time_diff = df.groupby('e.Usuario')['e.timestamp'].diff()

# Walk the rows in their sorted order keeping a running counter: it goes back
# to 0 whenever the gap is at most one minute and grows by one otherwise.
# A NaT gap compares as False, so it takes the "grow by one" branch.
# The counter is not reset when the user changes.
one_minute = pd.Timedelta(minutes=1)
incremental_value = 0
for index in df.index:
    within_a_minute = time_diff[index] <= one_minute
    incremental_value = 0 if within_a_minute else incremental_value + 1
    df.at[index, 'incremental_value'] = incremental_value

df.head()

In [None]:
# Load KitID, duration between sterilizations and cycle number from every
# DF_CYCLE relationship. This rebinds df, replacing the events table above.
query = '''MATCH p=()-[r:DF_CYCLE]->() 
 return r.KitID, r.DurationBetweenSterilizations, r.CycleNumber'''
result = session.run(query)
# Column names come from the RETURN clause of the query.
columns = result.keys()
records = [row.values() for row in result]
df = pd.DataFrame(records, columns=columns)
df.head()

In [None]:
# Export the DF_CYCLE table to CSV, leaving out the DataFrame index.
# NOTE(review): the destination is an absolute path on one user's machine.
csv_file_path = "/Users/abdalrhman/Documents/bdma-thesis/python/data/duration_between_cycles.csv"
df.to_csv(path_or_buf=csv_file_path, index=False)

In [None]:
# Plot, for each kit, how the duration between sterilizations changes from one
# cycle to the next.
#
# The query that builds df has no ORDER BY, so the rows arrive in no guaranteed
# order. diff() subtracts the previous row within each kit, so the rows must be
# in cycle order first; otherwise the "difference" is taken between whatever
# cycles happen to be adjacent. Differencing a sorted view keeps df's own row
# order untouched: the assignment re-aligns the result on the index.
# NOTE(review): assumes r.CycleNumber sorts in cycle order (i.e. is numeric) —
# confirm against the graph schema.
cycle_ordered = df.sort_values(by=['r.KitID', 'r.CycleNumber'])
df['DurationDifference'] = (
    cycle_ordered.groupby('r.KitID')['r.DurationBetweenSterilizations'].diff()
)

fig, ax = plt.subplots(figsize=(10, 6))

# One bar series per kit; the first cycle of each kit has no predecessor and
# therefore a missing difference.
for kit_id, group in df.groupby('r.KitID'):
    ax.bar(group['r.CycleNumber'], group['DurationDifference'], label=kit_id)

ax.set_xlabel('Cycle Number')
ax.set_ylabel('Difference in Duration Between Sterilizations')
ax.set_title('Difference in Duration Between Sterilizations by Cycle Number')
ax.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## Sandbox

In [None]:
# Scratch Cypher kept as a bare string literal; nothing in this cell passes it
# to session.run().
# Orders the "Carga L+D iniciada" events by timestamp (then node id), pairs each
# event with the next one, keeps the pairs whose batchID differs and whose
# first event has additionalInfo1 set, and feeds the `.minutes` of the duration
# between the two timestamps to apoc.agg.statistics.
'''MATCH (e: Event {Activity: "Carga L+D iniciada"})
WITH e AS nodes ORDER BY e.timestamp, ID(e)
WITH collect(nodes) AS event_node_list
UNWIND range(0, size(event_node_list)-2) AS i
WITH event_node_list[i] AS e1, event_node_list[i+1] AS e2
where e1.batchID <> e2.batchID and e1.additionalInfo1 is not null
return apoc.agg.statistics(duration.between(e1.timestamp, e2.timestamp).minutes)
'''


# Scratch Cypher kept as a bare string literal; nothing in this cell passes it
# to session.run().
# Collects PreviousCaseID from the DF_CYCLE relationships of kit
# 'CONT-HOS.CV-1', then returns duration and activities of the Run nodes whose
# CaseID is in that list (at most 100 rows).
'''MATCH ()-[r:DF_CYCLE]->()
where r.KitID = 'CONT-HOS.CV-1'
WITH collect(r.PreviousCaseID) as cids
match (r:Run)
where r.CaseID in cids 
return r.duration, r.activities
limit 100'''


# Scratch Cypher kept as a bare string literal; nothing in this cell passes it
# to session.run().
# Step 1: collect the KitIDs of DF_CYCLE relationships whose EndActivity is one
#         of the four listed activities.
# Step 2: for those kits, take the DF_CYCLE relationships whose EndActivity is
#         not "Carga L+D liberada" and collect their PreviousCaseID.
# Step 3: return the Run nodes with those CaseIDs whose activities contain none
#         of the five listed strings (at most 100 rows).
# The IN operand is written as a list literal ([...]), like the other queries
# in this cell; a parenthesised, comma-separated group is not a Cypher list.
'''MATCH ()-[r:DF_CYCLE]->()
where r.EndActivity in ["Carga L+D liberada", "Carga L+D iniciada", "Entrada Material Sucio", "Cargado en carro L+D"]
WITH collect(r.KitID) as kids
MATCH ()-[r:DF_CYCLE]->()
where r.KitID in kids and r.EndActivity <> "Carga L+D liberada"
WITH collect(r.PreviousCaseID) as cids
match (r:Run)
where r.CaseID in cids and not (r.activities contains "Montaje" or r.activities contains "Producción montada" or r.activities contains "Composición de cargas" or r.activities contains "Carga de esterilizador liberada" or r.activities contains "Comisionado")
return r.ID, r.CaseID, r.duration, r.activities
limit 100'''


# Scratch Cypher kept as a bare string literal; nothing in this cell passes it
# to session.run().
# Step 1: collect the KitIDs of DF_CYCLE relationships ending in "Montaje" or
#         "Producción montada".
# Step 2: for those kits, take the DF_CYCLE relationships whose EndActivity is
#         not one of the four listed activities and collect PreviousCaseID and
#         NextCaseID (ncids is not referenced again in this query).
# Step 3: match Runs with a CaseID in pcids whose activities contain none of the
#         three listed strings, follow CORR / DF_CYCLE to an Event, and collect
#         that event's CaseID.
# Step 4: return the Run nodes with those CaseIDs (at most 100 rows).
'''MATCH ()-[r:DF_CYCLE]->()
where r.EndActivity in ["Montaje", "Producción montada"]
WITH collect(r.KitID) as kids
MATCH ()-[r:DF_CYCLE]->()
where r.KitID in kids and not r.EndActivity in ["Entrada Material Sucio",  "Cargado en carro L+D", "Carga L+D iniciada", "Carga L+D liberada"]
WITH collect(r.PreviousCaseID) as pcids, collect(r.NextCaseID) as ncids
match (r:Run) <- [:CORR] - (e:LastKitEvent) - [:DF_CYCLE] -> (ev:Event)
where r.CaseID in pcids and not (r.activities contains "Composición de cargas" or r.activities contains "Carga de esterilizador liberada" or r.activities contains "Comisionado")
//return r.ID, r.CaseID, ev.CaseID, r.start_timestamp, r.end_timestamp, r.duration, r.activities
//order by r.ID, r.start_timestamp
with collect (ev.CaseID) as ecids
match (r:Run) 
where r.CaseID in ecids
return r.ID, r.CaseID, r.start_timestamp, r.end_timestamp, r.duration, r.activities
limit 100'''

# Scratch Cypher kept as a bare string literal; nothing in this cell passes it
# to session.run().
# Collects the distinct KitIDs of DF_CYCLE relationships ending in one of the
# three listed activities, then, for each of them, sets
# isWashingMachineContainer on the Kit that HAS_UNIT an Entity with that ID.
# The property map uses single braces: this is a plain string (no f-string or
# .format call), so doubled braces would reach Cypher literally.
'''MATCH ()-[r:DF_CYCLE]->()
where r.EndActivity in ["Carga L+D liberada", "Carga L+D iniciada", "Cargado en carro L+D"]
WITH collect(distinct r.KitID) as kids 
UNWIND range(0, size(kids)-1) AS i
with kids[i] as kid
match (k:Kit) - [:HAS_UNIT] -> (e:Entity {ID: kid})
set k.isWashingMachineContainer = True'''