In [18]:
import numpy as np
import pandas as pd
import os
import ast

In [19]:
# Load the "alimentation" dataset (semicolon-separated CSV).
data_alim = pd.read_csv('sncb_alimentation.csv', sep=';')

# The 'events + summary' column is read as text; parse every cell back into
# the Python literal it encodes.
raw_events_summary = data_alim['events + summary']
data_alim['events + summary'] = raw_events_summary.apply(ast.literal_eval)


In [20]:
# Load every CSV found in the results/results3 folder into a dict keyed by
# file name.
RESULTS3_DIR = 'results/results3'

results3 = {}
for csv_name in os.listdir(RESULTS3_DIR):
    if not csv_name.endswith('.csv'):
        continue
    frame = pd.read_csv(RESULTS3_DIR + '/' + csv_name, sep=';')
    results3[csv_name] = frame
    # 'itemsets' is read as text; parse each cell back into a Python literal.
    frame['itemsets'] = frame['itemsets'].apply(ast.literal_eval)


In [21]:
# Distinct incident types present in the data
incidents = data_alim['incident_type'].unique()
print(incidents)


# Flatten the itemsets of every per-incident result file into one list.
# No de-duplication is done: an itemset listed in several files appears
# once per file.
sequences = [
    itemset
    for incident_type in incidents
    for itemset in results3[f'results3_{incident_type}.csv']['itemsets']
]

[ 4 13 14  2 11 99  9 17  3 16  6  7]


In [2]:
# Scratch check: list.extend mutates the receiver in place and leaves its
# argument unchanged.
a, b = [0, 1], [1, 2]
a.extend(b)
print(a, b, sep='\n')

[0, 1, 1, 2]
[1, 2]


In [22]:
# Support of every sequence over the whole dataset: the fraction of rows whose
# 'events + summary' entry contains all the items of the sequence.
# h_all_class[i] refers to sequences[i].

# Convert each row to a set once, up front, instead of once per
# (sequence, row) pair.
row_item_sets = [set(row) for row in data_alim['events + summary']]

support_counts = []
for sequence in sequences:
    sequence_items = set(sequence)
    # Number of rows that contain every item of the sequence
    support_counts.append(
        sum(sequence_items.issubset(row_items) for row_items in row_item_sets)
    )

h_all_class = np.array(support_counts, dtype=float) / len(row_item_sets)

print(h_all_class)

[0.90603363 0.89119683 0.89119683 ... 0.70524233 0.29376855 0.18199802]


In [23]:
# For every incident type, compute the relevance of each sequence:
#     relevance = (support listed in the incident's result file)
#                 / (support over the whole dataset, h_all_class)
# Two bounds are computed alongside it:
#   - best case:  in-class support of 1 (the sequence occurs in every row of
#                 the incident type),
#   - worst case: in-class support of 1 / n (it occurs in a single row).
# All resulting arrays are aligned with `sequences`.
relevance = {}
relevance_best_case = {}
relevance_worst_case = {}
for incident in incidents:
    df_i = data_alim[data_alim['incident_type'] == incident]
    results_i = results3[f'results3_{incident}.csv']

    # Index the listed supports by itemset so that each sequence costs one
    # dict lookup instead of a scan over the whole result file.
    # setdefault keeps the first occurrence of an itemset, matching a
    # first-match-wins scan.
    # NOTE(review): assumes every item of an itemset is hashable (the stored
    # output shows (code, label) tuples) - confirm.
    support_by_itemset = {}
    for itemset, support in zip(results_i['itemsets'], results_i['support']):
        support_by_itemset.setdefault(tuple(itemset), support)

    # Sequences that are not listed in this incident's file get a support of 0.
    h_in = np.array(
        [support_by_itemset.get(tuple(sequence), 0.0) for sequence in sequences],
        dtype=float,
    )

    n_rows_i = len(df_i)
    # Best case counts every row of the incident type, worst case counts one.
    h_in_best_case = np.full(len(sequences), n_rows_i, dtype=float) / n_rows_i
    h_in_worst_case = np.ones(len(sequences)) / n_rows_i

    relevance[incident] = h_in / h_all_class
    relevance_best_case[incident] = h_in_best_case / h_all_class
    relevance_worst_case[incident] = h_in_worst_case / h_all_class



In [None]:
# For each incident type, rank all sequences by decreasing relevance, then
# keep only the best-ranked sequence for each distinct leading event.
relevance_max_incident = {}
for incident in incidents:
    # Number of rows of this incident type; computed once per incident
    # rather than once per sequence.
    n_incident_rows = len(data_alim[data_alim['incident_type'] == incident])

    # Each entry is:
    #   (sequence,
    #    relevance for this incident,
    #    index of the sequence in `sequences`,
    #    relevance * overall support * number of rows of this incident type)
    ranked = [
        (
            sequence,
            relevance[incident][i],
            i,
            relevance[incident][i] * h_all_class[i] * n_incident_rows,
        )
        for i, sequence in enumerate(sequences)
    ]
    # Stable sort: entries with equal relevance keep their order in `sequences`.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    relevance_max_incident[incident] = ranked

    print(relevance_max_incident[incident])

    # Remove duplicates based only on the first item of each sequence
    # (entry[0][0]); the first, i.e. highest-ranked, entry wins.
    seen = set()
    deduplicated = []
    for entry in ranked:
        leading_event = entry[0][0]
        if leading_event not in seen:
            seen.add(leading_event)
            deduplicated.append(entry)
    relevance_max_incident[incident] = deduplicated



[([(2708, 'DC')], 1.047111185757474, 0), ([(2708, 'DC')], 1.047111185757474, 6), ([(2708, 'DC')], 1.047111185757474, 10), ([(2708, 'DC')], 1.047111185757474, 225), ([(2708, 'DC')], 1.047111185757474, 226), ([(2708, 'DC')], 1.047111185757474, 227), ([(2708, 'DC')], 1.047111185757474, 290), ([(2708, 'DC')], 1.047111185757474, 353), ([(2708, 'DC')], 1.047111185757474, 1096), ([(2708, 'DC')], 1.047111185757474, 2868), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 4), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 13), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 228), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 291), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 354), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 1074), ([(2708, 'DC'), (4066, 'DC')], 1.0422075093649272, 2880), ([(2708, 'DC'), (4026, 'DC')], 1.0375075882403955, 3), ([(2708, 'DC'), (4026, 'DC')], 1.0375075882403955, 14), ([(2708, 'DC'), (4026, 'DC')], 1.0375075882403955, 262), ([(2708, 'DC'), 

In [25]:
# Print up to five of the top-ranked entries for each incident type, each one
# framed by its best-case and worst-case relevance.
for incident in incidents:
    print(incident)
    # Slicing instead of indexing 0..4 avoids an IndexError when an incident
    # has fewer than five entries.
    for entry in relevance_max_incident[incident][:5]:
        if entry[1] > 0:
            # entry[2] is the position of the sequence in `sequences`, which
            # is also its position in the best/worst case arrays.
            sequence_index = entry[2]
            print(f" max relevance {relevance_best_case[incident][sequence_index]}")
            print(entry)
            print(f" min relevence {relevance_worst_case[incident][sequence_index]}")
            print()
    print('end ===============================================================')

4
 max relevance 1.1037117903930131
([(2708, 'DC')], 1.047111185757474, 0)
 min relevence 0.014150151158884782

 max relevance 1.1220865704772476
([(4026, 'DC')], 1.0213864936395458, 1)
 min relevence 0.014385725262528813

 max relevance 1.1220865704772476
([(4066, 'DC')], 1.0213864936395458, 2)
 min relevence 0.014385725262528813

13
 max relevance 1.1220865704772476
([(4026, 'DC')], 1.0374007915733041, 1)
 min relevence 0.003528574120997634

 max relevance 1.1321388577827547
([(4068, 'DC')], 1.0288934902491071, 8)
 min relevence 0.0035601850873671534

 max relevance 1.1220865704772476
([(4066, 'DC')], 1.0268150692103115, 2)
 min relevence 0.003528574120997634

 max relevance 1.1037117903930131
([(2708, 'DC')], 1.0204127873444837, 0)
 min relevence 0.0034707917936887206

14
 max relevance 1.4442857142857144
([(4140, 'Battery')], 1.3570469798657718, 9)
 min relevence 0.00969319271332694

2
 max relevance 1.6934673366834172
([(4016, 'DC')], 1.5511591571301888, 141)
 min relevence 0.0142

In [26]:
# Write every entry with a strictly positive relevance to a text report,
# one section per incident type.
with open('Relevance_event alim sequence.txt', 'w') as report:
    for incident in incidents:
        # Section header: the incident type
        report.write(f"{incident}\n")

        for entry in relevance_max_incident[incident]:
            if not (entry[1] > 0):
                continue
            # entry[2] is the position of the sequence in the best/worst
            # case arrays.
            sequence_index = entry[2]
            report.write(f" max relevance: {relevance_best_case[incident][sequence_index]}\n")
            report.write(f"{entry}\n")
            report.write(f" min relevance: {relevance_worst_case[incident][sequence_index]}\n")
            report.write("\n")

        # Separator lines closing the section
        report.write('====================================================== END ======================================================\n')
        report.write('=============================================================================================================\n')