In [1]:
import numpy as np
import pandas as pd
import os
import ast

In [2]:
# Load the alimentation data (semicolon-separated CSV).
data_alim = pd.read_csv('sncb_alimentation.csv', sep=';')


def _parse_event_sequence(raw):
    """Convert a stringified list such as ``"[4026, 2708]"`` into a list of ints.

    Surrounding whitespace and the enclosing brackets are removed first. Blank
    tokens are skipped, so an empty list (``"[]"``) yields ``[]`` instead of
    failing on ``int('')``.
    """
    tokens = raw.strip().strip('[]').split(',')
    return [int(token) for token in tokens if token.strip()]


# Replace the textual column with real lists of integer event codes.
data_alim['events_sequence'] = data_alim['events_sequence'].apply(_parse_event_sequence)

In [3]:
# Read every CSV found in the results/results folder into a dict keyed by file name.
results = {}
csv_names = [name for name in os.listdir('results/results') if name.endswith('.csv')]
for csv_name in csv_names:
    frame = pd.read_csv('results/results/' + csv_name, sep=';')
    # The itemsets column is stored as text; turn each cell back into a Python literal.
    frame['itemsets'] = frame['itemsets'].apply(ast.literal_eval)
    results[csv_name] = frame


In [4]:
# Distinct incident types present in the data.
incidents = data_alim['incident_type'].unique()
print(incidents)


# Gather every distinct itemset across the per-incident result files,
# keeping the order in which each one is first encountered.
sequences = []
for incident_id in incidents:
    incident_itemsets = results[f'results_{incident_id}.csv']['itemsets']
    for itemset in incident_itemsets:
        if itemset in sequences:
            continue
        sequences.append(itemset)

[ 4 13 14  2 11 99  9 17  3 16  6  7]


In [5]:
# Dump the complete list of distinct itemsets (the printed output can be very long).
print(sequences)

[[4026], [2708], [4026, 2708], [4148], [2708, 4148], [4026, 4148], [2708, 4026, 4148], [4068, 2708], [4026, 2708, 4066], [4026, 4066], [4068, 4026, 2708], [4026, 4068], [4066, 2708], [4066], [4068], [2708, 2742], [2742], [4066, 4068], [2708, 4066, 4068], [4026, 4068, 4066], [2708, 4026, 4068, 4066], [4026, 2742], [4026, 2708, 2742], [2708, 4026, 4394], [4120, 4026], [4120, 2708], [4120], [4026, 4394], [4394, 2708], [4394], [4120, 4026, 2708], [4026, 4066, 2708], [4140], [2708, 4140], [4168, 2708], [4168], [4168, 2708, 4140], [4168, 4140], [2708, 4026, 4140], [4026, 4140], [4168, 4026], [4168, 4026, 2708], [4168, 4026, 4140], [4168, 2708, 4026, 4140], [4016, 2708], [3658, 4066], [3658, 2708], [4068, 4066, 2708], [4016], [3658], [3658, 2708, 4066], [2708, 3658, 3636], [4016, 4026, 2708], [2708, 4066, 3636, 3658], [3658, 3636], [4026, 2708, 3658, 4066], [2708, 4066, 3636], [4026, 3658, 4066], [4026, 3658], [2708, 3636], [3658, 3636, 4066], [4026, 2708, 3658], [4016, 4026], [4066, 3636], [

In [6]:
# Support of each itemset over the whole dataset: the fraction of rows (all incident
# types together) whose event sequence contains every event of the itemset.
# The row sets are built once here instead of once per (itemset, row) pair.
row_event_sets = [set(row) for row in data_alim['events_sequence']]

h_all_class = np.zeros(len(sequences))
for i, sequence in enumerate(sequences):
    wanted = set(sequence)
    # Summing booleans counts the rows that contain the whole itemset.
    h_all_class[i] = sum(wanted.issubset(events) for events in row_event_sets)
h_all_class = h_all_class/len(data_alim['events_sequence'])

print(h_all_class)

[0.9851632  0.99307616 0.98417409 ... 0.59347181 0.5727003  0.18991098]


In [7]:
# Compute the relevance of each itemset for each incident: the itemset's support
# inside the incident divided by its support over the whole dataset (h_all_class).
relevance = {}
relevance_best_case = {}
relevance_worst_case = {}
for incident in incidents:
    df_i = data_alim[data_alim['incident_type'] == incident]
    file = f'results_{incident}.csv'

    # Support listed in this incident's results file, keyed by itemset. Lists are
    # unhashable, hence the tuple keys; setdefault keeps the first occurrence,
    # matching a first-match linear search.
    support_by_itemset = {}
    for itemset, support in zip(results[file]['itemsets'], results[file]['support']):
        support_by_itemset.setdefault(tuple(itemset), support)

    # Itemsets that are absent from the file keep a support of 0.
    h_in = np.array(
        [support_by_itemset.get(tuple(sequence), 0.0) for sequence in sequences],
        dtype=float,
    )

    # Bounds on the in-incident support, identical for every itemset:
    #   best case  - the itemset occurs in every row of the incident (support 1);
    #   worst case - it occurs in a single row (support 1 / number of rows).
    h_in_best_case = np.ones(len(sequences))
    h_in_worst_case = np.ones(len(sequences))/len(df_i)

    relevance[incident] = h_in/h_all_class
    relevance_best_case[incident] = h_in_best_case/h_all_class
    relevance_worst_case[incident] = h_in_worst_case/h_all_class



In [8]:
# Rank every itemset by relevance for each incident, most relevant first.
# Each entry is (itemset, relevance, index into `sequences`); the index is kept so
# the best-/worst-case relevance arrays can be looked up for the same itemset.
relevance_max_incident = {}
for incident in incidents:
    ranked = [
        (sequence, relevance[incident][i], i)
        for i, sequence in enumerate(sequences)
    ]
    # list.sort is stable, so itemsets with equal relevance keep their `sequences` order.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    relevance_max_incident[incident] = ranked

    print(relevance_max_incident[incident])



[([2708, 4026, 4394], 1.2888924800689507, 23), ([4026, 4394], 1.2870898332436795, 27), ([4394, 2708], 1.279929389108805, 28), ([4394], 1.2728481753378018, 29), ([2708, 4026, 4148], 1.0610108917631043, 6), ([4026, 4148], 1.0598385040373992, 5), ([2708, 4148], 1.0528582284894032, 4), ([4148], 1.0459692978776949, 3), ([4120, 4026], 1.0386785900329918, 24), ([4120, 4026, 2708], 1.0386785900329918, 30), ([4120, 2708], 1.030536652597123, 25), ([4120], 1.0282337773957886, 26), ([4026, 2742], 1.0277871907827858, 21), ([4026, 2708, 2742], 1.0277871907827858, 22), ([2708, 2742], 1.0221585643272393, 15), ([2742], 1.017699857394514, 16), ([4026, 2708], 1.0160804020100502, 2), ([4026, 4068, 4066], 1.0154850590106304, 19), ([2708, 4026, 4068, 4066], 1.0154850590106304, 20), ([4026], 1.0150602409638554, 0), ([4068, 4026, 2708], 1.0141396652650674, 10), ([4026, 4068], 1.0141396652650674, 11), ([4026, 2708, 4066], 1.0076595396084216, 8), ([4026, 4066], 1.0076595396084216, 9), ([2708], 1.006972111553784

In [9]:
# Print the five most relevant itemsets for each incident, together with the
# best-case and worst-case relevance of the same itemset.
for incident in incidents:
    print(incident)
    # Slicing (instead of indexing positions 0..4) tolerates incidents that have
    # fewer than five ranked itemsets.
    for entry in relevance_max_incident[incident][:5]:
        score, seq_index = entry[1], entry[2]
        # Itemsets with zero relevance are not reported.
        if score > 0:
            print(f" max relevance {relevance_best_case[incident][seq_index]}")
            print(entry)
            print(f" min relevence {relevance_worst_case[incident][seq_index]}")
            print()
    print('====================================================== END ======================================================')
    print('=============================================================================================================')


4
 max relevance 1.415966386554622
([2708, 4026, 4394], 1.2888924800689507, 23)
 min relevence 0.018153415212238743

 max relevance 1.413986013986014
([4026, 4394], 1.2870898332436795, 27)
 min relevence 0.018128025820333512

 max relevance 1.4061196105702365
([4394, 2708], 1.279929389108805, 28)
 min relevence 0.018027174494490213

 max relevance 1.3983402489626555
([4394], 1.2728481753378018, 29)
 min relevence 0.017927439089264814

 max relevance 1.1183628318584071
([2708, 4026, 4148], 1.0610108917631043, 6)
 min relevence 0.014337985023825732

13
 max relevance 1.0698412698412698
([4068], 1.0059199361086153, 14)
 min relevence 0.0033642807227712893

 max relevance 1.0619747899159664
([4066], 1.0018630093546854, 13)
 min relevence 0.003339543364515618

 max relevance 1.0859291084854994
([4066, 4068], 1.0005573232272056, 17)
 min relevence 0.0034148714103317593

 max relevance 1.0732484076433122
([4068, 2708], 0.9989985178063535, 7)
 min relevence 0.003374994992589032

 max relevance

In [10]:
# Write the full relevance ranking of every incident to a text file.
with open('Relevance_event_sequence.txt', 'w') as report:
    for incident in incidents:
        # Header line: the incident identifier.
        report.write(f"{incident}\n")

        for entry in relevance_max_incident[incident]:
            # Only itemsets with strictly positive relevance are reported.
            if not entry[1] > 0:
                continue
            seq_index = entry[2]
            report.writelines([
                f" max relevance: {relevance_best_case[incident][seq_index]}\n",
                f"{entry}\n",
                f" min relevance: {relevance_worst_case[incident][seq_index]}\n",
                "\n",
            ])

        # Separator lines closing the incident's section.
        report.write('====================================================== END ======================================================\n')
        report.write('=============================================================================================================\n')
