In [1]:
import numpy as np
import pandas as pd 
import random 
import copy 
import warnings
import sys
import pickle

from tqdm import tqdm
from copy import deepcopy
from sklearn.ensemble import IsolationForest

sys.path.append('../../')
from ACME.ACME import ACME
from ACME.visual_utils import * 
sys.path.remove('../../')

# Silence every warning category for the remainder of the notebook.
warnings.filterwarnings(action='ignore')

# Fix both the stdlib and the NumPy global random generators so that
# stochastic steps further down are repeatable on Restart & Run All.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# AcME-AD to explain Isolation Forest in PIADE sequences dataset

In [2]:
# Load the hourly PIADE sequences, discard the interval timestamp column and
# replace missing values with 0.
data = (
    pd.read_csv('ad_industrial_datasets/piade_sequences_1h_data.csv')
    .drop(columns=['interval_start'])
    .fillna(0)
)

# Every remaining column except the equipment identifier is a model feature.
features = [name for name in data.columns if name != 'equipment_ID']

In [3]:
# Split the data into one frame per equipment ID, in order of first appearance.
# Each frame is an explicit copy: later cells add 'Score' and 'Prediction'
# columns to these frames, and writing into a filtered selection of `data`
# would otherwise raise pandas' SettingWithCopyWarning.
equipments = data['equipment_ID'].unique()
data_equipment = [data[data['equipment_ID'] == e].copy() for e in equipments]

## Model training

In [4]:
def if_score_function(model, X):
    """Turn a model's decision function into an anomaly score.

    Computes ``0.5 * (1 - model.decision_function(X))``, so a lower decision
    value yields a higher score and a decision value of 0 maps to 0.5.

    Parameters
    ----------
    model : object exposing ``decision_function(X)``
    X : samples to score, passed to ``decision_function`` unchanged

    Returns
    -------
    The rescaled scores, one per value returned by ``decision_function``.
    """
    flipped = -model.decision_function(X)
    return (flipped + 1) * 0.5

# Fit one Isolation Forest per equipment, then annotate that equipment's frame
# with the anomaly score and a binary anomaly flag (1 = anomaly, 0 = normal).
ad_models = []
for i in range(len(equipments)):
    # Select the feature matrix once and reuse it for fit / score / predict.
    X_equipment = data_equipment[i][features]

    # No random_state is passed on purpose: the forest then draws from the
    # global NumPy RNG seeded in the first cell.
    ad_model = IsolationForest().fit(X_equipment)
    ad_models.append(ad_model)

    # predict() labels outliers as -1 and inliers as +1; remap to 1 / 0 with a
    # vectorised comparison instead of a per-element lambda.
    is_anomaly = (ad_model.predict(X_equipment) == -1).astype('int64')

    # assign() returns a new frame, so nothing is written into a filtered
    # selection of the original data (no chained-assignment warning).
    data_equipment[i] = data_equipment[i].assign(
        Score=if_score_function(ad_model, X_equipment),
        Prediction=is_anomaly,
    )

## Explanations

In [5]:
# Keep, for every equipment, only the rows the model flagged as anomalies
# (Prediction == 1); these are the points that will be explained.
data_to_explain = [
    data_equipment[idx][data_equipment[idx]['Prediction'] == 1]
    for idx in range(len(equipments))
]

# Report how many anomalies each equipment contributes (1-based numbering).
for number, anomalies in enumerate(data_to_explain, start=1):
    print('Anomalies to explain in equipment', number, ':', len(anomalies))

Anomalies to explain in equipment 1 : 41
Anomalies to explain in equipment 2 : 26
Anomalies to explain in equipment 3 : 88
Anomalies to explain in equipment 4 : 12
Anomalies to explain in equipment 5 : 12


Compute and save explanations for all the equipments

In [6]:
# # comment this part out if you want to use our saved explanations
# acme_rankings = [pd.DataFrame(columns=features) for _ in range(len(equipments))]
# acme_local_explanations = []

# for i in range(len(equipments)):
#     print('Explaining equipment', i+1, 'with AcME-AD')
#     acme_exp = ACME(ad_models[i], 'Score', features=features, task = 'ad', score_function=if_score_function)
#     acme_exp = acme_exp.explain(data_equipment[i], robust = True)

#     acme_local_table = {}
#     for j in tqdm(range(len(data_to_explain[i]))):
#         local_explanation = acme_exp.explain_local(data_to_explain[i].iloc[j]) 
#         feature_table = local_explanation.feature_importance(local=True, weights={"delta":0.3, "ratio":0.2, "change":0.3, "distance":0.2})
#         acme_rankings[i].loc[j] = feature_table['importance'].rank(ascending=False, method='min')
        
#         acme_local_table[j] = local_explanation
#     acme_local_explanations.append(acme_local_table)

In [7]:
# Load the saved explanations (comment this cell out if you recompute them in
# the cell above instead).
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# result files that come from a trusted source.
with open('results/piade_if_acme_rankings.pkl', 'rb') as rankings_file:
    acme_rankings = pickle.load(rankings_file)

with open('results/piade_if_acme_local_explanations.pkl', 'rb') as explanations_file:
    acme_local_explanations = pickle.load(explanations_file)

For each equipment plot the stacked barplot. 

Note: features that are ranked in the k-th position for less than t% of the anomalies are merged into a single 'other' group. Here t% = 5%, but a different threshold can be chosen depending on the application.

In [8]:
# Custom colors for the results in the paper.
# Each row is (feature name, bar color, fill pattern); an empty pattern string
# means no pattern is set for that feature.
_feature_palette = [
    ("#changes", "midnightblue", ""),
    ("%idle", "limegreen", "+"),
    ("A_004", "turquoise", "-"),
    ("A_005", "#858be3", ""),
    ("A_008", "#ff8200", "-"),
    ("A_010", "deepskyblue", "."),
    ("A_012", "#a777f1", "\\"),
    ("A_017", "lightslategray", "|"),
    ("A_020", "#665cfa", "x"),
    ("A_027", "#008bff", "+"),
    ("count_sum", "crimson", ""),
    ("downtime/downtime", "#4af5f9", "/"),
    ("idle/idle", "wheat", "x"),
    ("idle/performance_loss", "lavender", "."),
    ("performance_loss/performance_loss", 'rosyBrown', "|"),
    ("production/production", "steelBlue", ""),
    ("production/scheduled_downtime", "#00cc7c", ""),
    ("scheduled_downtime/downtime", "#ab63fa", "."),
]

# Mapping: feature name -> {"color": ..., "pattern": ...}, in the row order above.
feature_colors = {
    feature: {"color": color, "pattern": pattern}
    for feature, color, pattern in _feature_palette
}

In [9]:
# For every equipment: count how often each feature lands in each rank
# position, normalise each feature's counts by their sum, and draw the
# stacked barplot of the first 5 positions.
for idx in range(len(equipments)):
    rank_counts = acme_rankings[idx].apply(pd.Series.value_counts).fillna(0).astype(int)
    acme_rank_counting = rank_counts / rank_counts.sum()

    plot_kwargs = dict(title='Equipment ' + str(idx + 1), n_positions=5, threshold=0.05)
    if idx == 1:
        # Only the second equipment is drawn with the custom color palette.
        plot_kwargs['color_pattern_dict'] = feature_colors

    acme_stacked_barplot_fig = feature_importance_distribution_barplot(acme_rank_counting, **plot_kwargs)
    acme_stacked_barplot_fig.show()

## Local summary plot

In [10]:
# Show the local summary plot of one randomly picked explained anomaly per
# equipment (the pick is driven by the seeded `random` module).
for idx in range(len(equipments)):
    print("Equipment ", idx + 1)
    explanations = acme_local_explanations[idx]
    candidate_keys = list(explanations.keys())
    k = random.choice(candidate_keys)
    acme_loc_summary_plot = explanations[k].summary_plot(local=True, n_features=10)
    # Optional resize for export:
    # acme_loc_summary_plot.update_layout(width = 100 * 15, height = 100 * 6, font = dict(size=18))
    acme_loc_summary_plot.show()

Equipment  1
Using default weights for anomaly detection feature importance


Equipment  2
Using default weights for anomaly detection feature importance


Equipment  3
Using default weights for anomaly detection feature importance


Equipment  4
Using default weights for anomaly detection feature importance


Equipment  5
Using default weights for anomaly detection feature importance
