# Feature Importance

In this file I use following methods to find out important features in data.

1. Perturbation : Adding noise in the feature checking how the model performance is deteriorated
2. Missing Values : Replacing feature values with 0 and checking how the model performance is deteriorated
3. Permutation : Shuffling the feature values and checking how the model performance is deteriorated
4. Shap Feature Importance: Using Shap's DeepExplainer to see time-step wise feature importance


### Importing Required Libraries and Data

In [1]:
!pip install shap



In [2]:
import pandas as pd
import numpy as np

#to plot the data
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

import time
import os
# os.chdir("C:/Data/aircraft/")

from sklearn.preprocessing import MinMaxScaler #to normalize data
from sklearn.metrics import classification_report, confusion_matrix, roc_curve
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, precision_score, recall_score
from sklearn.metrics import mean_squared_error

#for deep learning
import keras
import keras.backend as k
from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation, Masking, Dropout
from keras.optimizers import RMSprop, Adam
from keras.callbacks import History
from keras import callbacks

In [3]:
import tensorflow as tf
tf.compat.v1.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [4]:
def prepare_data(drop_cols = True):
    dependent_var = ['RUL']
    index_columns_names =  ["UnitNumber","Cycle"]
    operational_settings_columns_names = ["OpSet"+str(i) for i in range(1,4)]
    sensor_measure_columns_names =["SensorMeasure"+str(i) for i in range(1,22)]
    input_file_column_names = index_columns_names + operational_settings_columns_names + sensor_measure_columns_names

    cols_to_drop = ['OpSet3', 'SensorMeasure1', 'SensorMeasure5', 'SensorMeasure6', 'SensorMeasure10', 'SensorMeasure14',
     'SensorMeasure16', 'SensorMeasure18', 'SensorMeasure19']

    df_train = pd.read_csv('https://raw.githubusercontent.com/ericlrf/rul/main/CMAPSSData/train_FD001.txt',delim_whitespace=True,names=input_file_column_names)

    rul = pd.DataFrame(df_train.groupby('UnitNumber')['Cycle'].max()).reset_index()
    rul.columns = ['UnitNumber', 'max']
    df_train = df_train.merge(rul, on=['UnitNumber'], how='left')
    df_train['RUL'] = df_train['max'] - df_train['Cycle']
    df_train.drop('max', axis=1, inplace=True)

    df_test = pd.read_csv('https://raw.githubusercontent.com/ericlrf/rul/main/CMAPSSData/test_FD001.txt', delim_whitespace=True, names=input_file_column_names)
    
    if(drop_cols == True):
        df_train = df_train.drop(cols_to_drop, axis = 1)
        df_test = df_test.drop(cols_to_drop, axis = 1)

    y_true = pd.read_csv('https://raw.githubusercontent.com/ericlrf/rul/main/CMAPSSData/RUL_FD001.txt', delim_whitespace=True,names=["RUL"])
    y_true["UnitNumber"] = y_true.index
    
    return df_train, df_test, y_true

In [5]:
df_train, df_test, y_true = prepare_data(drop_cols=False)
df_train.shape, df_test.shape, y_true.shape

((20631, 27), (13096, 26), (100, 2))

In [6]:
feats = df_train.columns.drop(['UnitNumber', 'Cycle', 'RUL'])

In [7]:
min_max_scaler = MinMaxScaler(feature_range=(-1,1))

df_train[feats] = min_max_scaler.fit_transform(df_train[feats])
df_test[feats] = min_max_scaler.transform(df_test[feats])

In [8]:
df_train['failure'] = [1 if i < 50 else 0 for i in df_train.RUL]
y_true['failure'] = [1 if i < 50 else 0 for i in y_true.RUL]

In [9]:
df_train.head()

Unnamed: 0,UnitNumber,Cycle,OpSet1,OpSet2,OpSet3,SensorMeasure1,SensorMeasure2,SensorMeasure3,SensorMeasure4,SensorMeasure5,SensorMeasure6,SensorMeasure7,SensorMeasure8,SensorMeasure9,SensorMeasure10,SensorMeasure11,SensorMeasure12,SensorMeasure13,SensorMeasure14,SensorMeasure15,SensorMeasure16,SensorMeasure17,SensorMeasure18,SensorMeasure19,SensorMeasure20,SensorMeasure21,RUL,failure
0,1,1,-0.08046,-0.666667,-1.0,-1.0,-0.63253,-0.186396,-0.380486,-1.0,1.0,0.452496,-0.515152,-0.78049,-1.0,-0.261905,0.266525,-0.588235,-0.600784,-0.272028,-1.0,-0.333333,-1.0,-1.0,0.426357,0.449323,191,0
1,1,2,0.218391,-0.5,-1.0,-1.0,-0.433735,-0.093961,-0.294733,-1.0,1.0,0.256039,-0.575758,-0.799515,-1.0,-0.238095,0.530917,-0.441176,-0.674373,-0.177376,-1.0,-0.333333,-1.0,-1.0,0.333333,0.462027,190,0
2,1,3,-0.494253,0.5,-1.0,-1.0,-0.313253,-0.260955,-0.258947,-1.0,1.0,0.42029,-0.454545,-0.719914,-1.0,-0.5,0.590618,-0.558824,-0.656414,-0.28511,-1.0,-0.666667,-1.0,-1.0,0.255814,0.242751,189,0
3,1,4,0.08046,0.0,-1.0,-1.0,-0.313253,-0.487683,-0.33761,-1.0,1.0,0.481481,-0.363636,-0.750965,-1.0,-0.666667,0.778252,-0.411765,-0.650222,-0.666795,-1.0,-0.333333,-1.0,-1.0,0.147287,0.324772,188,0
4,1,5,-0.218391,-0.333333,-1.0,-1.0,-0.301205,-0.485066,-0.190749,-1.0,1.0,0.336554,-0.515152,-0.700081,-1.0,-0.488095,0.492537,-0.529412,-0.650532,-0.195845,-1.0,-0.166667,-1.0,-1.0,0.178295,0.409003,187,0


In [10]:
df_test.head()

Unnamed: 0,UnitNumber,Cycle,OpSet1,OpSet2,OpSet3,SensorMeasure1,SensorMeasure2,SensorMeasure3,SensorMeasure4,SensorMeasure5,SensorMeasure6,SensorMeasure7,SensorMeasure8,SensorMeasure9,SensorMeasure10,SensorMeasure11,SensorMeasure12,SensorMeasure13,SensorMeasure14,SensorMeasure15,SensorMeasure16,SensorMeasure17,SensorMeasure18,SensorMeasure19,SensorMeasure20,SensorMeasure21
0,1,1,0.264368,0.5,-1.0,-1.0,0.090361,-0.378679,-0.461175,-1.0,1.0,0.304348,-0.575758,-0.744773,-1.0,-0.583333,0.292111,-0.558824,-0.73568,-0.38207,-1.0,-0.333333,-1.0,-1.0,0.116279,0.323667
1,1,2,-0.310345,-0.5,-1.0,-1.0,-0.698795,-0.240898,-0.555368,-1.0,1.0,0.610306,-0.666667,-0.706632,-1.0,-0.22619,0.479744,-0.470588,-0.590463,-0.573682,-1.0,-0.166667,-1.0,-1.0,0.364341,0.373654
2,1,3,0.034483,0.166667,-1.0,-1.0,-0.246988,-0.306736,-0.355503,-1.0,1.0,0.371981,-0.545455,-0.683837,-1.0,-0.22619,0.398721,-0.558824,-0.688719,-0.082724,-1.0,-0.166667,-1.0,-1.0,0.457364,0.442695
3,1,4,0.482759,0.0,-1.0,-1.0,-0.259036,-0.429693,-0.183997,-1.0,1.0,0.359098,-0.606061,-0.788567,-1.0,-0.488095,0.147122,-0.5,-0.65982,-0.485956,-1.0,-0.5,-1.0,-1.0,0.333333,0.32422
4,1,5,0.16092,0.0,-1.0,-1.0,-0.216867,-0.295836,-0.335922,-1.0,1.0,0.388084,-0.666667,-0.795208,-1.0,-0.452381,0.47548,-0.558824,-0.694499,-0.39823,-1.0,-0.666667,-1.0,-1.0,0.317829,0.432753


In [11]:
y_true.head()

Unnamed: 0,RUL,UnitNumber,failure
0,112,0,0
1,98,1,0
2,69,2,0
3,82,3,0
4,91,4,0



LSTM expects an input in the shape of a numpy array of 3 dimensions and I will need to convert train and test data accordingly.

In [12]:
def gen_train(id_df, seq_length, seq_cols):
    """
        function to prepare train data into (samples, time steps, features)
        id_df = train dataframe
        seq_length = look back period
        seq_cols = feature columns
    """
        
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    lstm_array=[]
    
    for start, stop in zip(range(0, num_elements-seq_length+1), range(seq_length, num_elements+1)):
        lstm_array.append(data_array[start:stop, :])
    
    return np.array(lstm_array)

In [13]:
def gen_target(id_df, seq_length, label):
    data_array = id_df[label].values
    num_elements = data_array.shape[0]
    return data_array[seq_length-1:num_elements+1]

In [14]:
def gen_test(id_df, seq_length, seq_cols, mask_value):
    """
        function to prepare test data into (samples, time steps, features)
        function only returns last sequence of data for every unit
        id_df = test dataframe
        seq_length = look back period
        seq_cols = feature columns
    """
    df_mask = pd.DataFrame(np.zeros((seq_length-1,id_df.shape[1])),columns=id_df.columns)
    df_mask[:] = mask_value
    
    id_df = df_mask.append(id_df,ignore_index=True)
    
    data_array = id_df[seq_cols].values
    num_elements = data_array.shape[0]
    lstm_array=[]
    
    start = num_elements-seq_length
    stop = num_elements
    
    lstm_array.append(data_array[start:stop, :])
    
    return np.array(lstm_array)


Let's define look back period and mask_value

In [15]:
sequence_length = 50 #predicting using last 30 cycle values
mask_value = 0


Let's prepare data using above functions.

In [16]:
#generate train
x_train=np.concatenate(list(list(gen_train(df_train[df_train['UnitNumber']==unit], sequence_length, feats)) for unit in df_train['UnitNumber'].unique()))
print(x_train.shape)

(15731, 50, 24)


In [17]:
#generate target of train
y_train = np.concatenate(list(list(gen_target(df_train[df_train['UnitNumber']==unit], sequence_length, "failure")) for unit in df_train['UnitNumber'].unique()))
y_train.shape

(15731,)

In [18]:
#generate test
x_test=np.concatenate(list(list(gen_test(df_test[df_test['UnitNumber']==unit], sequence_length, feats, mask_value)) for unit in df_test['UnitNumber'].unique()))
print(x_test.shape)

(100, 50, 24)


In [19]:
#true target of test 
y_test = y_true.RUL.values
y_test.shape

(100,)

In [20]:
nb_features = x_train.shape[2]
nb_out = 1

In [21]:
nb_features

24

In [22]:
class_0 = pd.Series(y_train).value_counts()[0]
class_1 = pd.Series(y_train).value_counts()[1]
total = class_0 + class_1

cls_wt = {0: class_1/total, 1: class_0/total}

In [23]:
cls_wt

{0: 0.3178437480134766, 1: 0.6821562519865234}

## Model 1

In [24]:
history = History()

model = Sequential()
model.add(LSTM(
         units=8,
         return_sequences=False,
         input_shape=(sequence_length, nb_features)))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 8)                 1056      
_________________________________________________________________
dense (Dense)                (None, 1)                 9         
Total params: 1,065
Trainable params: 1,065
Non-trainable params: 0
_________________________________________________________________


In [25]:
%%time
# fit the model
model.fit(x_train, y_train, epochs=100, batch_size=64, validation_split=0.1, verbose=1, class_weight = cls_wt,
          callbacks = [history,
                       keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')])

Train on 14157 samples, validate on 1574 samples
Epoch 1/100
Epoch 2/100
   64/14157 [..............................] - ETA: 3s - loss: 0.1306 - acc: 0.8906



Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
CPU times: user 1min 35s, sys: 5.54 s, total: 1min 41s
Wall time: 1min 2s


<tensorflow.python.keras.callbacks.History at 0x7f8373821490>

In [27]:
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (10, 4))

# Accuracy
ax[0].plot(history.history['acc'])
ax[0].plot(history.history['val_acc'])
ax[0].set_ylabel('Accuracy')
ax[0].set_xlabel('# Epoch')
ax[0].legend(['train', 'validation'], loc='upper left')
ax[0].set_title('Accuracy')

# Loss
ax[1].plot(history.history['loss'])
ax[1].plot(history.history['val_loss'])
ax[1].set_ylabel('Loss')
ax[1].set_xlabel('# Epoch')
ax[1].legend(['train', 'validation'], loc='upper left')
ax[1].set_title('Loss')

Text(0.5, 1.0, 'Loss')

In [28]:
def print_results(y_test, y_pred):
    
    #f1-score
    f1 = f1_score(y_test, y_pred)
    print("F1 Score: ", f1)
    print(classification_report(y_true.failure, y_pred))
    
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(12,12))
    plt.subplot(221)
    sns.heatmap(conf_matrix, fmt = "d",annot=True, cmap='Blues')
    b, t = plt.ylim()
    plt.ylim(b + 0.5, t - 0.5)
    plt.title('Confuion Matrix')
    plt.ylabel('True Values')
    plt.xlabel('Predicted Values')

    #roc_auc_score
    model_roc_auc = roc_auc_score(y_test, y_pred) 
    print ("Area under curve : ",model_roc_auc,"\n")
    fpr,tpr,thresholds = roc_curve(y_test, y_pred)
    gmeans = np.sqrt(tpr * (1-fpr))
    ix = np.argmax(gmeans)
    threshold = np.round(thresholds[ix],3)

    plt.subplot(222)
    plt.plot(fpr, tpr, color='darkorange', lw=1, label = "Auc : %.3f" %model_roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.scatter(fpr[ix], tpr[ix], marker='o', color='black', label='Best Threshold:' + str(threshold))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")

In [29]:
y_pred = model.predict_classes(x_test)

print_results(y_true.failure, y_pred)

F1 Score:  0.967741935483871
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        70
           1       0.94      1.00      0.97        30

    accuracy                           0.98       100
   macro avg       0.97      0.99      0.98       100
weighted avg       0.98      0.98      0.98       100

Area under curve :  0.9857142857142857 





In [30]:
confusion_matrix(y_train, model.predict_classes(x_train))



array([[10413,   318],
       [  203,  4797]])

In [31]:
train_probs = model.predict_proba(x_train)



### Perturbation 

In [32]:
y_true['failure_probability'] = model.predict_proba(x_test)
y_true.head()



Unnamed: 0,RUL,UnitNumber,failure,failure_probability
0,112,0,0,0.00056
1,98,1,0,0.001515
2,69,2,0,0.115699
3,82,3,0,0.005637
4,91,4,0,0.005229


In [33]:
feature_importance = pd.DataFrame(columns=["feature", "Importance"])

for i, feat in enumerate(feats):
    
    new_x = x_test.copy()
    
    np.random.seed(42)
    perturbation = np.random.normal(0.0, 1, size = new_x.shape[:2])
    new_x[:, :, i] = new_x[:, :, i] + perturbation
    perturbed_out = model.predict_proba(new_x)
    
    effect = mean_squared_error(y_true.failure_probability, perturbed_out)
    feature_importance = feature_importance.append({"feature" : feat, "Importance" : effect}, ignore_index=True)



In [34]:
fig, ax = plt.subplots(figsize = (8,10))
feature_importance = feature_importance.sort_values(by = "Importance")
feature_importance.plot.barh(x = 'feature', y = 'Importance', ax = ax)

<matplotlib.axes._subplots.AxesSubplot at 0x7f83736ad390>

This is not right because OpSet3 is having on of the highest importance. Since we have only 8 nodes in hidden layer, network is not sparse and the weight of different features are shared. 

To remedy this, I would make sparse network with nodes in a hidden layer more than input features. This also helps for feature learning and to enrich feature represenatation.

## Model 2

In [35]:
len(feats)

24

In [36]:
history = History()

model2 = Sequential()
model2.add(LSTM(
         units=50,
         return_sequences=False,
         input_shape=(sequence_length, nb_features)))
model2.add(Dense(units=1, activation='sigmoid'))
model2.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

model2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 50)                15000     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
Total params: 15,051
Trainable params: 15,051
Non-trainable params: 0
_________________________________________________________________


In [37]:
%%time
# fit the model
model2.fit(x_train, y_train, epochs=100, batch_size=64, validation_split=0.1, verbose=1, class_weight = cls_wt,
          callbacks = [history,
                       keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='auto')])

Train on 14157 samples, validate on 1574 samples
Epoch 1/100



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
CPU times: user 2min 46s, sys: 12.4 s, total: 2min 58s
Wall time: 1min 51s


<tensorflow.python.keras.callbacks.History at 0x7f8362527e10>

In [38]:
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (10, 4))

# Accuracy
ax[0].plot(history.history['acc'])
ax[0].plot(history.history['val_acc'])
ax[0].set_ylabel('Accuracy')
ax[0].set_xlabel('# Epoch')
ax[0].legend(['train', 'validation'], loc='upper left')
ax[0].set_title('Accuracy')

# Loss
ax[1].plot(history.history['loss'])
ax[1].plot(history.history['val_loss'])
ax[1].set_ylabel('Loss')
ax[1].set_xlabel('# Epoch')
ax[1].legend(['train', 'validation'], loc='upper left')
ax[1].set_title('Loss')

Text(0.5, 1.0, 'Loss')

In [39]:
y_pred = model2.predict_classes(x_test)

print_results(y_true.failure, y_pred)

F1 Score:  0.9375
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        70
           1       0.88      1.00      0.94        30

    accuracy                           0.96       100
   macro avg       0.94      0.97      0.95       100
weighted avg       0.96      0.96      0.96       100





Area under curve :  0.9714285714285714 



In [40]:
confusion_matrix(y_train, model2.predict_classes(x_train))



array([[10434,   297],
       [   88,  4912]])

In [41]:
test_probs2 = model2.predict_proba(x_test)



### Perturbation 

In [42]:
feature_importance2 = pd.DataFrame(columns=["feature", "Importance"])

for i, feat in enumerate(feats):
    
    new_x = x_test.copy()
    
    np.random.seed(42)
    perturbation = np.random.normal(0.0, 1, size = new_x.shape[:2])
    new_x[:, :, i] = new_x[:, :, i] + perturbation
    perturbed_out = model2.predict_proba(new_x)
    
    effect = mean_squared_error(test_probs2, perturbed_out)
    feature_importance2 = feature_importance2.append({"feature" : feat, "Importance" : effect}, ignore_index=True)



In [43]:
fig, ax = plt.subplots(figsize = (8,10))
feature_importance2 = feature_importance2.sort_values(by = "Importance")
feature_importance2.plot.barh(x = 'feature', y = 'Importance', ax = ax)
ax.set_title('Perturbation: Feature Importance')

Text(0.5, 1.0, 'Perturbation: Feature Importance')

### Missing Values 

In [44]:
feature_importance2 = pd.DataFrame(columns=["feature", "Importance"])

for i, feat in enumerate(feats):
    
    new_x = x_test.copy()
    
    new_x[:, :, i] = 0
    missing_probs = model2.predict_proba(new_x)
    
    effect = mean_squared_error(test_probs2, missing_probs)
    feature_importance2 = feature_importance2.append({"feature" : feat, "Importance" : effect}, ignore_index=True)



In [45]:
fig, ax = plt.subplots(figsize = (8,10))
feature_importance2 = feature_importance2.sort_values(by = "Importance")
feature_importance2.plot.barh(x = 'feature', y = 'Importance', ax = ax)
ax.set_title('Missing Values: Feature Importance')

Text(0.5, 1.0, 'Missing Values: Feature Importance')

### Permutation 

In [46]:
feature_importance2 = pd.DataFrame(columns=["feature", "Importance"])

for i, feat in enumerate(feats):
    
    new_x = x_test.copy()
    
    np.random.shuffle(new_x[:, :, i])
    permute_probs = model2.predict_proba(new_x)
    
    effect = mean_squared_error(test_probs2, permute_probs)
    feature_importance2 = feature_importance2.append({"feature" : feat, "Importance" : effect}, ignore_index=True)



In [47]:
fig, ax = plt.subplots(figsize = (8,10))
feature_importance2 = feature_importance2.sort_values(by = "Importance")
feature_importance2.plot.barh(x = 'feature', y = 'Importance', ax = ax)
ax.set_title('Permutation: Feature Importance')

Text(0.5, 1.0, 'Permutation: Feature Importance')

### Shap Values 

In [48]:
import shap

random_ind = np.random.choice(x_train.shape[0], 1000, replace = False)
data = x_train[random_ind[0:1000]]

DE = shap.DeepExplainer(model2, data)
shap_val = DE.shap_values(x_test)
shap_val = np.array(shap_val)
shap_val = np.reshape(shap_val, (shap_val.shape[1], shap_val.shape[2], shap_val.shape[3]))
shap_abs = np.abs(shap_val)
sum_0 = np.sum(shap_abs, axis = 0)

print(sum_0.shape)




keras is no longer supported, please use tf.keras instead.
Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode. See PR #1483 for discussion.


(50, 24)


#### Last day's important features

In [49]:
shap_feat_importance = pd.DataFrame(columns=["features", "Importance"])
shap_feat_importance.features = feats
shap_feat_importance.Importance = sum_0[49]

fig, ax = plt.subplots(figsize = (8,10))
shap_feat_importance = shap_feat_importance.sort_values(by = "Importance")
shap_feat_importance.plot.barh(x = 'features', y = 'Importance', ax = ax)
ax.set_title('Shap Feature Importance of last cycle')

Text(0.5, 1.0, 'Shap Feature Importance of last cycle')

#### First day's important features

In [50]:
shap_feat_importance = pd.DataFrame(columns=["features", "Importance"])
shap_feat_importance.features = feats
shap_feat_importance.Importance = sum_0[0]

fig, ax = plt.subplots(figsize = (8,10))
shap_feat_importance = shap_feat_importance.sort_values(by = "Importance")
shap_feat_importance.plot.barh(x = 'features', y = 'Importance', ax = ax)
ax.set_title('Shap Feature Importance of first cycle')

Text(0.5, 1.0, 'Shap Feature Importance of first cycle')

#### Over all feature importance

In [51]:
shap_feat_importance = pd.DataFrame(columns=["features", "Importance"])
shap_feat_importance.features = feats
shap_feat_importance.Importance = sum_0.sum(axis = 0)

fig, ax = plt.subplots(figsize = (8,10))
shap_feat_importance = shap_feat_importance.sort_values(by = "Importance")
shap_feat_importance.plot.barh(x = 'features', y = 'Importance', ax = ax)
ax.set_title('Shap Feature Importance')

Text(0.5, 1.0, 'Shap Feature Importance')

### Feature Importance Visualization over time steps

In [52]:
feat_imp_df = pd.DataFrame(sum_0, columns=feats)
feat_imp_df.head()

Unnamed: 0,OpSet1,OpSet2,OpSet3,SensorMeasure1,SensorMeasure2,SensorMeasure3,SensorMeasure4,SensorMeasure5,SensorMeasure6,SensorMeasure7,SensorMeasure8,SensorMeasure9,SensorMeasure10,SensorMeasure11,SensorMeasure12,SensorMeasure13,SensorMeasure14,SensorMeasure15,SensorMeasure16,SensorMeasure17,SensorMeasure18,SensorMeasure19,SensorMeasure20,SensorMeasure21
0,0.067466,0.076687,0.004015,0.011458,0.101673,0.090934,0.172537,0.003888,0.02418,0.107604,0.07031,0.143296,0.009383,0.113334,0.043733,0.138703,0.184108,0.031607,0.011876,0.105792,0.007607,0.003834,0.134396,0.085472
1,0.053268,0.074028,0.003636,0.010612,0.091895,0.107709,0.174734,0.003741,0.013021,0.081406,0.060662,0.145939,0.006879,0.098566,0.0452,0.118558,0.196173,0.032632,0.010213,0.117191,0.006067,0.002817,0.083487,0.110836
2,0.036218,0.106497,0.002973,0.010475,0.065134,0.113376,0.228954,0.003983,0.019043,0.081371,0.068281,0.146996,0.003563,0.108903,0.041104,0.096067,0.190053,0.036836,0.008845,0.092825,0.007466,0.002597,0.062676,0.130094
3,0.050372,0.117098,0.00342,0.011448,0.069166,0.10956,0.239072,0.004397,0.025305,0.057173,0.059248,0.11968,0.002591,0.128787,0.040907,0.07035,0.186291,0.034789,0.009403,0.114578,0.009177,0.002745,0.039357,0.165948
4,0.053757,0.134172,0.001597,0.005057,0.052465,0.123351,0.224208,0.001907,0.01333,0.059612,0.072625,0.121684,0.001313,0.151862,0.042295,0.065845,0.175337,0.030067,0.004184,0.097666,0.004025,0.001135,0.041962,0.212656


#### Operational Setting

In [53]:
fig, ax = plt.subplots(figsize = (7,5))
feat_imp_df[["OpSet1", "OpSet2", "OpSet3"]].plot(ax = ax)
ax.set_xlabel("Cycle")
ax.set_ylabel("Feature Importance")

Text(0, 0.5, 'Feature Importance')

#### Top 5 Sensor Measures

In [54]:
fig, ax = plt.subplots(figsize = (7,5))
feat_imp_df[["SensorMeasure4", "SensorMeasure21", "SensorMeasure7", "SensorMeasure12", "SensorMeasure11"]].plot(ax = ax)
ax.set_xlabel("Cycle")
ax.set_ylabel("Feature Importance")

Text(0, 0.5, 'Feature Importance')