In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(11.7,8.27)})
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import tensorflow as tf
np.random.seed(0)
tf.random.set_seed(0)
FROM_SCRATCH = False
TF_MODEL_FNAMER = 'tf-clf-sales'
RFC_FNAMER = 'rfc-sales'
ENC_FNAMER = 'sales_encoderR'
DEC_FNAMER = 'sales_decoderR'

9.1.1 Preparing the data.
We’re using the wine-quality dataset, a numeric tabular dataset containing features that refer to the chemical composition of wines and quality ratings. To make this a simple classification task, we bucket all wines with ratings greater
than five as good, and the rest we label bad. We also normalize all the features.

In [3]:
store = pd.read_csv('input/store.csv')
train = pd.read_csv('input/train.csv',parse_dates=[2])
test = pd.read_csv('input/test.csv',parse_dates=[3])
# fillna in store with 0 has better result than median()
# Aufbereiten der daten
store.fillna(0, inplace=True)
# fill missing values in test with 1
# Aufbereiten der Daten
test.fillna(value = 1, inplace = True)
# merge data with store
# Alles in eine Tabelle
train = pd.merge(train, store, on='Store')
test = pd.merge(test, store, on='Store')


  train = pd.read_csv('input/train.csv',parse_dates=[2])


In [4]:
train.head()

Unnamed: 0,Store,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval
0,1,5,2015-07-31,5263,555,1,1,0,1,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
1,1,4,2015-07-30,5020,546,1,1,0,1,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
2,1,3,2015-07-29,4782,523,1,1,0,1,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
3,1,2,2015-07-28,5011,560,1,1,0,1,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
4,1,1,2015-07-27,6102,612,1,1,0,1,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0


In [5]:
test.head()

Unnamed: 0,Id,Store,DayOfWeek,Date,Open,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval
0,1,1,4,2015-09-17,1.0,1,0,0,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
1,857,1,3,2015-09-16,1.0,1,0,0,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
2,1713,1,2,2015-09-15,1.0,1,0,0,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
3,2569,1,1,2015-09-14,1.0,1,0,0,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0
4,3425,1,7,2015-09-13,0.0,0,0,0,c,a,1270.0,9.0,2008.0,0,0.0,0.0,0


In [6]:
# process train and test
# aufbereiten der daten, neue Spalten und manche werden entfernt
def process(data, isTest = False):
    # label encode some features
    mappings = {'0':0, 'a':1, 'b':2, 'c':3, 'd':4}
    # buchstaben zu zahlen
    data.StoreType.replace(mappings, inplace=True)
    data.Assortment.replace(mappings, inplace=True)
    data.StateHoliday.replace(mappings, inplace=True)

    # extract some features from date column
    data['Month'] = data.Date.dt.month
    data['Year'] = data.Date.dt.year
    data['Day'] = data.Date.dt.day
    data['WeekOfYear'] = data.Date.dt.weekofyear

    # calculate competiter open time in months
    data['CompetitionOpen'] = 12 * (data.Year - data.CompetitionOpenSinceYear) + \
        (data.Month - data.CompetitionOpenSinceMonth)
    data['CompetitionOpen'] = data['CompetitionOpen'].apply(lambda x: x if x > 0 else 0)

    # calculate promo2 open time in months
    data['PromoOpen'] = 12 * (data.Year - data.Promo2SinceYear) + \
        (data.WeekOfYear - data.Promo2SinceWeek) / 4.0
    data['PromoOpen'] = data['PromoOpen'].apply(lambda x: x if x > 0 else 0)

    # Indicate whether the month is in promo interval
    month2str = {1:'Jan', 2:'Feb', 3:'Mar', 4:'Apr', 5:'May', 6:'Jun', \
             7:'Jul', 8:'Aug', 9:'Sept', 10:'Oct', 11:'Nov', 12:'Dec'}
    data['month_str'] = data.Month.map(month2str)

    def check(row):
        if isinstance(row['PromoInterval'],str) and row['month_str'] in row['PromoInterval']:
            return 1
        else:
            return 0

    data['IsPromoMonth'] =  data.apply(lambda row: check(row),axis=1)

    # select the features we need
    features = ['Store', 'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday',
       'StoreType', 'Assortment', 'CompetitionDistance',
       'CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear', 'Promo2',
       'Promo2SinceWeek', 'Promo2SinceYear', 'Year', 'Month', 'Day',
       'WeekOfYear', 'CompetitionOpen', 'PromoOpen', 'IsPromoMonth']
    if not isTest:
        features.append('Sales')

    data = data[features]
    return data

train = train.sort_values(['Date'],ascending = False)
train = process(train)
test = process(test,isTest = True)

  data['WeekOfYear'] = data.Date.dt.weekofyear
  data['WeekOfYear'] = data.Date.dt.weekofyear


In [7]:
test.head()

Unnamed: 0,Store,DayOfWeek,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,Year,Month,Day,WeekOfYear,CompetitionOpen,PromoOpen,IsPromoMonth
0,1,4,1,0,0,3,1,1270.0,9.0,2008.0,0,0.0,0.0,2015,9,17,38,84.0,24189.5,0
1,1,3,1,0,0,3,1,1270.0,9.0,2008.0,0,0.0,0.0,2015,9,16,38,84.0,24189.5,0
2,1,2,1,0,0,3,1,1270.0,9.0,2008.0,0,0.0,0.0,2015,9,15,38,84.0,24189.5,0
3,1,1,1,0,0,3,1,1270.0,9.0,2008.0,0,0.0,0.0,2015,9,14,38,84.0,24189.5,0
4,1,7,0,0,0,3,1,1270.0,9.0,2008.0,0,0.0,0.0,2015,9,13,37,84.0,24189.25,0


In [8]:
train.head()

Unnamed: 0,Store,DayOfWeek,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,...,Promo2SinceWeek,Promo2SinceYear,Year,Month,Day,WeekOfYear,CompetitionOpen,PromoOpen,IsPromoMonth,Sales
0,1,5,1,0,1,3,1,1270.0,9.0,2008.0,...,0.0,0.0,2015,7,31,31,82.0,24187.75,0,5263
679364,747,5,1,0,1,3,3,45740.0,8.0,2008.0,...,0.0,0.0,2015,7,31,31,83.0,24187.75,0,10708
702362,772,5,1,0,1,4,3,1850.0,0.0,0.0,...,0.0,0.0,2015,7,31,31,24187.0,24187.75,0,5224
683890,752,5,1,0,1,1,1,970.0,3.0,2013.0,...,31.0,2013.0,2015,7,31,31,28.0,24.0,0,7763
17714,20,5,1,0,0,4,1,2340.0,5.0,2009.0,...,40.0,2014.0,2015,7,31,31,74.0,9.75,1,9593


In [9]:
train['class'] = 'bad'
train.loc[(train['Sales'] > 6300), 'class'] = 'good'
featuresR  = ['Store', 'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday',
       'StoreType', 'Assortment', 'CompetitionDistance',
       'CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear', 'Promo2',
       'Promo2SinceWeek', 'Promo2SinceYear', 'Year', 'Month', 'Day',
       'WeekOfYear', 'CompetitionOpen', 'PromoOpen', 'IsPromoMonth']
train['good'] = 0
train['bad'] = 0
train.loc[train['class'] == 'good', 'good'] = 1
train.loc[train['class'] == 'bad', 'bad'] = 1
train_data = train[featuresR].to_numpy()
labels_train = train[['class','good', 'bad']].to_numpy()

X_trainR, X_testR, y_trainR, y_testR = train_test_split(train_data, labels_train, random_state=0)
X_trainR, X_testR = X_trainR.astype('float32'), X_testR.astype('float32')
y_train_labR, y_test_labR = y_trainR[:, 0], y_testR[:, 0]
y_trainR, y_testR = y_trainR[:, 1:].astype('float32'), y_testR[:, 1:].astype('float32')
scalerR = StandardScaler()
scalerR.fit(X_trainR)
category_map = {1: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 2:['PromoNo', 'PromoYes'], 3: ['NoStateHoliday', 'PublicHoliday', 'EasterHoliday', 'ChristmasHoliday'],
                4:['SchoolHolidayNo', 'SchoolHolidayYes'], 5: ['StoreTypeA', 'StoreTypeB', 'StoreTypeC', 'StoreTypeD'], 6:['Basic', 'Extra', 'Extended'], 8:['None','Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], 10: ['NoPromo2', 'Promo2'], 14: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], 15: ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 19: ['NoPromoMonth', 'PromoMonth'] }

In [10]:
train.head()

Unnamed: 0,Store,DayOfWeek,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,...,Month,Day,WeekOfYear,CompetitionOpen,PromoOpen,IsPromoMonth,Sales,class,good,bad
0,1,5,1,0,1,3,1,1270.0,9.0,2008.0,...,7,31,31,82.0,24187.75,0,5263,bad,0,1
679364,747,5,1,0,1,3,3,45740.0,8.0,2008.0,...,7,31,31,83.0,24187.75,0,10708,good,1,0
702362,772,5,1,0,1,4,3,1850.0,0.0,0.0,...,7,31,31,24187.0,24187.75,0,5224,bad,0,1
683890,752,5,1,0,1,1,1,970.0,3.0,2013.0,...,7,31,31,28.0,24.0,0,7763,good,1,0
17714,20,5,1,0,0,4,1,2340.0,5.0,2009.0,...,7,31,31,74.0,9.75,1,9593,good,1,0


Select good wine instance
We partition the dataset into good and bad portions and select an instance of interest. I’ve chosen it to be a good quality
wine.
Note that bad wines are class 1 and correspond to the second model output being high, whereas good wines are class
0 and correspond to the first model output being high.

In [11]:
bad_days = np.array([a for a, b in zip(X_trainR, y_trainR) if b[1] == 1])
good_days = np.array([a for a, b in zip(X_trainR, y_trainR) if b[1] == 0])
xR = np.array([[747,5,1,0,1,3,3,45740.0,8.0,2008.0,1,0.0,0.0,2015,7,31,31,83.0,24187.75,0]])

9.1.2 Training models
Creating an Autoencoder
For some of the explainers, we need an autoencoder to check whether example instances are close to the training data
distribution or not.

In [12]:
from tensorflow.keras.layers import Dense
from tensorflow import keras
ENCODING_DIM = 3 #7
BATCH_SIZE = 32 #64
EPOCHS = 50 #100
class AER(keras.Model):
    def __init__(self, encoder: keras.Model, decoder: keras.Model, **kwargs) -> None:
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
    def call(self, x: tf.Tensor, **kwargs):
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
def make_aeR():
    len_input_output = X_trainR.shape[-1]

    encoder = keras.Sequential()
    encoder.add(Dense(units=ENCODING_DIM*2, activation="relu", input_shape=(len_input_output)))
    encoder.add(Dense(units=ENCODING_DIM, activation="relu"))
    decoder = keras.Sequential()
    decoder.add(Dense(units=ENCODING_DIM*2, activation="relu", input_shape=(ENCODING_DIM)))
    decoder.add(Dense(units=len_input_output, activation="linear"))
    ae = AER(encoder=encoder, decoder=decoder)
    ae.compile(optimizer='adam', loss='mean_squared_error')
    history = ae.fit(
        scalerR.transform(X_trainR),
        scalerR.transform(X_trainR),
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=False,)
    # loss = history.history['loss']
    # plt.plot(loss)
    # plt.xlabel('Epoch')
    # plt.ylabel('MSE-Loss')
    ae.encoder.save(f'{ENC_FNAMER}.h5')
    ae.decoder.save(f'{DEC_FNAMER}.h5')
    return ae

def load_ae_modelR():
    encoder = load_model(f'{ENC_FNAMER}.h5')
    decoder = load_model(f'{DEC_FNAMER}.h5')
    return AER(encoder=encoder, decoder=decoder)

Random Forest Model
We need a tree-based model to get results for the tree SHAP explainer. Hence we train a random forest on the winequality dataset.

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

def make_rfcR():
    rfc = RandomForestClassifier(n_estimators=25)
    rfc.fit(scalerR.transform(X_trainR), y_train_labR)
    y_predR = rfc.predict(scalerR.transform(X_testR))
    print('accuracy_score:', accuracy_score(y_predR, y_test_labR))
    print('f1_score:', f1_score(y_test_labR, y_predR, average=None))
    joblib.dump(rfc, f"{RFC_FNAMER}.joblib")
    return rfc

def load_rfc_modelR():
    return joblib.load(f"{RFC_FNAMER}.joblib")

XGBoost Model

In [24]:
# define eval metrics
# Mittleres Abweichungsquadrat
def rmspe(y, yhat):
    return np.sqrt(np.mean((yhat/y-1) ** 2))
# expm1 ist umkehr von log1p
def rmspe_xg(yhat, y):
    y = np.expm1(y.get_label())
    yhat = np.expm1(yhat)
    return "rmspe", rmspe(y,yhat)

In [27]:
print(X_trainR)

[[8.320000e+02 5.000000e+00 1.000000e+00 ... 2.416800e+04 4.900000e+01
  0.000000e+00]
 [4.070000e+02 1.000000e+00 0.000000e+00 ... 1.180000e+02 2.900000e+01
  1.000000e+00]
 [3.980000e+02 7.000000e+00 0.000000e+00 ... 2.417300e+04 2.875000e+01
  0.000000e+00]
 ...
 [5.850000e+02 5.000000e+00 1.000000e+00 ... 2.000000e+00 2.417375e+04
  0.000000e+00]
 [7.420000e+02 6.000000e+00 0.000000e+00 ... 2.417800e+04 2.417850e+04
  0.000000e+00]
 [1.043000e+03 2.000000e+00 0.000000e+00 ... 8.200000e+01 2.415725e+04
  0.000000e+00]]


In [28]:
print(y_trainR)

[[1. 0.]
 [0. 1.]
 [0. 1.]
 ...
 [0. 1.]
 [1. 0.]
 [0. 1.]]


In [42]:
y_train_xgb = y_train_labR
y_train_xgb = [1 if x == 'good' else 0 for x in y_train_xgb]
y_test_xgb = y_test_labR
y_test_xgb = [1 if x == 'good' else 0 for x in y_test_xgb]

In [58]:
import xgboost as xgb

def make_xgb_modelR():
    params = {"objective": "reg:linear", # for linear regression
              "booster" : "gbtree",   # use tree based models
              "eta": 0.03,   # learning rate
              "max_depth": 10,    # maximum depth of a tree
              "subsample": 0.9,    # Subsample ratio of the training instances
              "colsample_bytree": 0.7,   # Subsample ratio of columns when constructing each tree
              "silent": 1,   # silent mode
              "seed": 10   # Random number seed
              }
    # anzahl trainingsrunden
    num_boost_round = 500

    dtrain = xgb.DMatrix(X_trainR, y_train_xgb)
    dtest = xgb.DMatrix(X_testR, y_test_xgb)
    watchlist = [(dtrain, 'train'), (dtest, 'eval')]
    # train the xgboost model
    model = xgb.train(params, dtrain, num_boost_round, evals=watchlist, \
      early_stopping_rounds= 500, feval=rmspe_xg, verbose_eval=True)
    y_predR = np.rint(model.predict(xgb.DMatrix(X_testR)))
    print(y_predR)
    print('accuracy_score:', accuracy_score(y_predR, y_test_xgb))
    print('f1_score:', f1_score(y_predR, y_test_xgb))

    return model

Tensorflow Model
Finally, we also train a TensorFlow model.


In [59]:
from keras.models import load_model
from tensorflow import keras
from tensorflow.keras import layers

def make_tf_modelR():
    inputs = keras.Input(shape=X_trainR.shape[1])
    x = layers.Dense(6, activation="relu")(inputs)
    outputs = layers.Dense(2, activation="softmax")(x)
    model = keras.Model(inputs, outputs)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])
    history = model.fit(
        scalerR.transform(X_trainR),
        y_trainR,
        epochs=15,
        verbose=False,
        validation_data=(scalerR.transform(X_testR), y_testR),
        )
    y_predR = model(scalerR.transform(X_testR)).numpy().argmax(axis=1)
    print('accuracy_score:', accuracy_score(y_predR, y_testR.argmax(axis=1)))
    print('f1_score:', f1_score(y_predR, y_testR.argmax(axis=1), average=None))
    model.save(f'{TF_MODEL_FNAMER}.h5')
    return model
def load_tf_modelR():
    return load_model(f'{TF_MODEL_FNAMER}.h5', compile= False)

In [60]:
modelXGBR = make_xgb_modelR()

Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:0.49719	train-rmspe:inf	eval-rmse:0.49723	eval-rmspe:inf


  return np.sqrt(np.mean((yhat/y-1) ** 2))


[1]	train-rmse:0.49285	train-rmspe:inf	eval-rmse:0.49291	eval-rmspe:inf
[2]	train-rmse:0.48763	train-rmspe:inf	eval-rmse:0.48774	eval-rmspe:inf
[3]	train-rmse:0.48237	train-rmspe:inf	eval-rmse:0.48254	eval-rmspe:inf
[4]	train-rmse:0.47723	train-rmspe:inf	eval-rmse:0.47744	eval-rmspe:inf
[5]	train-rmse:0.47199	train-rmspe:inf	eval-rmse:0.47225	eval-rmspe:inf
[6]	train-rmse:0.46695	train-rmspe:inf	eval-rmse:0.46726	eval-rmspe:inf
[7]	train-rmse:0.46233	train-rmspe:inf	eval-rmse:0.46269	eval-rmspe:inf
[8]	train-rmse:0.45826	train-rmspe:inf	eval-rmse:0.45865	eval-rmspe:inf
[9]	train-rmse:0.45494	train-rmspe:inf	eval-rmse:0.45538	eval-rmspe:inf
[10]	train-rmse:0.45130	train-rmspe:inf	eval-rmse:0.45178	eval-rmspe:inf
[11]	train-rmse:0.44779	train-rmspe:inf	eval-rmse:0.44831	eval-rmspe:inf
[12]	train-rmse:0.44406	train-rmspe:inf	eval-rmse:0.44463	eval-rmspe:inf
[13]	train-rmse:0.44213	train-rmspe:inf	eval-rmse:0.44273	eval-rmspe:inf
[14]	train-rmse:0.43839	train-rmspe:inf	eval-rmse:0.43905	ev

Load/Make models
We save and load the same models each time to ensure stable results. If they don’t exist we create new ones. If you
want to generate new models on each notebook run, then set FROM_SCRATCH=True.

In [40]:
#if FROM_SCRATCH or not os.path.isfile(f'{TF_MODEL_FNAME}.h5'):
#    model = make_tf_model()
#    rfc = make_rfc()
#    ae = make_ae()
#else:
#    rfc = load_rfc_model()
#    model = load_tf_model()
#    ae = load_ae_model()
modelR = make_tf_modelR()
rfcR = make_rfcR()
aeR = make_aeR()


accuracy_score: 0.742264149459503
f1_score: [0.68409206 0.7823439 ]
accuracy_score: 0.9245231082606182
f1_score: [0.93494089 0.91013288]


TypeError: 'int' object is not iterable

9.1.3 Util functions
These are utility functions for exploring results. The first shows two instances of the data side by side and compares
the difference. We’ll use this to see how the counterfactuals differ from their original instances. The second function
plots the importance of each feature. This will be useful for visualizing the attribution methods

In [43]:
def compare_instances(x, cf):
    """
    Show the difference in values between two instances.
    """
    x = x.astype('float64')
    cf = cf.astype('float64')
    for f, v1, v2 in zip(features, x[0], cf[0]):
        print(f'{f:<25} instance: {round(v1, 3):^10} counter factual: {round(v2, 3):^10} difference: {round(v2, 7):^5}')

def plot_importance(feat_imp, feat_names, class_idx, **kwargs):
    """
    Create a horizontal barchart of feature effects, sorted by their magnitude.
    """
    df = pd.DataFrame(data=feat_imp, columns=feat_names).sort_values(by=0, axis='columns')
    feat_imp, feat_names = df.values[0], df.columns
    fig, ax = plt.subplots(figsize=(10, 5))
    y_pos = np.arange(len(feat_imp))
    ax.barh(y_pos, feat_imp)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(feat_names, fontsize=15)
    ax.invert_yaxis()
    ax.set_xlabel(f'Feature effects for class {class_idx}', fontsize=15)
    return ax, fig

9.1.5 Local Necessary Features
Anchors
Anchors tell us what features need to stay the same for a specific instance for the model to give the same classification.
In the case of a trained image classification model, an anchor for a given instance would be a minimal subset of the
image that the model uses to make its decision.
Here we apply Anchors to the tensor flow model trained on the wine-quality dataset.

In [62]:
from alibi.explainers import AnchorTabular
predict_fnR = lambda x: modelR.predict(scalerR.transform(x))
explainerR = AnchorTabular(predict_fnR, featuresR)
explainerR.fit(X_trainR, disc_perc=(25, 50, 75))
resultR = explainerR.explain(xR, threshold=0.95)

PredictorCallError: Predictor failed to be called on <class 'numpy.ndarray'> of shape (1, 20) and dtype float32. Check that the parameter `feature_names` is correctly specified.

In [64]:
predict_xgb = lambda x: np.rint(modelXGBR.predict(xgb.DMatrix(x)))
explainerXGB = AnchorTabular(predict_xgb, featuresR)
explainerXGB.fit(X_trainR, disc_perc=(25, 50, 75))
resultXGB = explainerXGB.explain(xR, threshold=0.95)

In [49]:
print('Anchor =', resultR.data['anchor'])
print('Precision = ', resultR.data['precision'])
print('Coverage = ', resultR.data['coverage'])

Anchor = ['Promo > 0.00', 'SchoolHoliday > 0.00', 'DayOfWeek <= 6.00', 'StateHoliday <= 0.00', 'PromoOpen > 24171.75']
Precision =  0.9955947136563876
Coverage =  0.023005455455849082


In [65]:
print('Anchor =', resultXGB.data['anchor'])
print('Precision = ', resultXGB.data['precision'])
print('Coverage = ', resultXGB.data['coverage'])

Anchor = ['Promo > 0.00', 'PromoOpen > 24171.75', 'Assortment > 1.00', 'StateHoliday <= 0.00', 'CompetitionOpen > 29.00', 'CompetitionDistance > 6880.00', 'Year > 2013.00', 'StoreType > 1.00', 'Promo2SinceYear <= 2009.00']
Precision =  0.9803921568627451
Coverage =  0.006392661743386472


In [53]:
print(explainerR.predictor(X_testR[idx].reshape(1, -1))[0])
explanation = explainerR.explain(X_testR[idx], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation.anchor)))
print('Precision: %.2f' % explanation.precision)
print('Coverage: %.2f' % explanation.coverage)

1
Anchor: Promo <= 0.00 AND WeekOfYear <= 22.00 AND SchoolHoliday <= 0.00 AND StoreType > 1.00
Precision: 0.97
Coverage: 0.13


In [66]:
idx = 11
print(explainerXGB.predictor(X_testR[idx].reshape(1, -1))[0])
explanation = explainerXGB.explain(X_testR[idx], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation.anchor)))
print('Precision: %.2f' % explanation.precision)
print('Coverage: %.2f' % explanation.coverage)

0.0
Anchor: Promo <= 0.00 AND CompetitionDistance > 2320.00 AND Month <= 8.00 AND Store > 838.00 AND WeekOfYear <= 11.00 AND Day > 8.00 AND DayOfWeek <= 2.00 AND StoreType > 1.00
Precision: 0.98
Coverage: 0.00
