Counterfactual analysis example using DiCE (Diverse Counterfactual Explanations).
Source code from: https://coderzcolumn.com/tutorials/machine-learning/dice-ml-diverse-counterfactual-explanations-for-ml-models

In [16]:
import pandas as pd
import numpy as np

import warnings

# Silence every warning raised from here on. This keeps the cell outputs short,
# but it also hides deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")
# Allow DataFrame previews to show up to 35 columns before pandas truncates them.
pd.set_option("display.max_columns", 35)

import dice_ml

In [2]:
from sklearn.datasets import fetch_california_housing, load_boston

# NOTE(review): `load_boston` is not available in every scikit-learn release -
# confirm the pinned version still ships it. `fetch_california_housing` is
# imported but not used in the cells visible here.
boston = load_boston()

# Feature matrix as a DataFrame, with the regression target appended as "Price".
boston_df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(Price=boston.target)

boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [4]:
from sklearn.model_selection import train_test_split

print("Dataset Size : ", boston.data.shape, boston.target.shape)

# Hold out 10% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data,
    boston.target,
    train_size=0.90,
    random_state=123,
)

print(
    "Train/Test Sizes : ",
    *(part.shape for part in (X_train, X_test, Y_train, Y_test)),
)

Dataset Size :  (506, 13) (506,)
Train/Test Sizes :  (455, 13) (51, 13) (455,) (51,)


In [5]:
import tensorflow.keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [6]:
# Seed TensorFlow before any layer is created so the initial weights are the
# same on every Restart & Run All, matching the fixed random_state used for the
# train/test split. `tensorflow` is already bound by `import tensorflow.keras`.
# NOTE(review): training may still vary slightly on some hardware/backends -
# confirm if bit-exact metrics are required.
tensorflow.random.set_seed(123)

# Fully connected regressor: three hidden ReLU layers of 50 units each and a
# single linear output unit that predicts the price.
model = Sequential([
            Dense(50, activation="relu", input_shape=(len(boston.feature_names), )),
            Dense(50, activation="relu"),
            Dense(50, activation="relu"),
            Dense(1),
           ])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 50)                700       
_________________________________________________________________
dense_1 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2550      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 51        
Total params: 5,851
Trainable params: 5,851
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Regression setup: optimise mean squared error with Adam and additionally
# track mean absolute error during training.
model.compile(
    optimizer="adam",
    loss="mean_squared_error",
    metrics=["mae"],
)

In [9]:
%%time

history = model.fit(X_train, Y_train, batch_size=8, epochs=100, verbose=0)

CPU times: user 6.68 s, sys: 1.87 s, total: 8.56 s
Wall time: 3.79 s


In [10]:
from sklearn.metrics import mean_squared_error, r2_score

# Run inference once per split and reuse the predictions for both metrics,
# instead of calling model.predict() four times.
Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)

print("Train MSE : %.2f"%mean_squared_error(Y_train, Y_train_pred))
print("Test  MSE : %.2f"%mean_squared_error(Y_test, Y_test_pred))

print("Train R2 Score : %.2f"%r2_score(Y_train, Y_train_pred))
print("Test  R2 Score : %.2f"%r2_score(Y_test, Y_test_pred))

Train MSE : 14.90
Test  MSE : 32.35
Train R2 Score : 0.81
Test  R2 Score : 0.72


In [11]:
# DiCE data wrapper: every feature column is declared continuous and "Price"
# is the outcome column.
# NOTE(review): CHAS only shows 0.0/1.0 in the preview above - confirm that
# treating it as continuous is intended.
d = dice_ml.Data(
    dataframe=boston_df,
    continuous_features=boston.feature_names.tolist(),
    outcome_name="Price",
)

# DiCE model wrapper around the trained Keras network, using the TF2 backend.
m = dice_ml.Model(
    model=model,
    backend="TF2",
)


In [12]:
# Initiate DiCE: build the explainer from the data wrapper `d` and the model
# wrapper `m` created in the previous cell.
exp = dice_ml.Dice(d, m)
# Display the explainer object to see which explainer class was created.
exp

<dice_ml.explainer_interfaces.dice_tensorflow2.DiceTensorFlow2 at 0x1514075b0>

In [13]:
import random

# Pick one test row at random. randrange(n) yields 0..n-1, so every row
# (including row 0) can be chosen and the index is always valid; the previous
# randint(1, n) was inclusive of n and could raise IndexError.
idx = random.randrange(len(X_test))

print("Actual Price : %.2f"%Y_test[idx])

# Query instance for DiCE as a {feature name: value} mapping.
sample = dict(zip(boston.feature_names, X_test[idx]))
sample

Actual Price : 21.90


{'CRIM': 3.47428,
 'ZN': 0.0,
 'INDUS': 18.1,
 'CHAS': 1.0,
 'NOX': 0.718,
 'RM': 8.78,
 'AGE': 82.9,
 'DIS': 1.9047,
 'RAD': 24.0,
 'TAX': 666.0,
 'PTRATIO': 20.2,
 'B': 354.55,
 'LSTAT': 5.29}

In [15]:
# Ask DiCE for four counterfactuals of the sampled row.
# NOTE(review): `desired_class=1` is a classification-style target, while the
# model above is a single-output regressor trained with MSE; the recorded output
# below reports Price 0 for every counterfactual. Confirm this is intended, or
# check whether DiCE's regression options fit this use case.
dice_exp = exp.generate_counterfactuals(
    sample,
    total_CFs=4,
    desired_class=1,
)



Diverse Counterfactuals found! total time taken: 03 min 20 sec


In [17]:
# Show the query instance followed by the generated counterfactual set as tables.
dice_exp.visualize_as_dataframe()

Query instance (original outcome : 4)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,3.47428,0.0,18.1,1.0,0.718,8.78,82.9,1.9047,24.0,666.0,20.2,354.6,5.29,3.818



Diverse Counterfactual set (new outcome: 1)


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.7269,0.0,14.1,1.0,0.718,8.78,69.4,1.3842,24.0,651.7,20.2,354.6,9.09,0
1,6.07824,0.0,18.1,1.0,0.718,8.78,82.9,1.9038,24.0,653.7,20.2,350.6,5.29,0
2,4.00939,0.0,18.1,1.0,0.718,8.78,82.9,2.2197,24.0,666.0,20.2,354.6,5.29,0
3,8.6903,0.0,18.1,1.0,0.744,8.78,82.9,1.9056,24.0,666.0,20.9,381.1,2.66,0
