In [1]:
import warnings
warnings.filterwarnings("ignore")

from carla.data.catalog import CsvCatalog
from carla import MLModelCatalog
from carla.recourse_methods import Clue, Wachter
from carla.models.negative_instances import predict_negative_instances
import numpy as np
# import torch

import sys
sys.path.insert(0,'..')
from recourse_util import update_dataset, train_recourse_method, predict, print_scores 

num = 10
# print(torch.cuda.is_available())

Using TensorFlow backend.


[INFO] Using Python-MIP package version 1.12.0 [model.py <module>]


In [2]:
def train_new_model(dataset):
    model = MLModelCatalog(dataset, "ann", load_online=False, backend="pytorch")
    model.train(
        learning_rate = 0.001,
        epochs = 10,
        max_depth = 50,
        n_estimators = 50,
        batch_size = 20,
        force_train = True
    )
    return model

In [3]:
dataset = CsvCatalog(
    # file_path='datasets/bimodal_dataset_1.csv',
    file_path='datasets/unimodal_dataset_1.csv',
    # file_path='datasets/unimodal_dataset_2.csv',
    categorical=[],
    continuous=['feature1', 'feature2'],
    immutables=[],
    target='target'
)

In [20]:
training_params = {"lr": 0.01, "epochs": 4, "batch_size": 1, "hidden_size": [20, 20]}

model = MLModelCatalog(
    dataset,
    model_type="ann",
    load_online=False,
    backend="pytorch"
)

model.train(
    learning_rate=training_params["lr"],
    epochs=training_params["epochs"],
    batch_size=training_params["batch_size"],
    hidden_size=training_params["hidden_size"],
    force_train=True
)

balance on test set 0.49333333333333335, balance on test set 0.52
Epoch 0/3
----------
train Loss: 0.1369 Acc: 0.9533

test Loss: 0.0059 Acc: 1.0000

Epoch 1/3
----------
train Loss: 0.0022 Acc: 1.0000

test Loss: 0.0034 Acc: 1.0000

Epoch 2/3
----------
train Loss: 0.0006 Acc: 1.0000

test Loss: 0.0008 Acc: 1.0000

Epoch 3/3
----------
train Loss: 0.0007 Acc: 1.0000

test Loss: 0.0003 Acc: 1.0000



In [21]:
hyperparams = {
        "loss_type": "BCE"
    }

# load a recourse model and pass black box model
cl = Wachter(model, hyperparams)

In [22]:
# generate counterfactual samples
factuals = predict_negative_instances(model, dataset._df).sample(num)
print("Number of factuals", len(factuals))
factuals

Number of factuals 10


Unnamed: 0,feature1,feature2,target
19,0.328684,0.296963,0.0
88,0.298005,0.200744,0.0
61,0.292615,0.411475,0.0
83,0.265759,0.24756,0.0
82,0.219871,0.21127,0.0
86,0.409835,0.204356,0.0
52,0.071298,0.243434,0.0
16,0.109394,0.148594,0.0
62,0.0,0.343147,0.0
2,0.254461,0.012462,0.0


In [14]:
counterfactuals = cl.get_counterfactuals(factuals)
print("Number of counterfactuals:", len(counterfactuals.dropna()))
counterfactuals

[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
Number of counterfactuals: 10


Unnamed: 0,feature1,feature2,target
79,0.540355,0.487843,1.0
10,0.387757,0.651594,1.0
9,0.465761,0.570289,1.0
45,0.53584,0.495119,1.0
88,0.558352,0.461089,1.0
22,0.515431,0.505135,1.0
7,0.587479,0.432728,1.0
0,0.585783,0.430827,1.0
24,0.464051,0.564511,1.0
96,0.525991,0.505883,1.0


In [9]:
counterfactuals

Unnamed: 0,feature1,feature2,target
79,0.540355,0.487843,1.0
10,0.387757,0.651594,1.0
9,0.465761,0.570289,1.0
45,0.53584,0.495119,1.0
88,0.558352,0.461089,1.0
22,0.515431,0.505135,1.0
7,0.587479,0.432728,1.0
0,0.585783,0.430827,1.0
24,0.464051,0.564511,1.0
96,0.525991,0.505883,1.0


In [10]:
post = dataset._df.copy()
update_dataset(post, factuals, counterfactuals)

AttributeError: 'DataFrame' object has no attribute '_df'

In [None]:
import matplotlib.pyplot as plt

plt.scatter(dataset._df['feature1'], dataset._df['feature2'], c=dataset._df['target'])
plt.show()

plt.scatter(post['feature1'], post['feature2'], c=post['target'])
plt.show()

# plt.scatter(factuals['feature1'], factuals['feature2'], c=factuals['target'])
# plt.show()

In [None]:
from carla.evaluation.benchmark import Benchmark
benchmark = Benchmark(model, cl, factuals)
benchmark.run_benchmark()

In [23]:
for i in range(10):
    print(cl.get_counterfactuals(factuals.iloc[:2]))

[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
    feature1  feature2  target
19  0.457471  0.425749     1.0
88  0.486317  0.389054     1.0
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
    feature1  feature2  target
19  0.457471  0.425749     1.0
88  0.486317  0.389054     1.0
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
    feature1  feature2  target
19  0.457471  0.425749     1.0
88  0.486317  0.389054     1.0
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse]
    feature1  feature2  target
19  0.457471  0.425749     1.0
88  0.486317  0.389054     1.0
[INFO] Counterfactual Explanation Found [wachter.py wachter_recourse

In [28]:
hyperparams = {
                "data_name": 'custom',
                "train_vae": True,
                "width": 10,
                "depth": 3,
                "latent_dim": 12,
                "batch_size": 4,
                "epochs": 5,
                "lr": 0.0001,
                "early_stop": 20,
            }

cl = train_recourse_method(dataset, model, 'custom', 'CLUE', hyperparams)

for i in range(10):
    print(cl.get_counterfactuals(factuals.iloc[6:8]))

[INFO] 
Net: [utils.py __init__]
[INFO] VAE_gauss_net [fc_gauss_cat.py __init__]
[INFO] Total params: 0.00M [fc_gauss_cat.py create_net]
[INFO] 
Network: [train.py train_VAE]
[INFO] 
Train: [train.py train_VAE]
[INFO] init cost variables: [train.py train_VAE]
[INFO] it 0/5, vlb -5.293244,  [train.py train_VAE]
[INFO] time: 0.423972 seconds
 [train.py train_VAE]
[INFO] vlb -2.901909 (-inf)
 [train.py train_VAE]
[INFO] Writting C:\Users\drobi\carla\models\autoencoders\clue\fc_VAE_custom_models\theta_best.dat
 [utils.py save]
[INFO] it 1/5, vlb -5.167222,  [train.py train_VAE]
[INFO] time: 0.369002 seconds
 [train.py train_VAE]
[INFO] vlb -4.477695 (-2.901909)
 [train.py train_VAE]
[INFO] it 2/5, vlb -5.325794,  [train.py train_VAE]
[INFO] time: 0.459026 seconds
 [train.py train_VAE]
[INFO] vlb -4.596470 (-2.901909)
 [train.py train_VAE]
[INFO] it 3/5, vlb -5.245922,  [train.py train_VAE]
[INFO] time: 0.458000 seconds
 [train.py train_VAE]
[INFO] vlb -4.389636 (-2.901909)
 [train.py train