In [None]:
import warnings
warnings.filterwarnings("ignore")

from carla.data.catalog import CsvCatalog
from carla import MLModelCatalog
from carla.recourse_methods import Clue, Wachter
from carla.models.negative_instances import predict_negative_instances
from sklearn.metrics import f1_score, accuracy_score
from sklearn.cluster import KMeans
from sklearn import metrics
import imageio

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
import sys
sys.path.insert(0,'..')
from recourse_util import update_dataset, predict, print_scores 

# NOTE(review): `num` is not referenced by any other cell visible in this notebook.
num = 10
# Run counter: incremented by the experiment cell and embedded in the GIF file
# name built by generate_animation.
iter_id = 0

In [None]:
def train_model(dataset):
    """Create and train a fresh ANN black-box model on ``dataset``.

    The model is a ``MLModelCatalog`` with the "ann" model type on the
    "pytorch" backend and ``load_online=False``. Training uses the fixed
    settings below and always passes ``force_train=True``.

    Args:
        dataset: the data catalog object handed to ``MLModelCatalog``.

    Returns:
        The ``MLModelCatalog`` instance after ``train`` has been called on it.
    """
    # Fixed training configuration used for every (re)training in the notebook.
    train_kwargs = dict(
        learning_rate=0.005,
        epochs=4,
        batch_size=1,
        hidden_size=[5],
        force_train=True,
    )

    classifier = MLModelCatalog(
        dataset, model_type="ann", load_online=False, backend="pytorch"
    )
    classifier.train(**train_kwargs)
    return classifier

In [None]:
def load_dataset(file_path='datasets/unimodal_dataset_2.csv'):
    """Load one of the synthetic two-feature datasets as a ``CsvCatalog``.

    Args:
        file_path: CSV file to load. Defaults to the unimodal dataset used by
            the experiments; other files previously toggled by hand in this
            notebook are 'datasets/bimodal_dataset_1.csv' and
            'datasets/unimodal_dataset_1.csv'.

    Returns:
        A ``CsvCatalog`` with continuous columns 'feature1' and 'feature2',
        no categorical or immutable columns, and 'target' as the label column.
    """
    return CsvCatalog(
        file_path=file_path,
        categorical=[],
        continuous=['feature1', 'feature2'],
        immutables=[],
        target='target'
    )

In [None]:
def train_recourse_method(method, model, dataset=None, data_name=None, hyperparams=None):
    """Instantiate a recourse method around the black-box ``model``.

    Args:
        method: "clue" selects ``Clue``; any other value selects ``Wachter``.
        model: the trained black-box model passed to the recourse method.
        dataset: data catalog passed to ``Clue`` (not used for Wachter).
        data_name: stored under the "data_name" key of the Clue settings.
        hyperparams: optional dict of overrides merged on top of the default
            settings of the selected method. Previously this argument was
            overwritten and therefore ignored. The caller's dict is not
            modified.

    Returns:
        The constructed ``Clue`` or ``Wachter`` object.
    """
    overrides = dict(hyperparams) if hyperparams else {}

    if method == "clue":
        settings = {
            "data_name": data_name,
            "train_vae": True,
            "width": 10,
            "depth": 3,
            "latent_dim": 12,
            "batch_size": 4,
            "epochs": 5,
            "lr": 0.0001,
            "early_stop": 20,
        }
        settings.update(overrides)

        # load a recourse model and pass black box model
        return Clue(dataset, model, settings)

    # Every non-"clue" method name falls back to Wachter.
    settings = {
        "loss_type": "BCE",
        "t_max_min": 0.5/60
    }
    settings.update(overrides)

    # load a recourse model and pass black box model
    return Wachter(model, settings)

In [None]:
def draw(data):
    """Show a scatter plot of 'feature1' against 'feature2', coloured by 'target'.

    Args:
        data: mapping/DataFrame indexable by the keys 'feature1', 'feature2'
            and 'target'.
    """
    x_values = data['feature1']
    y_values = data['feature2']
    labels = data['target']

    plt.scatter(x_values, y_values, c=labels)
    plt.show()

In [None]:
def get_factuals(dataset, sample_num=5, max_m_iter=3):
    """Train a model and collect the instances it predicts as negative.

    A model is trained once; while fewer than ``sample_num`` negative
    instances are found, a new model is trained, at most ``max_m_iter``
    additional times. If more than ``sample_num`` instances are found, a
    random sample of exactly ``sample_num`` of them is returned.

    Args:
        dataset: data catalog; its ``_df`` frame is scored by the model.
        sample_num: desired number of factuals.
        max_m_iter: maximum number of retraining attempts after the first.

    Returns:
        ``(model, factuals)`` from the last training attempt. ``factuals`` may
        hold fewer than ``sample_num`` rows if every attempt fell short.
    """
    retries_left = max_m_iter
    while True:
        model = train_model(dataset)
        factuals = predict_negative_instances(model, dataset._df)
        enough = len(factuals) >= sample_num
        if enough or retries_left <= 0:
            break
        retries_left -= 1

    # Trim to the requested size only when there is a surplus.
    if len(factuals) > sample_num:
        factuals = factuals.sample(sample_num)

    return model, factuals

In [None]:
def execute_experiment_iteration(method, dataset, model, factuals, results):
    """Run one recourse round and record statistics before and after it.

    Steps: record statistics for the current dataset, build the recourse
    method, generate counterfactuals for ``factuals``, hand them to
    ``update_dataset``, record statistics again and plot the dataset frame.

    Args:
        method: 'clue' selects Clue; any other value selects Wachter.
        dataset: data catalog passed to ``update_dataset`` together with the
            factuals and counterfactuals (presumably modified in place by that
            helper — confirm in ``recourse_util``).
        model: trained black-box model used for recourse and for scoring.
        factuals: instances for which counterfactuals are requested.
        results: dict of lists that ``add_data_statistics`` appends to; two
            snapshots are added per call.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    print("Number of factuals", len(factuals))

    # Snapshot before recourse is applied.
    add_data_statistics(model, dataset, results)

    if method == 'clue':
        rm = train_recourse_method('clue', model, dataset, data_name='custom')
    else:
        rm = train_recourse_method('wachter', model)

    counterfactuals = rm.get_counterfactuals(factuals)
    # Rows containing NaN are excluded from the reported count.
    print("Number of counterfactuals:", len(counterfactuals.dropna()))

    update_dataset(dataset, factuals, counterfactuals)
    # Snapshot after the update.
    add_data_statistics(model, dataset, results)

    draw(dataset._df)

    return dataset

In [None]:
# Module-level container: one result dict per recourse method name.
results = {}
def get_empty_results():
    """Return a new result record with an empty list for every tracked series.

    Each call builds independent lists, so records never share state.
    """
    series_names = (
        'datasets',
        'means',
        'covariances',
        'clustering',
        'accuracies',
        'f1_scores',
        'benchmark',
    )
    return {series: [] for series in series_names}

In [None]:
def add_data_statistics(model, dataset, results):
    """Append one snapshot of dataset and model statistics to ``results``.

    Recorded per call: a copy of the dataset frame, the mean vector and
    covariance matrix of the continuous columns, the cluster count returned
    by ``find_elbow``, and the accuracy and F1 score of ``model`` on the
    dataset's target column.

    All values are computed before anything is appended, so an exception
    while computing them leaves the lists in ``results`` untouched. The model
    predictions are computed once and shared by both scores.

    Args:
        model: trained model passed to ``predict``.
        dataset: data catalog exposing ``_df``, ``continuous`` and ``target``.
        results: dict of lists with (at least) the keys 'datasets', 'means',
            'covariances', 'clustering', 'accuracies' and 'f1_scores'.
    """
    frame = dataset._df
    features = frame[dataset.continuous]

    snapshot = {
        'datasets': frame.copy(),
        'means': features.mean().to_numpy(),
        'covariances': features.cov().to_numpy(),
        'clustering': find_elbow(dataset),
    }

    # One prediction pass serves both metrics.
    y_true = np.array(frame[dataset.target])
    y_pred = predict(model, dataset)
    snapshot['accuracies'] = accuracy_score(y_true, y_pred)
    snapshot['f1_scores'] = f1_score(y_true, y_pred)

    for series, value in snapshot.items():
        results[series].append(value)

In [None]:
def find_elbow(dataset, n=10):
    """Pick the cluster count with the highest Calinski-Harabasz score.

    KMeans (``random_state=1``) is fitted on the continuous columns of
    ``dataset.df`` for every cluster count in ``range(2, n)`` and each
    clustering is scored with ``metrics.calinski_harabasz_score``.

    Args:
        dataset: data catalog exposing ``df`` and ``continuous``.
        n: exclusive upper bound on the number of clusters tried.

    Returns:
        The cluster count (starting at 2) whose clustering scored highest;
        the first one wins on ties.
    """
    features = dataset.df[dataset.continuous]

    scores = [
        metrics.calinski_harabasz_score(
            features,
            KMeans(n_clusters=k, random_state=1).fit(features).labels_,
        )
        for k in range(2, n)
    ]

    # Position 0 in `scores` corresponds to k == 2.
    best_position = scores.index(np.max(scores))
    return best_position + 2

In [None]:
def generate_animation(results, method='clue'):
    """Render the recorded dataset snapshots of ``method`` into a GIF.

    Each frame in ``results[method]['datasets']`` is scatter-plotted
    ('feature1' vs 'feature2', coloured by 'target') into a temporary PNG
    under ``images/``; the PNGs are then combined into
    ``gifs/{method}_gif_{iter_id}.gif``, where ``iter_id`` is the module-level
    run counter. The temporary PNGs are deleted afterwards, even when
    rendering or encoding fails.

    Args:
        results: dict mapping method name to a result record holding a
            'datasets' list.
        method: key into ``results`` and prefix for the generated file names.
    """
    snapshots = results[method]['datasets']
    frame_paths = [f"images/{method}{str(n)}.png" for n in range(len(snapshots))]
    gif_path = f"gifs/{method}_gif_{iter_id}.gif"

    # Neither savefig nor the GIF writer creates missing parent directories,
    # so make sure they exist before writing anything.
    for directory in {os.path.dirname(path) for path in [*frame_paths, gif_path]}:
        os.makedirs(directory, exist_ok=True)

    try:
        for snapshot, frame_path in zip(snapshots, frame_paths):
            plt.scatter(snapshot['feature1'], snapshot['feature2'], c=snapshot['target'])
            plt.savefig(frame_path)
            # Close the figure so points do not accumulate across frames.
            plt.close()

        with imageio.get_writer(f'{gif_path}', mode='I') as writer:
            for frame_path in frame_paths:
                writer.append_data(imageio.imread(frame_path))

        print(f"Saved gif to {gif_path}")
    finally:
        # Always clean up the intermediate frames, including after an error.
        for frame_path in frame_paths:
            if os.path.exists(frame_path):
                os.remove(frame_path)

In [None]:
# Not idempotent: every execution of this cell bumps the run counter that
# generate_animation embeds in the GIF file name.
iter_id += 1
# Reference copy of the data; in this cell it is only read (column names and
# `_df` for the merges below) and is never passed to the recourse step.
dataset = load_dataset()

# Each recourse method gets its own dataset copy and its own result record,
# registered in the module-level `results` dict under the method name.
clue_dataset = load_dataset()
clue_result = get_empty_results()
results['clue'] = clue_result

wachter_dataset = load_dataset()
wachter_result = get_empty_results()
results['wachter'] = wachter_result

# Number of recourse rounds and number of factuals requested per round.
iterations = 50
samples = 1

for i in range(iterations):
    # A fresh model is trained per method and per round inside get_factuals.
    clue_model, clue_factuals = get_factuals(clue_dataset, sample_num=samples)
    wachter_model, wachter_factuals = get_factuals(wachter_dataset, sample_num=samples)
    
    # NOTE(review): the merged `factuals` frame (factuals common to both methods
    # that also appear in the reference data) is not used by any code visible
    # in this notebook; each method below runs on its own factuals instead.
    factuals = pd.merge(clue_factuals, wachter_factuals, how='inner', on=[*dataset.continuous, dataset.target])
    factuals = pd.merge(factuals, dataset._df, how='inner', on=dataset.continuous)
    
    execute_experiment_iteration('clue', clue_dataset, clue_model, clue_factuals, clue_result)
    execute_experiment_iteration('wachter', wachter_dataset, wachter_model, wachter_factuals, wachter_result)

In [None]:
# Display the collected statistics as the cell output.
# NOTE(review): each record holds a full DataFrame copy per snapshot, so this
# output can be very large.
results

In [None]:
# Build one GIF per recourse method from the dataset snapshots stored in `results`.
generate_animation(results, 'clue')
generate_animation(results, 'wachter')