In [83]:
from sklearn.datasets import load_wine
from sklearn.cluster import KMeans
import random

from lib.baseline import *
from lib.baycon import execute

from lib.CF_De.cfde import CF_Descent
from lib.cf_a.cfae import CF_Ascent
from dataclasses import dataclass
from lib.eval.generator import run

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Cross testing of methods

In [84]:
X, true_y = load_wine(return_X_y=True)
nc = 3
km = KMeans(n_clusters=nc)
y = km.fit_predict(X)
centers = km.cluster_centers_
print(centers)

@dataclass
class CounterfactualData:
    cf: np.array
    instance: np.array
    target: float
    instance_label: float

n = 1
cfs = []
for i in range(n):
    index = random.randint(0, len(X)-1)
    label = y[index]
    target = label
    while target == label:
        target = random.randint(0, nc-1)
    
    cfs.append(CounterfactualData([], index, float(target), label))

[[1.25985294e+01 2.45343137e+00 2.32186275e+00 2.06460784e+01
  9.36960784e+01 2.05362745e+00 1.64754902e+00 3.95980392e-01
  1.42509804e+00 4.67333332e+00 9.17843137e-01 2.39480392e+00
  5.21558824e+02]
 [1.33691837e+01 2.40000000e+00 2.39265306e+00 1.85142857e+01
  1.09081633e+02 2.44163265e+00 2.21367347e+00 3.25510204e-01
  1.70673469e+00 5.18836735e+00 9.59714286e-01 2.84795918e+00
  9.06346939e+02]
 [1.38507407e+01 1.77851852e+00 2.48777778e+00 1.69259259e+01
  1.05629630e+02 2.94148148e+00 3.13666667e+00 2.98888889e-01
  2.00703704e+00 6.27518519e+00 1.10296296e+00 3.00222222e+00
  1.30877778e+03]]


## Baseline

In [85]:
from lib.baseline import baseline_explainer
from lib.baycon import baycon_explainer
from sklearn.ensemble import RandomForestClassifier

explainer = baycon_explainer
baseline_cfs = []
for cf in cfs:
    classifier = RandomForestClassifier(n_estimators=100)
    baseline_cf = baseline_explainer(X, y, classifier, explainer, cf.instance, target_cluster=int(cf.target))
    baseline_cfs.append(CounterfactualData(baseline_cf, cf.instance, cf.target, cf.instance_label))

Done training classifier. Score: 1.0
--- Executing... Initial Instance: 94 Target: 1 Run: 0 ---
--- Step 0: Load internal objects ---
--- Step 1: Generate initial neighbours ---
Generated initial neighbours: (617) CFs (0)
Re-training surrogate model with data size: 617
--- Step 2: Explore neighbourhood ---
--- epoch 1 ----
Generating neighbors for 1 near best instances
Generated neighbours: (27) Unique overall (27)
Generated random instances: (2661)
Predicted top: 20 Counterfactuals: 8
Found new best [1.162000e+01 1.255965e+00 2.280000e+00 1.800000e+01 9.800000e+01
 3.163815e+00 2.260000e+00 1.700000e-01 1.350000e+00 3.250000e+00
 1.171780e+00 2.960000e+00 7.447330e+02], with fitness score 0.6578 (X 0.95022 Y 1.0 F 0.69231), oversampling
Known alternatives: 637
Best instance score 0.6578 (X 0.95022 Y 1.0 F 0.69231), found on epoch: 1
Re-training surrogate model with data size: 647
--- epoch 2 ----
Generating neighbors for 20 near best instances
Generated neighbours: (44) Unique overall

## Baycon

In [86]:
from lib.ext.baycon.common.Target import Target
baycon_cfs = []
modes = ['classic_baycon', 'kmeans', 'model_agnostic']
run_mode = modes[0]


if run_mode == modes[0]:
    model = RandomForestClassifier()
elif run_mode == modes[1]:
    model = KMeans(n_clusters=3)
elif run_mode == modes[2]:
    model = KMeans(n_clusters=3)
    # model = SpectralClustering(n_clusters=3)
else:
    raise("Invalid baycon mode")
model.fit(X, y)
pred = model.predict(X)
pred = [str(l) for l in pred]
stacked = np.column_stack((X, pred))
df = pd.DataFrame(stacked, columns=[f'x{i+1}' for i in range(X.shape[1])]+['label'], dtype=float)
df

baycon_cfs = []
for cf in cfs:
    point_idx = cf.instance
    point_pred = int(model.predict([X[point_idx]])[0])
    if run_mode == modes[0]:
        t = Target(target_type="classification", target_feature="label", target_value=int(cf.target))
    elif run_mode == modes[1]:
        t = Target(target_type="clustering_kmeans", target_feature="label", target_value=int(cf.target))
    elif run_mode == modes[2]:
        t = Target(target_type="model_agnostic", target_feature="label", target_value=int(cf.target))

    baycon_cf, _predictions, _initial_instance, _initial_prediction = execute(df, model, t, point_idx)
    baycon_cfs.append(CounterfactualData(baycon_cf, cf.instance, cf.target, cf.instance_label))

--- Executing... Initial Instance: 94 Target: 1 Run: 0 ---
--- Step 0: Load internal objects ---
--- Step 1: Generate initial neighbours ---
Generated initial neighbours: (615) CFs (0)
Re-training surrogate model with data size: 615
--- Step 2: Explore neighbourhood ---
--- epoch 1 ----
Generating neighbors for 1 near best instances
Generated neighbours: (14) Unique overall (14)
Generated random instances: (2646)
Predicted top: 20 Counterfactuals: 3
Found new best [1.422465e+01 1.990000e+00 2.280000e+00 1.800000e+01 9.800000e+01
 3.020000e+00 2.260000e+00 1.700000e-01 1.306140e+00 3.250000e+00
 1.160000e+00 2.960000e+00 1.093532e+03], with fitness score 0.6782 (X 0.88162 Y 1.0 F 0.76923), oversampling
Known alternatives: 635
Best instance score 0.6782 (X 0.88162 Y 1.0 F 0.76923), found on epoch: 1
Re-training surrogate model with data size: 645
--- epoch 2 ----
Generating neighbors for 20 near best instances
Generated neighbours: (66) Unique overall (78)
Generated random instances: (10

## CFDE

In [87]:
cfde_cfs = []
amount = 10
for cf in cfs:
    temp_cfs = []
    for i in range(amount):
        x1, cf1, h1 = CF_Descent(X=X,y=y,target=cf.target,centers=centers,instance_index=cf.instance)
        temp_cfs.append(cf1)
    cfde_cfs.append(CounterfactualData(temp_cfs, cf.instance, cf.target, cf.instance_label))    

cfde_cfs

[CounterfactualData(cf=[array([1.16200000e+01, 2.40000000e+00, 2.39265306e+00, 1.80000561e+01,
        9.80000000e+01, 3.02000000e+00, 2.21367347e+00, 3.25510204e-01,
        1.70673469e+00, 3.25000000e+00, 9.59714286e-01, 2.84795918e+00,
        7.18181031e+02]), array([1.16200000e+01, 2.40000000e+00, 2.39265306e+00, 1.85142857e+01,
        9.80000000e+01, 3.02000000e+00, 2.21367347e+00, 3.25510204e-01,
        1.70673469e+00, 3.25000000e+00, 9.59714286e-01, 2.84795918e+00,
        7.23939567e+02]), array([1.16200000e+01, 2.40000000e+00, 2.39265306e+00, 1.85142857e+01,
        9.80000000e+01, 3.02000000e+00, 2.21367347e+00, 3.25510204e-01,
        1.70673469e+00, 3.25000000e+00, 9.59714286e-01, 2.84795918e+00,
        7.18967390e+02]), array([1.16200000e+01, 2.40000000e+00, 2.39265306e+00, 1.80003770e+01,
        9.80000000e+01, 3.02000000e+00, 2.21367347e+00, 3.25510204e-01,
        1.70673469e+00, 3.25000000e+00, 9.59714286e-01, 2.84795918e+00,
        7.18868696e+02]), array([1.162

## CFAE

In [88]:
cfae_cfs = []
amount = 10
for cf in cfs:
    temp_cfs = []
    for i in range(amount):
        x2, cf2, h2 = CF_Ascent(X=X,y=y,target=cf.target,centers=centers,instance_index=cf.instance)
        temp_cfs.append(cf2)
    cfae_cfs.append(CounterfactualData(temp_cfs, cf.instance, cf.target, cf.instance_label))

cfae_cfs

[CounterfactualData(cf=[array([1.16200000e+01, 1.99000000e+00, 2.28000000e+00, 1.80000000e+01,
        9.80000000e+01, 3.02000000e+00, 2.26000000e+00, 1.70000000e-01,
        1.35000000e+00, 3.25000000e+00, 1.16000000e+00, 2.96000000e+00,
        7.10150306e+02]), array([1.16200000e+01, 1.99000000e+00, 2.28000000e+00, 1.80000000e+01,
        9.80000000e+01, 3.02000000e+00, 2.26000000e+00, 1.70000000e-01,
        1.35000000e+00, 3.25000000e+00, 1.16000000e+00, 2.96000000e+00,
        6.99170254e+02]), array([1.16200000e+01, 1.99000000e+00, 2.28000000e+00, 1.80000000e+01,
        9.80000000e+01, 3.02000000e+00, 2.26000000e+00, 1.70000000e-01,
        1.35000000e+00, 3.25000000e+00, 1.16000000e+00, 2.96000000e+00,
        7.01246625e+02]), array([1.16200000e+01, 1.99000000e+00, 2.28000000e+00, 1.80000000e+01,
        9.80000000e+01, 3.02000000e+00, 2.26000000e+00, 1.70000000e-01,
        1.35000000e+00, 3.25000000e+00, 1.16000000e+00, 2.96000000e+00,
        7.11558356e+02]), array([1.162

## Testing

In [89]:
baseLine = {
    "name": "Baseline",
    "counterfactuals": baseline_cfs
}
Baycon = {
    "name": "Baycon",
    "counterfactuals": baycon_cfs
}
cfde = {
    "name": "CF-DE",
    "counterfactuals": cfde_cfs
}
cfae = {
    "name": "CF-AE",
    "counterfactuals": cfae_cfs
}

methods = [baseLine, cfde, cfae]
print(run(methods, centers, X, y))


[[1.162000e+01 1.323855e+00 2.103040e+00 ... 7.502250e-01 2.053975e+00
  1.272645e+03]
 [1.162000e+01 1.289910e+00 2.280000e+00 ... 1.160000e+00 2.960000e+00
  8.484300e+02]
 [1.162000e+01 1.255965e+00 2.316630e+00 ... 7.502250e-01 2.960000e+00
  1.046397e+03]
 ...
 [1.401961e+01 1.990000e+00 2.280000e+00 ... 1.160000e+00 2.960000e+00
  9.144190e+02]
 [1.409650e+01 1.990000e+00 2.280000e+00 ... 1.160000e+00 2.960000e+00
  9.521270e+02]
 [1.419902e+01 1.990000e+00 2.280000e+00 ... 1.160000e+00 2.960000e+00
  9.521270e+02]]


AttributeError: 'list' object has no attribute 'shape'