In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# from sklearn.ensemble import GradientBoostingClassifier
import rce
from scipy.stats import sem
import Datasets as DS

In [2]:
dataset_name = 'ionosphere'
num_instances = 20
time_limit = 100
rho = 0.01
unc_type = 'linf'

clf_dict = {'linear': [0], 'cart': [3, 5, 10], 'rf': [5, 10, 20, 50, 100],
           'gbm': [5, 10, 20, 50, 100], 'mlp': [(10,), (10, 10, 10), (50,), (100,)]}

clf_dict = {'mlp': [(100,)]}

## Load dataset

In [3]:
data = getattr(DS,dataset_name)
df = data('../data/')

scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns = df.columns)
X = df.iloc[:,:-1]
y = df['Outcome']
df.head()

Unnamed: 0,X_0,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,...,X_25,X_26,X_27,X_28,X_29,X_30,X_31,X_32,X_33,Outcome
0,1.0,0.0,0.997695,0.470555,0.926215,0.51153,0.91699,0.31146,1.0,0.5188,...,0.244145,0.70539,0.26916,0.60633,0.32955,0.711335,0.227565,0.593205,0.2735,1.0
1,1.0,0.0,1.0,0.405855,0.965175,0.31922,0.44566,0.032015,1.0,0.477255,...,0.367155,0.39766,0.407995,0.4048,0.442035,0.41687,0.46856,0.43131,0.487765,0.0
2,1.0,0.0,1.0,0.483175,1.0,0.502425,1.0,0.43969,0.944825,0.50599,...,0.2989,0.79492,0.389275,0.7155,0.413175,0.80218,0.3791,0.780225,0.30881,1.0
3,1.0,0.0,1.0,0.274195,1.0,1.0,0.85608,0.0,0.5,0.5,...,0.953475,0.758065,1.0,1.0,0.399505,0.62841,1.0,0.33809,1.0,0.0
4,1.0,0.0,1.0,0.487995,0.9707,0.532655,0.96053,0.383725,0.88576,0.418005,...,0.17421,0.56645,0.23397,0.512155,0.189015,0.471465,0.202135,0.47696,0.171515,1.0


## Robust counterfcatual explanation

In [None]:
fnamefull = './results_%s/%s_results_%s.txt' % (dataset_name,dataset_name,unc_type)

num_iterations_dict = {(i, j):[] for i in clf_dict.keys() for j in clf_dict[i]}
comp_time_dict = {(i, j):[] for i in clf_dict.keys() for j in clf_dict[i]}
dist_early_stops = {(i, j):[] for i in clf_dict.keys() for j in clf_dict[i]}
early_stops_iter = {(i, j):[] for i in clf_dict.keys() for j in clf_dict[i]}


for clf_type in clf_dict.keys():
    for param in clf_dict[clf_type]:

        if clf_type == 'cart':
            clf = DecisionTreeClassifier(max_depth=param).fit(X, y)
        elif clf_type == 'rf':
            clf = RandomForestClassifier(max_depth=3, random_state=0, n_estimators=param).fit(X, y)
        elif clf_type == 'mlp':
            clf = MLPClassifier(hidden_layer_sizes=param, activation='relu', random_state=0, max_iter=10000).fit(X, y)
        elif clf_type == 'gbm':
            clf = GradientBoostingClassifier(n_estimators=param, learning_rate=1.0, max_depth=2, random_state=0).fit(X, y)
        elif clf_type == 'linear':
            clf = LogisticRegression(random_state=param).fit(X, y)

        for i in range(4):
            print(f'######## Iteration number: {i} ########')
            np.random.seed(i)
            u = pd.DataFrame([X.iloc[i, :]])
            
            
            if clf_type == 'linear':
                it = False
            else:
                it = True
            
            final_model, num_iterations, comp_time, x_, solutions_master_dict = rce.generate(clf, X, y, '../experiments/results_%s' % dataset_name, clf_type, 'binary', u, list(u.columns), [], [], {}, [], [], [], rho,
                             unc_type=unc_type, iterative=it, time_limit=time_limit)
            
            if x_ is not None:
                solution_subopt, dist = rce.find_maxrad(x_, clf_type, 'results_%s' % dataset_name, x_.columns, [], [], {}, [], [], [], clf.predict(u)[0], unc_type)
            elif x_ is None or dist + rho/100 < rho
                best_dist = 0
                for i in range(len(solutions_master_dict)):
                    x_ = solutions_master_dict[i]['sol']
                    solution_subopt_i, dist_i = rce.find_maxrad(x_, clf_type, 'results_%s' % dataset_name, x_.columns, [], [], {}, [], [], [], clf.predict(u)[0], unc_type)
                    if dist_i >= best_dist:
                        best_dist = dist_i
                print(best_dist)
                dist_early_stops[(clf_type, param)].append(best_dist)
                early_stops_iter[(clf_type, param)].append(num_iterations)
                print('\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ERROR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n\n')

            else:
                num_iterations_dict[(clf_type, param)].append(num_iterations)
                comp_time_dict[(clf_type, param)].append(comp_time)

            # write results to .txt file
            num_iterations_list = num_iterations_dict[(clf_type, param)] 
            comp_time_list = comp_time_dict[(clf_type, param)] 
            dist_early_stops_list = dist_early_stops[(clf_type,param)]
            early_stops_iter_list = early_stops_iter[(clf_type, param)]

            txt = '{0}: \t {1} \t {2:.2f} ({3:.2f}) \t {4:.2f} ({5:.2f}) \t {6} \t {7:.3f} ({8:.3f}) \t {9:.2f} ({10:.2f}) '.format(clf_type,
                                      param,
                                      np.mean(comp_time_list),
                                        sem(comp_time_list),
                                        np.mean(num_iterations_list), 
                                        sem(num_iterations_list), 
                                        len(dist_early_stops_list),
                                        np.mean(dist_early_stops_list),
                                        sem(dist_early_stops_list), 
                                        np.mean(early_stops_iter_list),
                                        sem(early_stops_iter_list))
        with open(fnamefull, 'a') as f:   
            print(txt,file=f)

######## Iteration number: 0 ########
mlp tables saved.

### Starting the NN iterative approach ###
time limit: 100


------------------------ Iteration: 0 ------------------------
Optimizing the master problem...


Exception in thread Thread-8:
Traceback (most recent call last):
  File "/Users/tabearober/opt/anaconda3/lib/python3.9/threading.py", line 973, in _bootstrap_inner
    self.run()
  File "/Users/tabearober/opt/anaconda3/lib/python3.9/threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/tabearober/opt/anaconda3/lib/python3.9/site-packages/pyomo/common/tee.py", line 505, in _mergedReader


master_model is None -- MP not solved within time limit
### Iterative approach completed in 104.7 s ###



    ready_handles = select(
OSError: [Errno 9] Bad file descriptor


x_ is None, skip instance
######## Iteration number: 1 ########
mlp tables saved.

### Starting the NN iterative approach ###
time limit: 100


------------------------ Iteration: 0 ------------------------
Optimizing the master problem...
solution master [1.0, 0.0, 1.0, 0.40585499999999997, 0.965175, 0.31922, 0.44566, 0.49678555026569354, 1.0, 0.477255, 0.75437, 0.161285, 0.67216, 0.15146500000000002, 0.24157499999999998, 0.01242500000000002, 0.527495, 0.188815, 0.665545, 0.0, 0.434245, 0.27349999999999997, 0.40972, 0.32133, 0.39834, 0.367155, 0.39766, 0.407995, 0.4048, 0.442035, 0.41687, 0.46856, 0.43130999999999997, 0.487765] generated in  0.5 s
--> Distance to the factual instance: 0.4647705502656935
--> Distance to the border: 0.0
Optimizing the adversarial problem...
Set parameter Username
Academic license - for non-commercial use only - expires 2024-01-08
Set parameter PoolSearchMode to value 1
Status: optimal
solution adv problem [0.99, -0.01, 0.99, 0.39585499999999996, 0.95517

  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',


######## Iteration number: 2 ########
mlp tables saved.

### Starting the NN iterative approach ###
time limit: 100


------------------------ Iteration: 0 ------------------------
Optimizing the master problem...


In [None]:
num_iterations_dict