In [1]:
from pulp import *
from pulp import LpProblem, LpVariable, LpMinimize, LpInteger, lpSum, value, LpBinary,LpStatusOptimal
import pulp
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore", message="Overwriting previously set objective.")
import utility
import docplex.mp.model
import docplex
import docplex_explainer
import mymetrics

In [2]:
# Load Dataset
# The file is space-separated and has no header row, so column names are supplied here.
dataset_name = 'Column'
df = pd.read_csv(
    './datasets/column_2C.dat',
    sep=" ",
    names=[
        'pelvic_incidence',
        'pelvic_tilt',
        'lumbar_lordosis_angle',
        'sacral_slope',
        'pelvic_radius',
        'degree_spondylolisthesis',
        'target',
    ],
)
# Encode the label: rows marked 'AB' become 1, every other label becomes 0.
df['target'] = np.where(df['target'].eq('AB'), 1, 0)

In [3]:
# Scale
# Min-max scale every column except the last one (the target).
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed in a later cell.
scaler = MinMaxScaler().fit(df.values[:, :-1])
scaled_df = scaler.transform(df.values[:, :-1])

In [4]:
# Get scaled bounds
# A single global minimum / maximum taken over all scaled feature values
# (scalars, not per-feature vectors).
lower_bound, upper_bound = scaled_df.min(), scaled_df.max()
print(lower_bound, upper_bound)

0.0 1.0000000000000002


In [5]:
# Check if binary targets
# The last column of df is passed through utility.check_targets_0_1 and the
# result is cast to int32 before being used as the label vector.
targets = utility.check_targets_0_1(df.values[:, -1]).astype(np.int32)
# Scaled features under their original column names, plus the checked labels.
df_scaled = pd.DataFrame(scaled_df, columns=df.columns[:-1]).assign(target=targets)

Original Targets:  [0. 1.] 
Desired Targets: [0,1]
Is original the desired [0, 1]?  True


In [11]:
# Train model
# Stratified 70/30 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    targets,
    test_size=0.3,
    stratify=targets,
    random_state=50,
)
# Full data re-assembled in train-then-test order.
X = np.vstack((X_train, X_test))
y = np.concatenate((y_train, y_test))

# Linear-kernel SVM fitted on the training split only.
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = clf.predict(X_test)
print("Accuracy Linear:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy Linear: 0.8064516129032258


In [13]:
# Finding patterns classified as positive/negative
# utility.find_indexes is a project-local helper; presumably it returns the
# positions in X_test that the classifier places on each side of `threshold`
# -- TODO confirm against utility.py.
positive_indexes,negative_indexes = utility.find_indexes(clf, X_test, threshold=0)
# Report how many test patterns fall in each group.
print(f"Positive patterns = {len(positive_indexes)},\nNegative patterns = {len(negative_indexes)}")

Positive patterns = 73,
Negative patterns = 20


In [15]:
# Make a dataframe with the test data. For comparing Onestep against Twostep.
# Note: the 'target' column holds the classifier's *predicted* label for each
# test row, not the ground-truth label from y_test.
test_df_names = list(df.columns)
if 'target' not in test_df_names:
    test_df_names.append('target')
# Predict the whole test split in one call instead of once per row.
test_predictions = clf.predict(X_test)
# Each row is the scaled feature vector followed by its predicted label.
test_dataset = [
    np.append(instance, predicted_class)
    for instance, predicted_class in zip(X_test, test_predictions)
]
test_dataset_df = pd.DataFrame(np.asarray(test_dataset), columns=test_df_names)

In [17]:
# Parameter p value
# Forwarded as the `p` argument of docplex_explainer.twostep in the cells below;
# its exact meaning is defined by that project-local module.
p = 0.5

In [24]:
# Variables for results.
# coverage_* hold one integer per explained pattern (number of rows returned by
# mymetrics.calculate_coverage); pos_exp_* hold the matching explanations.
coverage_twostep = []
pos_exp_twostep = []
neg_exp_twostep = []  # not filled in this cell

coverage_onestep = []
pos_exp_onestep = []
neg_exp_onestep = []  # not filled in this cell

# Generate explanations for the patterns classified as positive
for idx in positive_indexes:

    # Twostep
    exp_ = docplex_explainer.twostep(
            classifier = clf,
            dual_coef = clf.dual_coef_,
            support_vectors = clf.support_vectors_,
            intercept = clf.intercept_,
            lower_bound = lower_bound,
            upper_bound = upper_bound,
            data = (X_test[idx]),
            p = p,
            positive = True)
    pos_exp_twostep.append(exp_)
    coverage_twostep.append(len(mymetrics.calculate_coverage(test_dataset_df, exp_)))

    # Onestep
    exp = docplex_explainer.onestep(
            classifier = clf,
            dual_coef = clf.dual_coef_,
            support_vectors = clf.support_vectors_,
            intercept = clf.intercept_,
            lower_bound = lower_bound,
            upper_bound = upper_bound,
            data = (X_test[idx]),
            positive = True)
    pos_exp_onestep.append(exp)
    coverage_onestep.append(len(mymetrics.calculate_coverage(test_dataset_df, exp)))

# Per-pattern report: onestep line first, then twostep.
for i in range(len(coverage_onestep)):
    print(f'\ni={i}')
    print(coverage_onestep[i], pos_exp_onestep[i])
    print(coverage_twostep[i], pos_exp_twostep[i])

# Mean coverage per method (A = onestep, B = twostep). Falls back to NaN rather
# than raising ZeroDivisionError when no positive pattern was explained.
mean_A = sum(coverage_onestep) / len(coverage_onestep) if coverage_onestep else float('nan')
mean_B = sum(coverage_twostep) / len(coverage_twostep) if coverage_twostep else float('nan')
print(mean_A, mean_B)


i=0
4 [[0.0, 1.0000000000000002], [0.0, 1.0000000000000002], [0.12025826920390137, 1.0000000000000002], [0.0, 0.26852956090560975], [0.0, 0.3074524142380902], [0.0, 1.0000000000000002]]
6 [[0.0, 1.0000000000000002], [0.0, 1.0000000000000002], [0.1543019790960832, 1.0000000000000002], [0.0, 0.35265688664561173], [0.0, 0.3088022943403024], [0.0, 1.0000000000000002]]

i=1
3 [[0.0, 1.0000000000000002], [0.0, 1.0000000000000002], [0.4285583301407089, 1.0000000000000002], [0.0, 1.0000000000000002], [0.0, 0.4816646951285085], [0.1849162011173184, 1.0000000000000002]]
3 [[0.0, 1.0000000000000002], [0.0, 1.0000000000000002], [0.47258767321047124, 1.0000000000000002], [0.0, 1.0000000000000002], [0.0, 0.4921708684037235], [0.1780863508520547, 1.0000000000000002]]

i=2
3 [[0.0, 1.0000000000000002], [0.0, 1.0000000000000002], [0.06379090043306586, 1.0000000000000002], [0.0, 0.2273487354382768], [0.0, 0.35831809872029263], [0.0765828677839851, 1.0000000000000002]]
7 [[0.0, 1.0000000000000002], [0.0

In [26]:
# Instance to be explained
# Indexing with the slice positive_indexes[0:1] (rather than positive_indexes[0])
# picks the first positive pattern while keeping a leading row axis, as the
# displayed 2-D array shows.
instance = X_test[positive_indexes[0:1]]
instance

array([[0.35570988, 0.51982851, 0.22919277, 0.25087914, 0.30745241,
        0.02516294]])

In [28]:
# Undo the min-max scaling to show the instance in its original feature units.
scaler.inverse_transform(np.atleast_2d(instance))

array([[63.03, 22.55, 39.61, 40.48, 98.67, -0.25]])

In [31]:
#Onestep
# Explain the single selected instance with the one-step explainer.
# lower_bound / upper_bound are the global scalar bounds of the scaled data;
# positive=True because the instance was taken from positive_indexes.
onestep_exp = docplex_explainer.onestep(
        classifier = clf,
        dual_coef = clf.dual_coef_,
        support_vectors = clf.support_vectors_,
        intercept = clf.intercept_,
        lower_bound = lower_bound,
        upper_bound = upper_bound,
        data = (instance),
        positive = True)

In [33]:
# Display the explanation: one [lower, upper] pair per feature, in scaled units.
onestep_exp

[[0.0, 1.0000000000000002],
 [0.0, 1.0000000000000002],
 [0.12025826920390137, 1.0000000000000002],
 [0.0, 0.26852956090560975],
 [0.0, 0.3074524142380902],
 [0.0, 1.0000000000000002]]

In [37]:
#Onestep Lowerbound
# Take the lower end of every [lower, upper] pair directly from onestep_exp
# (instead of hand-copied literals, which go stale when the notebook is re-run)
# and map it back to the original feature units.
scaler.inverse_transform(np.atleast_2d([lower for lower, _ in onestep_exp]))

array([[ 26.15    ,  -6.55    ,  27.437659,  13.37    ,  70.08    ,
        -11.06    ]])

In [39]:
#Onestep Upperbound
# Take the upper end of every [lower, upper] pair directly from onestep_exp
# (instead of hand-copied literals, which go stale when the notebook is re-run)
# and map it back to the original feature units.
scaler.inverse_transform(np.atleast_2d([upper for _, upper in onestep_exp]))

array([[129.83      ,  49.43      , 125.74      ,  42.38730435,
         98.67      , 418.54      ]])

In [41]:
#Twostep
# Explain the same selected instance with the two-step explainer.
# Takes the same arguments as the onestep call plus the parameter `p`.
twostep_exp = docplex_explainer.twostep(
        classifier = clf,
        dual_coef = clf.dual_coef_,
        support_vectors = clf.support_vectors_,
        intercept = clf.intercept_,
        lower_bound = lower_bound,
        upper_bound = upper_bound,
        data = (instance),
        p = p,
        positive = True)

In [43]:
# Display the explanation: one [lower, upper] pair per feature, in scaled units.
twostep_exp

[[0.0, 1.0000000000000002],
 [0.0, 1.0000000000000002],
 [0.1543019790960832, 1.0000000000000002],
 [0.0, 0.35265688664561173],
 [0.0, 0.3088022943403024],
 [0.0, 1.0000000000000002]]

In [47]:
#Twostep Lowerbound
# Take the lower end of every [lower, upper] pair directly from twostep_exp
# (instead of hand-copied literals, which go stale when the notebook is re-run)
# and map it back to the original feature units.
scaler.inverse_transform(np.atleast_2d([lower for lower, _ in twostep_exp]))

array([[ 26.15      ,  -6.55      ,  31.24170314,  13.37      ,
         70.08      , -11.06      ]])

In [49]:
#Twostep Upperbound
# Take the upper end of every [lower, upper] pair directly from twostep_exp
# (instead of hand-copied literals, which go stale when the notebook is re-run)
# and map it back to the original feature units.
scaler.inverse_transform(np.atleast_2d([upper for _, upper in twostep_exp]))

array([[129.83      ,  49.43      , 125.74      ,  51.47810317,
         98.79552535, 418.54      ]])

In [52]:
# Coverage of the onestep explanation: number of items returned by
# mymetrics.calculate_coverage for the test dataframe (presumably the test rows
# that fall inside the explanation's intervals -- confirm in mymetrics).
len(mymetrics.calculate_coverage(test_dataset_df, onestep_exp))

4

In [54]:
# Coverage of the twostep explanation: number of items returned by
# mymetrics.calculate_coverage for the test dataframe (presumably the test rows
# that fall inside the explanation's intervals -- confirm in mymetrics).
len(mymetrics.calculate_coverage(test_dataset_df, twostep_exp))

6