# Installing Z3 and Imports

In [1]:
'''
!pip install z3-solver
!pip install pandas
!pip install numpy
!pip install sklearn
!pip install anchor-exp
'''

'\n!pip install z3-solver\n!pip install pandas\n!pip install numpy\n!pip install sklearn\n!pip install anchor-exp\n'

In [2]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from z3 import *
import time

# Training Linear and Polynomial SVMs

## Data Preprocessing.

In [3]:
cancer = datasets.load_breast_cancer()

In [4]:
df = pd.DataFrame(cancer.data, columns = cancer.feature_names)

In [5]:
df

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [6]:
normalized_df=(cancer.data-cancer.data.min())/(cancer.data.max()-cancer.data.min())
#normalized_df=(cancer.data-cancer.data.mean())/cancer.data.std()

In [7]:
cancer.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [8]:
normalized_df.min(),normalized_df.max()

(0.0, 1.0)

In [9]:
def check_targets(original_set):
    original_unique = np.unique(original_set)
    print("Original Targets: ",original_unique,"\nDesired Targets: [-1,1]")
    print("Is original the desired [1,-1]? ", np.array_equiv(original_unique,np.array([-1,1])))
    if not np.array_equiv(original_unique,np.array([-1,1])):
        if 1 in original_unique:
            print("1 exists in dataset")
            new = np.select([original_set == original_unique[0]],[-1],original_set)
        elif -1 in original_unique:
            print("-1 exists in dataset")
            new = np.select([original_set == original_unique[1]],[1],original_set)
        else:
            print("Neither exists in dataset")
            new = np.select([original_set == original_unique[0],original_set == original_unique[1]],[-1,1],original_set)
        #indexes = original_set[np.where(original_set == unique_elems[0])]
        print("New datasets consists of: ",np.unique(new))
        return new

In [10]:
targets = check_targets(cancer.target)

Original Targets:  [0 1] 
Desired Targets: [-1,1]
Is original the desired [1,-1]?  False
1 exists in dataset
New datasets consists of:  [-1  1]


## Data Separation and Training

In [11]:
X_train, X_test, y_train, y_test = train_test_split(normalized_df, targets, test_size=0.3,random_state=107) # 70% training and 30% test

In [12]:
len(X_train[0])

30

In [13]:
def create_linear_classifier(kernel_type='linear'):
    return svm.SVC(kernel=kernel_type)
def create_poly_classifier(kernel_type='poly',my_degree=2,my_gamma=1/30):
    return svm.SVC(kernel=kernel_type, degree = my_degree,gamma=my_gamma)

In [14]:
clf = create_linear_classifier()
#poly = create_poly_classifier('poly',2,1/(X_train.var() * len(X_train[0])))

#Train the models using the training sets
clf.fit(X_train, y_train)
#poly.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)
#poly_y_pred = poly.predict(X_test)
print("Accuracy Linear:", metrics.accuracy_score(y_test, y_pred))
#print("Accuracy Poly:", metrics.accuracy_score(y_test, poly_y_pred))

Accuracy Linear: 0.9064327485380117


In [15]:
y_pred_train = clf.predict(X_train)
#poly_pred_train = poly.predict(X_train)
print("Accuracy on Training:", metrics.accuracy_score(y_train, y_pred_train))
#print("Accuracy on Training:", metrics.accuracy_score(y_train, poly_pred_train))

Accuracy on Training: 0.8592964824120602


## SVM Decision Function For The First Element of Training Dataset

In [16]:
#Sum (coef @ sup_vec @ X[index] + bias)
((clf.dual_coef_ @ clf.support_vectors_) @ X_train[0].reshape(1, len(X_train[0])).T + clf.intercept_)[0][0]

0.48607829979011963

In [17]:
#(poly.dual_coef_ @ (((poly.support_vectors_ @ X_train[0].reshape(1, len(X_train[0])).T) * poly.gamma + poly.coef0) ** poly.degree) + poly.intercept_)[0][0]

In [18]:
#Linear SVM Decision Function
print("DualCoef / Support Vectors / X_Train.T Reshaped / Intercept (bias)")
clf.dual_coef_.shape, clf.support_vectors_.shape, X_train[0].reshape(1, len(X_train[0])).T.shape, clf.intercept_.shape

DualCoef / Support Vectors / X_Train.T Reshaped / Intercept (bias)


((1, 194), (194, 30), (30, 1), (1,))

In [19]:
#Polynomial SVM Decision Function
#print("DualCoef / Support Vectors / X_Train.T Reshaped / Gamma / Coef0 / Degree / Intercept (bias)")
#poly.dual_coef_.shape, poly.support_vectors_.shape, X_train[0].reshape(1, len(X_train[0])).T.shape,poly.gamma, poly.coef0, poly.degree, poly.intercept_

# Defining Thresholds and Finding Rejecteds

In [20]:
def limits(classifier,data):
    dec_fun = classifier.decision_function(data)
    lim_pos = dec_fun[np.argmax(dec_fun)]
    lim_neg = dec_fun[np.argmin(dec_fun)]
    return dec_fun, lim_pos, lim_neg

In [21]:
def find_thresholds(decfun,t1,t2,wr,chosen_min='EWRR'):
    solution = {'WR':0,'T1':0, 'T2':0,'E':0,'R':0,'EWRR':0}
    index = None
    n_elements = decfun.shape[0]
    for i,wr_ in enumerate(wr):
      for j in range(0,len(t1)):
        #Get Number of Rejected
        positive_indexes = np.where(decfun >= t1[j])
        negative_indexes = np.where(decfun < t2[j])
        rejected_indexes = np.where((decfun < t1[j]) & (decfun >= t2[j]))
        R = rejected_indexes[0].shape[0]
        #np.array(positive_indexes).shape,np.array(negative_indexes).shape, R

        #Get Number of Misclassifications
        class_p = y_train[positive_indexes]
        class_n = y_train[negative_indexes]
        error_p = np.where(class_p == np.unique(y_train)[0])[0].shape[0]
        error_n = np.where(class_n == np.unique(y_train)[1])[0].shape[0]
        E = (error_p + error_n)/(n_elements - R)
        R2 = R/n_elements
        #print("T1 ",round(t1[j],4),"T2 ",round(t2[j],4),"E",round(E,4),"Rej",R,"R/Total: ",R2,"Wr: ",wr_, "EwrR: ",E + wr_*R2)
        if chosen_min=='R':
            if (i == 0 and i == j) or R < solution['R']:
                solution['WR'] = wr_
                solution['T1'] = t1[j]
                solution['T2'] = t2[j]
                solution['E'] = E
                solution['R'] = R
                solution['EWRR'] = E + wr_ * R2
        elif chosen_min=='E':
            if (i == 0 and i == j) or E < solution['E']:
                solution['WR'] = wr_
                solution['T1'] = t1[j]
                solution['T2'] = t2[j]
                solution['E'] = E
                solution['R'] = R
                solution['EWRR'] = E + wr_ * R2
        elif chosen_min=='EWRR':
            if (i == 0 and i == j) or (E + wr_ * R2) < solution['EWRR']:
                solution['WR'] = wr_
                solution['T1'] = t1[j]
                solution['T2'] = t2[j]
                solution['E'] = E
                solution['R'] = R
                solution['EWRR'] = E + wr_ * R2
        else:
            return 'Chosen option "' +chosen_min+'" is invalid'
    print('Thresholds by min(',chosen_min,') from solution: ',solution)
    return solution['T1'], solution['T2']      
                

In [22]:
def find_indexes(decfun,t1,t2):
    positive_indexes = np.where(decfun >= t1)[0]
    negative_indexes = np.where(decfun < t2)[0]
    rejected_indexes = np.where((decfun < t1) & (decfun >= t2))[0]
    R = rejected_indexes.shape[0]
    return positive_indexes,negative_indexes,rejected_indexes

In [23]:
def find_thresholds_and_indexes(classifier,data,wr = None):  
    dec_fun,lim_pos,lim_neg = limits(classifier,data)
    print("Superior Limit: ",lim_pos,"\nInferior Limit: ",lim_neg)
    if wr == None:
        wr = [0.04, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32, 0.36, 0.4, 0.44, 0.48]
    t1 = []
    t2 = []
    for i in range (1,21):
      t1.append(0.05*i*lim_pos)
      t2.append(0.05*i*lim_neg)  
    T1,T2 = find_thresholds(dec_fun,t1,t2,wr)
    pos_idx,neg_idx,rej_idx = find_indexes(dec_fun,T1,T2)
    return T1, T2, pos_idx, neg_idx, rej_idx,lim_pos,lim_neg

# Implementing SVM function for Z3 Solver

## Z3 Decision Function Elements

In [24]:
np.RealVal = np.vectorize(RealVal) 
np.RealVector = np.vectorize(RealVector) 

In [25]:
def to_z3_conversion(classifier,training_set):
    z3_dual_coef = np.RealVal(classifier.dual_coef_)
    z3_support_vectors = np.RealVal(classifier.support_vectors_)
    z3_intercept_ = np.RealVal(classifier.intercept_)
    z3_X_Train = np.RealVector('x',training_set.shape[1])
    if classifier.kernel == 'poly':
        z3_gamma = np.RealVal(classifier.gamma)
        z3_coef0 = np.RealVal(classifier.coef0)
        z3_degree = np.RealVal(classifier.degree)
        return z3_dual_coef,z3_support_vectors,z3_intercept_,z3_X_Train, z3_gamma,z3_coef0,z3_degree
    return z3_dual_coef,z3_support_vectors,z3_intercept_,z3_X_Train

# Z3 with Reject Option

## Explaining the Classifier's Decision Function and Finding Relevant Features

In [26]:
def z3_explanation(classifier,t1, t2, X, z3_coef, z3_sup_vec, z3_X, z3_intercept, reject_indexes,sup_lim,inf_lim,
                   z3_gamma = None, z3_coef0 = None, z3_degree = None, show_values=True, min=0,max=1,positive=False,negative=False,rejected=False):
    elapsed_time = []
    relevant = []
    irrelevant = []
    global_values = []
    print("Number of Instances: ", len(reject_indexes))
    solver = Solver()
    if classifier.kernel=='linear':
        print("Classifier: Linear")
        if rejected:
            print("Declared: Rejected Instances")
            solver.add(Or(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] >= t1,
                          ((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] < t2))
        elif positive:
            print("Declared: Positive Instances")
            solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] < t1)
        elif negative:
            print("Declared: Negative Instances")
            solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] >= t2)
        else:
            print("WARNING: Must declare if are positive,negative or rejected instances!")
        solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] >= inf_lim)
        solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] <= sup_lim)
    elif classifier.kernel=='poly':
        print("Classifier: Polynomial")
        solver.add(Or(((z3_dual_coef @ (((z3_support_vectors @ z3_X.reshape(1, len(z3_X)).T) * z3_gamma + z3_coef0) ** z3_degree) + z3_intercept_)[0][0] >= t1),
                      (z3_dual_coef @ (((z3_support_vectors @ z3_X.reshape(1, len(z3_X)).T) * z3_gamma + z3_coef0) ** z3_degree) + z3_intercept_)[0][0] < t2))
    for j in range(0, len(z3_X)):
        solver.add(z3_X[j] >= min)
        solver.add(z3_X[j] <= max)
    solver.push()
    for i in range(0, len(reject_indexes)):
        # Add Assertions for 0<= feature <= 1
        index_list = list(range(len(z3_X)))
        unsat_list = []
        sat_list = []
        values = []

        # Select a feature and unfix it
        start = time.perf_counter()
        for z in range(0, len(z3_X)):   
            for j in range(0, len(z3_X)):
                if j != z and j in index_list:  # Choose one to check influence
                    solver.add(z3_X[j] == X[reject_indexes[i]][j])            
            check = solver.check()
            if check == sat:
                model = solver.model()
                value = model[z3_X[z]].numerator_as_long() / model[z3_X[z]].denominator_as_long()
                sat_list.append(z)
                values.append(value)
                if show_values:
                    print('i = ', i, z, check, X[reject_indexes[i]][z], value)
            else:
                unsat_list.append(z)
                index_list.remove(z)
                if show_values:
                    print('i = ', i, z, check)
            
            solver.pop()
            solver.push()
        elapsed_time.append(time.perf_counter()  - start)    
        print("Finished ", i)
        relevant.append(sat_list)
        irrelevant.append(unsat_list)
        global_values.append(values)
        # print("Relevant: ",sat_list, '\nUnsat List: ',unsat_list,'\n')      
    for i in range(0, len(relevant)):
        if (show_values):
            print('Instance ', i, '\nRelevant Features: ', relevant[i], '\nValues: ', global_values[i], '\nIrrelevant Features: ',
                  irrelevant[i], '\nElapsed time: ',elapsed_time[i],'seconds\n\n')
    
    print("Tamanho médio de explicação: ",sum(len(x) for x in relevant)/len(relevant)," - Custo médio: ",round(sum(elapsed_time)/len(elapsed_time),5),"seg(s)")        
    return relevant, irrelevant, elapsed_time

### For Linear Classifier

#### Get thresholds and the rejected for Linear

In [27]:
T1,T2, positive_indexes,negative_indexes,rejected_indexes,lim_pos,lim_neg = find_thresholds_and_indexes(clf,X_train)
T1,T2, positive_indexes.shape[0],negative_indexes.shape[0],rejected_indexes.shape[0]

Superior Limit:  1.8675370149765804 
Inferior Limit:  -6.600371124175779
Thresholds by min( EWRR ) from solution:  {'WR': 0.04, 'T1': 1.3072759104836065, 'T2': -4.620259786923046, 'E': 0.0, 'R': 329, 'EWRR': 0.03306532663316583}


(1.3072759104836065, -4.620259786923046, 67, 2, 329)

#### Get Z3's equivalent to linear classifier's decision function

In [29]:
z3_dual_coef,z3_support_vectors,z3_intercept_,z3_X_Train = to_z3_conversion(clf,X_train)

In [30]:
rejected_linear_relevant = []
rejected_linear_irrelevant = []
positive_linear_relevant = []
positive_linear_irrelevant = []
negative_linear_relevant = []
negative_linear_irrelevant = []
rejected_elapsed_time = []
positive_elapsed_time = []
negative_elapsed_time = []

In [63]:
if len(rejected_indexes)!=0:
    if len(rejected_indexes)>=50:
        rejected_linear_relevant, rejected_linear_irrelevant, rejected_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,rejected_indexes[0:50],
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, rejected = True)
    else:
        rejected_linear_relevant, rejected_linear_irrelevant, rejected_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,rejected_indexes,
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, rejected = True)

Number of Instances:  50
Classifier: Linear
Declared: Rejected Instances
i =  0 0 unsat
i =  0 1 unsat
i =  0 2 unsat
i =  0 3 unsat
i =  0 4 unsat
i =  0 5 unsat
i =  0 6 unsat
i =  0 7 unsat
i =  0 8 unsat
i =  0 9 unsat
i =  0 10 unsat
i =  0 11 unsat
i =  0 12 unsat
i =  0 13 unsat
i =  0 14 unsat
i =  0 15 unsat
i =  0 16 unsat
i =  0 17 unsat
i =  0 18 unsat
i =  0 19 unsat
i =  0 20 unsat
i =  0 21 unsat
i =  0 22 unsat
i =  0 23 sat 0.19426422190879172 1.0
i =  0 24 unsat
i =  0 25 unsat
i =  0 26 unsat
i =  0 27 unsat
i =  0 28 unsat
i =  0 29 unsat
Finished  0
i =  1 0 unsat
i =  1 1 unsat
i =  1 2 unsat
i =  1 3 unsat
i =  1 4 unsat
i =  1 5 unsat
i =  1 6 unsat
i =  1 7 unsat
i =  1 8 unsat
i =  1 9 unsat
i =  1 10 unsat
i =  1 11 unsat
i =  1 12 unsat
i =  1 13 unsat
i =  1 14 unsat
i =  1 15 unsat
i =  1 16 unsat
i =  1 17 unsat
i =  1 18 unsat
i =  1 19 unsat
i =  1 20 unsat
i =  1 21 unsat
i =  1 22 unsat
i =  1 23 sat 0.1614010343206394 1.0
i =  1 24 unsat
i =  1 25 un

In [64]:
if len(positive_indexes)!=0:
    if len(positive_indexes)>=50:
        positive_linear_relevant, positive_linear_irrelevant, positive_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,positive_indexes[0:50],
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, positive = True)
    else:
        positive_linear_relevant, positive_linear_irrelevant, positive_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,positive_indexes,
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, positive = True)

Number of Instances:  50
Classifier: Linear
Declared: Positive Instances
i =  0 0 unsat
i =  0 1 unsat
i =  0 2 sat 0.015660554771979314 0.6007239822237846
i =  0 3 sat 0.07748001880582982 0.39029940494165083
i =  0 4 unsat
i =  0 5 unsat
i =  0 6 unsat
i =  0 7 unsat
i =  0 8 unsat
i =  0 9 unsat
i =  0 10 unsat
i =  0 11 unsat
i =  0 12 unsat
i =  0 13 sat 0.002787964268923366 0.5092541999506848
i =  0 14 unsat
i =  0 15 unsat
i =  0 16 unsat
i =  0 17 unsat
i =  0 18 unsat
i =  0 19 unsat
i =  0 20 sat 0.002708039492242595 0.5534605411938999
i =  0 21 sat 0.00465444287729196 0.5283837885297799
i =  0 22 sat 0.01727080394922426 0.5157767431074368
i =  0 23 sat 0.0929478138222849 0.23877248210570007
i =  0 24 unsat
i =  0 25 unsat
i =  0 26 sat 6.203573107663376e-06 0.6916192484160175
i =  0 27 unsat
i =  0 28 sat 6.147155618241655e-05 0.643766397266513
i =  0 29 sat 1.94381758345087e-05 0.9810211388366884
Finished  0
i =  1 0 unsat
i =  1 1 sat 0.004454630935590033 0.8964452345820534

In [65]:
if len(negative_indexes)!=0:
    if len(negative_indexes)>=50:
        negative_linear_relevant, negative_linear_irrelevant, negative_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,negative_indexes[0:50],
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, negative = True)
    else:
        negative_linear_relevant, negative_linear_irrelevant, negative_elapsed_time = z3_explanation(clf,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,negative_indexes,
                                                                          sup_lim=lim_pos,inf_lim=lim_neg,show_values=True, negative = True)

Number of Instances:  2
Classifier: Linear
Declared: Negative Instances
i =  0 0 unsat
i =  0 1 unsat
i =  0 2 unsat
i =  0 3 sat 0.5289139633286318 0.5183172704399466
i =  0 4 unsat
i =  0 5 unsat
i =  0 6 unsat
i =  0 7 unsat
i =  0 8 unsat
i =  0 9 unsat
i =  0 10 unsat
i =  0 11 unsat
i =  0 12 unsat
i =  0 13 unsat
i =  0 14 unsat
i =  0 15 unsat
i =  0 16 unsat
i =  0 17 unsat
i =  0 18 unsat
i =  0 19 unsat
i =  0 20 unsat
i =  0 21 unsat
i =  0 22 sat 0.05190409026798308 0.01025209607344135
i =  0 23 sat 0.7559943582510579 0.7531765981024693
i =  0 24 unsat
i =  0 25 unsat
i =  0 26 unsat
i =  0 27 unsat
i =  0 28 unsat
i =  0 29 unsat
Finished  0
i =  1 0 unsat
i =  1 1 unsat
i =  1 2 unsat
i =  1 3 sat 0.5879172543488481 0.0009103622503449467
i =  1 4 unsat
i =  1 5 unsat
i =  1 6 unsat
i =  1 7 unsat
i =  1 8 unsat
i =  1 9 unsat
i =  1 10 unsat
i =  1 11 unsat
i =  1 12 unsat
i =  1 13 unsat
i =  1 14 unsat
i =  1 15 unsat
i =  1 16 unsat
i =  1 17 unsat
i =  1 18 unsat
i =

### For Polynomial Classifier (WIP)

#### Get thresholds and the rejected for Poly

In [34]:
#T1,T2, positive_indexes,negative_indexes,rejected_indexes = find_thresholds_and_indexes(poly,X_train)
#T1,T2, positive_indexes.shape[0],negative_indexes.shape[0],rejected_indexes.shape[0]

#### Get Z3's equivalent to Poly classifier's decision function

In [35]:
#z3_dual_coef,z3_support_vectors,z3_intercept_,z3_X_Train,z3_gamma,z3_coef0,z3_degree = to_z3_conversion(poly,X_train)

In [36]:
#poly_relevant, poly_irrelevant = z3_explanation(poly,T1,T2,X_train,z3_dual_coef,z3_support_vectors,z3_X_Train,z3_intercept_,rejected_indexes[0:1], z3_gamma,z3_coef0,z3_degree, show_values=False)

# Anchors

## Setting Up

In [37]:
from __future__ import print_function
import sys
import sklearn
import sklearn.ensemble
from anchor import utils
from anchor import anchor_tabular

In [38]:
def generate_ro_target_set(target_set,rejected_indexes):
    target_set[rejected_indexes] = 0
    return target_set

In [39]:
ro_set = generate_ro_target_set(y_train,rejected_indexes)
print(np.unique(ro_set))

[-1  0  1]


In [40]:
feature_list = []
for i in range(0,len(X_train[0])):
    feature_list.append(str(i))
feature_list = np.array(feature_list)

In [41]:
explainer = anchor_tabular.AnchorTabularExplainer(
    [-1,0,1],
    feature_list,
    X_train)

In [61]:
def svm_decfun(data,classifier=clf):
    data = np.atleast_2d(data)
    return ((classifier.dual_coef_ @ classifier.support_vectors_) @ data.T + classifier.intercept_)[0][0]

In [62]:
def svm_decfun_class(data,classifier=clf,Threshold_1=T1,Threshold_2=T2):
    if svm_decfun(data) >= Threshold_1:
        return np.array([2]) #class 1, since [-1, 0, 1]
    elif svm_decfun(data) < Threshold_2:
        return np.array([0]) #class -1
    else:
        return np.array([1]) #class 0

### Anchors Explanation & Z3 Validation

In [51]:
def anchors_to_z3_explanation(explainer, X, t1, t2, z3_coef, z3_sup_vec, z3_X, z3_intercept, indexes,
                              z3_gamma=None, z3_coef0=None, z3_degree=None,min=0,max=1,positive=False,negative=False,rejected=False):
    print('Started')
    sat_var = 0
    unsat_var = 0
    np.random.seed(1)
    solver = Solver()
    feature_sizes = []
    anchors_time = []
    if rejected:
        print("Declared: Rejected Instances")
        solver.add(Or(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] >= t1,
                      ((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] < t2))
    elif positive:
        print("Declared: Positive Instances")
        solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] < t1)
    elif negative:
        print("Declared: Negative Instances")
        solver.add(((z3_coef @ z3_sup_vec) @ z3_X.reshape(1, len(z3_X)).T + z3_intercept)[0][0] >= t2)
    else:
        print("WARNING: Must declare if are positive,negative or rejected instances!")
        return None
    for j in range(0, len(z3_X)):
        solver.add(z3_X[j] >= min)
        solver.add(z3_X[j] <= max)
    solver.push()
    for idx in indexes:
        print('Prediction: ', explainer.class_names[svm_decfun_class(X[idx])[0]])
        start = time.perf_counter()  
        exp = explainer.explain_instance(X[idx], svm_decfun_class, threshold=1)
        anchors_time.append(time.perf_counter() - start)
        #print(exp.names())
        print('Index = ', idx)
        feature_sizes.append(len(np.unique((exp.features()))))
        for feature in np.unique(exp.features()):       
            solver.add(z3_X[feature] == X[idx][feature])
            #print("Added restrictions: ",z3_X[feature],X[idx][feature])
        
        print(solver.check(), ' for ', idx,'\n')
        if solver.check() == sat:
            model = solver.model()
            print(model)
            sat_var +=1
        else:
            unsat_var +=1
        
        print('\n---------------\n')
        solver.pop()
        solver.push()
    print("Sat = ",sat_var,"\nUnsat = ",unsat_var)
    return sat_var,unsat_var,feature_sizes,anchors_time

In [52]:
rejected_sat_var = 0
rejected_unsat_var = 0
positive_sat_var = 0
positive_unsat_var = 0
negative_sat_var = 0
negative_unsat_var = 0
rejected_feature_sizes = []
positive_feature_sizes = []
negative_feature_sizes = []
r_time = []
p_time = []
n_time  = []

In [53]:
if len(rejected_indexes)!=0:
    if len(rejected_indexes)>=50:
        rejected_sat_var, rejected_unsat_var, rejected_feature_sizes,r_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,rejected_indexes[0:50],rejected=True)
    else:
        rejected_sat_var, rejected_unsat_var, rejected_feature_sizes,r_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,rejected_indexes,rejected=True)

Started
Declared: Rejected Instances
Prediction:  0
Index =  0
sat  for  0 

[x__16 = 0,
 x__4 = 0,
 x__20 = 0,
 x__21 = 0,
 x__6 = 0,
 x__15 = 0,
 x__23 = 13363478146319633511947593443399179934657566100411432015931/13806582110909334671307952818344802200000000000000000000000,
 x__2 = 0,
 x__19 = 0,
 x__13 = 0,
 x__14 = 0,
 x__10 = 0,
 x__1 = 0,
 x__9 = 0,
 x__24 = 1,
 x__11 = 0,
 x__8 = 0,
 x__5 = 0,
 x__22 = 0,
 x__0 = 0,
 x__26 = 1,
 x__27 = 1,
 x__25 = 1617301363422661/50000000000000000000,
 x__3 = 3143394452280207/20000000000000000,
 x__12 = 0,
 x__28 = 1,
 x__7 = 0,
 x__18 = 0,
 x__29 = 1,
 x__17 = 0]

---------------

Prediction:  0
Index =  1
sat  for  1 

[x__16 = 0,
 x__4 = 0,
 x__20 = 0,
 x__21 = 0,
 x__6 = 0,
 x__15 = 0,
 x__23 = 1026923258943674431174334125669905251161949792419130254687/1062044777762256513177534832180369400000000000000000000000,
 x__2 = 0,
 x__19 = 0,
 x__13 = 9887165021156559/2000000000000000000,
 x__14 = 0,
 x__10 = 0,
 x__1 = 0,
 x__9 = 0,
 x__24 = 1,
 x

In [54]:
if len(positive_indexes)!=0:
    if len(positive_indexes)>=50:
        positive_sat_var, positive_unsat_var, positive_feature_sizes,p_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,positive_indexes[0:50],positive=True)
    else:
        positive_sat_var, positive_unsat_var, positive_feature_sizes,p_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,positive_indexes,positive=True)

Started
Declared: Positive Instances
Prediction:  1
Index =  4
unsat  for  4 


---------------

Prediction:  1
Index =  9
unsat  for  9 


---------------

Prediction:  1
Index =  14
unsat  for  14 


---------------

Prediction:  1
Index =  17
unsat  for  17 


---------------

Prediction:  1
Index =  20
unsat  for  20 


---------------

Prediction:  1
Index =  23
unsat  for  23 


---------------

Prediction:  1
Index =  28
unsat  for  28 


---------------

Prediction:  1
Index =  39
unsat  for  39 


---------------

Prediction:  1
Index =  43
unsat  for  43 


---------------

Prediction:  1
Index =  45
unsat  for  45 


---------------

Prediction:  1
Index =  49
unsat  for  49 


---------------

Prediction:  1
Index =  54
unsat  for  54 


---------------

Prediction:  1
Index =  59
unsat  for  59 


---------------

Prediction:  1
Index =  81
unsat  for  81 


---------------

Prediction:  1
Index =  89
unsat  for  89 


---------------

Prediction:  1
Index =  94
unsat  for

In [55]:
if len(negative_indexes)!=0:
    if len(negative_indexes)>=50:
        negative_sat_var, negative_unsat_var, negative_feature_sizes,n_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,negative_indexes[0:50],negative=True)
    else:
        negative_sat_var, negative_unsat_var, negative_feature_sizes,n_time = anchors_to_z3_explanation(explainer, X_train, T1, T2, z3_dual_coef, z3_support_vectors, z3_X_Train, z3_intercept_,negative_indexes,negative=True)

Started
Declared: Negative Instances
Prediction:  -1
Index =  158
unsat  for  158 


---------------

Prediction:  -1
Index =  190
sat  for  190 

[x__16 = 946755994358251/50000000000000000000,
 x__4 = 0,
 x__20 = 0,
 x__21 = 7374236013164081/1000000000000000000,
 x__6 = 0,
 x__15 = 0,
 x__23 = 1,
 x__2 = 1372972496473907/31250000000000000,
 x__19 = 1071462153267513/1000000000000000000000,
 x__13 = 3186412787964269/25000000000000000,
 x__14 = 8991537376586741/5000000000000000000000,
 x__10 = 0,
 x__1 = 1235072872590503/200000000000000000,
 x__9 = 13218147625763987/1000000000000000000000,
 x__24 = 498427950164551/15625000000000000000,
 x__11 = 307005171603197/1000000000000000000,
 x__8 = 21156558533145273/500000000000000000000,
 x__5 = 4673248707099201/100000000000000000000,
 x__22 = 0,
 x__0 = 6398683591913493/1000000000000000000,
 x__26 = 0,
 x__27 = 0,
 x__25 = 0,
 x__3 = 1583175473272360610478501982372542994027862404001918613853/279373871872690817890819594234269700000000000000000000

## Calculating Metrics

### Z3 AND ANCHORS TIME COST

In [56]:
soma = 0
tamanho = 0
if rejected_elapsed_time != None:
    soma += sum(rejected_elapsed_time)
    tamanho += len(rejected_elapsed_time)
    print("Tempo Médio Z3 Rejeitados = ",sum(rejected_elapsed_time)/len(rejected_elapsed_time))
if positive_elapsed_time != None:
    soma += sum(positive_elapsed_time)
    tamanho += len(positive_elapsed_time)
    print("Tempo Médio Z3 Positivos = ",sum(positive_elapsed_time)/len(positive_elapsed_time))
if negative_elapsed_time != None:
    soma += sum(negative_elapsed_time)
    tamanho += len(negative_elapsed_time)
    print("Tempo Médio Z3 Negativos = ",sum(negative_elapsed_time)/len(negative_elapsed_time)) 
if tamanho != 0:
    print("Tamanho Médio Total: ",soma/tamanho,'sec')
    print("Soma: ",soma," Tamanho: ",tamanho)

Tempo Médio Z3 Rejeitados =  0.4511827459999992
Tempo Médio Z3 Positivos =  0.16058558800000072
Tempo Médio Z3 Negativos =  0.11616199999998855
Tamanho Médio Total:  0.3021641245098037 sec
Soma:  30.820740699999973  Tamanho:  102


In [57]:
soma = 0
tamanho = 0
if r_time != None:
    soma += sum(r_time)
    tamanho += len(r_time)
    print("Tempo Médio Anchors Rejeitados = ",sum(r_time)/len(r_time))
if p_time != None:
    soma += sum(p_time)
    tamanho += len(p_time)
    print("Tempo Médio Anchors Positivos = ",sum(p_time)/len(p_time))
if n_time != None:
    soma += sum(n_time)
    tamanho += len(n_time)
    print("Tempo Médio Anchors Negativos = ",sum(n_time)/len(n_time))      
if tamanho != 0:
    print("Tamanho Médio Total: ",soma/tamanho,'sec')
    print("Soma: ",soma," Tamanho: ",tamanho)

Tempo Médio Anchors Rejeitados =  3.0406364019999974
Tempo Médio Anchors Positivos =  73.721545576
Tempo Médio Anchors Negativos =  27.28975739999987
Tamanho Médio Total:  38.16361385980392 sec
Soma:  3892.6886136999997  Tamanho:  102


### Z3 AND ANCHORS EXPLANATION SIZE

In [58]:
soma = 0
tamanho = 0
if len(rejected_linear_relevant)>0:  
    tamanho += len(rejected_linear_relevant)
    tamanho_rejeitado = 0
    for x in rejected_linear_relevant:
        soma += len(x)
        tamanho_rejeitado += len(x)
    print("Z3 Tamanho Rejeitado: ",tamanho_rejeitado,"Tamanho Médio Rejeitado:",tamanho_rejeitado/len(rejected_linear_relevant))
if len(positive_linear_relevant)>0:   
    tamanho += len(positive_linear_relevant)
    tamanho_positivo = 0
    for x in positive_linear_relevant:
        soma += len(x)
        tamanho_positivo += len(x)
    print("Z3 Tamanho Positivo: ",tamanho_positivo,"Tamanho Médio Positivo:",tamanho_positivo/len(positive_linear_relevant))
if len(negative_linear_relevant)>0:
    tamanho += len(negative_linear_relevant)
    tamanho_negativo = 0
    for x in negative_linear_relevant:
        soma += len(x)
        tamanho_negativo += len(x)
    print("Z3 Tamanho Negativo: ",tamanho_negativo,"Tamanho Médio Negativo:",tamanho_negativo/len(negative_linear_relevant))
print("Z3 Tamanho Médio ",soma/tamanho)
print("Soma: ",soma,"Tamanho: ",tamanho)

Z3 Tamanho Rejeitado:  101 Tamanho Médio Rejeitado: 2.02
Z3 Tamanho Positivo:  516 Tamanho Médio Positivo: 10.32
Z3 Tamanho Negativo:  5 Tamanho Médio Negativo: 2.5
Z3 Tamanho Médio  6.098039215686274
Soma:  622 Tamanho:  102


In [59]:
if len(rejected_feature_sizes) > 0 :
    print("Anchors Tamanho Rejeitado: ",sum(rejected_feature_sizes),"Tamanho Médio Rejeitado",sum(rejected_feature_sizes)/len(rejected_feature_sizes))
if len(positive_feature_sizes) > 0 :
    print("Anchors Tamanho Positivo: ",sum(positive_feature_sizes),"Tamanho Médio Positivo",sum(positive_feature_sizes)/len(positive_feature_sizes))
if len(negative_feature_sizes) > 0 :
    print("Anchors Tamanho Negativo: ",sum(negative_feature_sizes),"Tamanho Médio Negativo",sum(negative_feature_sizes)/len(negative_feature_sizes))
soma = sum(rejected_feature_sizes)+sum(positive_feature_sizes)+sum(negative_feature_sizes)
soma_len = len(rejected_feature_sizes) + len(positive_feature_sizes) + len(negative_feature_sizes)
print("Anchors Tamanho Médio: ",soma/soma_len)
print("Soma: ",soma,"Tamanho: ",soma_len)

Anchors Tamanho Rejeitado:  104 Tamanho Médio Rejeitado 2.08
Anchors Tamanho Positivo:  1493 Tamanho Médio Positivo 29.86
Anchors Tamanho Negativo:  44 Tamanho Médio Negativo 22.0
Anchors Tamanho Médio:  16.08823529411765
Soma:  1641 Tamanho:  102


### ANCHORS TO Z3 SAT AND UNSAT

In [60]:
print("          SAT/UNSAT")
print("Rejected: ",rejected_sat_var,"|",rejected_unsat_var)
print("Positive: ",positive_sat_var,"|",positive_unsat_var)
print("Negative: ",negative_sat_var,"|",negative_unsat_var)
soma_sat = rejected_sat_var + positive_sat_var + negative_sat_var
soma_unsat = rejected_unsat_var + positive_unsat_var + negative_unsat_var
print("Total:    ",soma_sat,"|",soma_unsat," of ",soma_sat+soma_unsat)

          SAT/UNSAT
Rejected:  50 | 0
Positive:  1 | 49
Negative:  1 | 1
Total:     52 | 50  of  102
