In [None]:
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics

#### Model Training

In [None]:
def check_targets_0_1(original_set):

    """
    ## Check if original binary targets are following the [-1, 1] pattern.
    """

    original_unique = np.unique(original_set)
    print("Original Targets: ",original_unique,"\nDesired Targets: [0,1]")
    print("Is original the desired [0, 1]? ", np.array_equiv(original_unique,np.array([0,1])))
    if np.array_equiv(original_unique,np.array([0,1])):
        return original_set
    else:
        if 1 in original_unique:
            print("1 exists in dataset")
            new = np.select([original_set == original_unique[0]],[-1],original_set)
        elif 0 in original_unique:
            print("0 exists in dataset")
            new = np.select([original_set == original_unique[1]],[1],original_set)
        else:
            print("Neither exists in dataset")
            new = np.select([original_set == original_unique[0],original_set == original_unique[1]],[0,1],original_set)
        print("New dataset targets consists of: ",np.unique(new))
        return new

In [None]:
# Load Dataset
dataset = datasets.load_iris()
dataset_name = 'Iris'
df = pd.DataFrame(dataset.data, columns = dataset.feature_names)

In [None]:
# Scale
scaler = MinMaxScaler()
scaler.fit(dataset.data)
scaled_df = scaler.transform(dataset.data)

In [None]:
# Get scaled bounds
lower_bound = scaled_df.min()
upper_bound = scaled_df.max()
print(lower_bound, upper_bound)

0.0 1.0


In [None]:
# Check if binary targets
df = pd.DataFrame(scaled_df, columns=df.columns)
targets = (check_targets_0_1(np.where(dataset.target == dataset.target[0],0,1))).astype(np.int32)
df['target'] = targets

Original Targets:  [0 1] 
Desired Targets: [0,1]
Is original the desired [0, 1]?  True


In [None]:
# Train model
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,:-1].values, targets, test_size=0.3,random_state=50,stratify=targets)
X = np.concatenate((X_train,X_test),axis=0)
y = np.concatenate((y_train,y_test),axis=0)

clf = svm.SVC(kernel='linear')

# Train the model using the training set
clf.fit(X_train, y_train)

# Predict the response for test dataset
y_pred = clf.predict(X_test)
print("Accuracy Linear:", metrics.accuracy_score(y_test, y_pred))

Accuracy Linear: 1.0


#### PI-Explanation

In [None]:
def get_svm_classifier_weights(classifier):
    weights = np.dot(clf.dual_coef_[0], clf.support_vectors_)
    bias = clf.intercept_
    return weights, bias

def get_svm_classifier_boolean_pred(classifier,instance):
    return False if clf.predict(np.atleast_2d(instance))[0] == 0 else True

def get_delta_values(df,weights,instance,positive=True):
    instance = np.asarray(instance)
    # if instance.ndim > 1:
    #     instance = instance.flatten()
    # Cria uma lista para armazenar os valores de delta
    delta = []

    #Verifica os menores e maiores valores do domínio de cada feature
    min_feature_values = df.iloc[:, :-1].min(axis=0).values
    max_feature_values = df.iloc[:, :-1].max(axis=0).values

    #Se instância classificada como positiva
    if positive:
        #Para cada feature, salve o delta que mais afasta da classe positiva
        for i,feature in enumerate(df.columns[:-1]):
            min_value = weights[i] * instance[i] - weights[i] * min_feature_values[i]
            max_value = weights[i] * instance[i] - weights[i] * max_feature_values[i]
            delta.append(np.max([min_value,max_value]))
        return np.asarray(delta)

    #Para cada feature, salve o delta que mais afasta da classe negativa
    for i,feature in enumerate(df.columns[:-1]):
        min_value = weights[i] * instance[i] - weights[i] * min_feature_values[i]
        max_value = weights[i] * instance[i] - weights[i] * max_feature_values[i]
        delta.append(np.min([min_value,max_value]))
    return np.asarray(delta)

def get_gamma_a(weights,bias,instance):
    return bias + np.dot(weights,instance)

def find_threshold(df,weights,bias, instance,positive=True):
    if instance.ndim > 1:
        instance = instance.flatten()
    instance = np.asarray(instance)
    delta = get_delta_values(df,weights,instance,positive=positive)
    gamma_a = get_gamma_a(weights,bias,instance)
    threshold = np.sum(delta) - gamma_a
    return threshold[0]

#Weights and bias
w, b = get_svm_classifier_weights(clf)

#Instances and labels
instances, labels = (df.iloc[:, :-1].values.tolist(),df.iloc[:, -1].values.tolist())  # Exclude the target column

#Random Instance
instance = np.asarray(instances[0])
label_pred = get_svm_classifier_boolean_pred(clf, instance)
print(label_pred)
delta = get_delta_values(df,w,instance,positive=label_pred)
print(delta, np.sum(delta),'\n')
gamma_a = get_gamma_a(w,b,instance)
print(gamma_a)
threshold = np.sum(delta) - gamma_a
print(threshold)
# print(np.sum(delta),gamma_a,threshold)
find_threshold(df,w,b, instance,positive=label_pred)

False
[-0.57260671 -0.86158228 -1.99515768 -1.79552634] -5.224873020977764 

[-1.22756103]
[-3.99731199]


-3.997311991743618

In [None]:
#Max possible positive value (best case for positive class)
w, np.where(np.sign(w) ==-1,0,1), np.dot(w,np.where(np.sign(w) ==-1,0,1)) + b

(array([ 0.73620863, -1.37853165,  2.14026006,  1.8735927 ]),
 array([1, 0, 1, 1]),
 array([3.99731199]))

In [None]:
#Min possible negative value (best case for negative class)
w,np.where(np.sign(w)*(-1) ==-1,0,1),np.dot(w,np.where(np.sign(w)*(-1) ==-1,0,1)) + b

(array([ 0.73620863, -1.37853165,  2.14026006,  1.8735927 ]),
 array([0, 1, 0, 0]),
 array([-2.13128106]))

In [None]:
find_threshold(df,w,b, np.atleast_2d(X_test[np.where(y_test == 1)[0]][0]),positive=True)

2.13128105719199

In [None]:
find_threshold(df,w,b, np.atleast_2d(X_test[np.where(y_test == 0)[0]][0]),positive=False)

-3.997311991743618

In [None]:
def oneExplanation(Vs, Flip, Delta, indexes, threshold, Idx, Xpl,positive=True):
    '''
    This function generates an explanation for a decision-making process by analyzing
    the values of an instance, decision steps, and sorted deltas. It considers a
    threshold to determine the literals contributing to the explanation.

    Parameters:
    - Vs: Values of the instance being explained.
    - Flip: Array reference of decision steps.
    - Delta (∆): Array of sorted delta(δj) values.
    - threshold (ΦR): Explanation threshold.
    - Idx: Index corresponding to Delta (∆).
    - Xpl: Set reference of explanation literals.

    Returns:
    - Explanation Xpl, Updated threshold; Updated index for Delta (∆)
    '''
    if positive:
        print(f'\n\nPositive Class\n')
        while threshold > 0:
            Flip[Idx] = 0
            print(f'\nIdx {Idx}, Flip {Flip}, threshold = {threshold}, Delta[Idx] {Delta[Idx]}')

            threshold = threshold - Delta[Idx]
            print(f'updated threshold {threshold}')
            Xpl.add((indexes[Idx], Vs[Idx]))
            Idx = Idx + 1

        print(Xpl)
        return Xpl, threshold, Idx

    print(f'\n\nNegative Class\n')
    while threshold <= 0:
        Flip[Idx] = 0
        print(f'\nIdx {Idx}, Flip {Flip}, threshold = {threshold}, Delta[Idx] {Delta[Idx]}')

        threshold = threshold - Delta[Idx]
        print(f'updated threshold {threshold}')
        Xpl.add((indexes[Idx], Vs[Idx]))
        Idx = Idx + 1

    print(f'\nExplanation: {Xpl}')
    return Xpl, threshold, Idx

#Get variables from data
w, b = get_svm_classifier_weights(clf)
print(f'weights {w} bias {b}')
instances, labels = (df.iloc[:, :-1].values.tolist(),df.iloc[:, -1].values.tolist())  # Exclude the target column
instance = np.asarray(X_test[np.where(y_test == 0)][0]) #Vs
print(f'instance {instance}')

label_pred = get_svm_classifier_boolean_pred(clf, instance)
print(f'label_pred {label_pred}')

delta = get_delta_values(df,w,instance,positive=label_pred)
print(f'delta {delta}')

threshold = find_threshold(df,w,b, instance,positive=label_pred)
print(f'threshold {threshold}')

#Prepare to Generate one explanation
Flip = [-1 for element in instance]


# Extract sorted values and new indexes
sorted_indices = np.argsort(-np.abs(delta))  # Negative sign for descending order
sorted_delta = delta[sorted_indices]
print(f'sorted_delta {sorted_delta}')
print(f'sorted_indices {sorted_indices}')
Idx = 0
Xpl = set()
#Flip, sorted_delta, threshold, Idx,Xpl
oneExplanation(instance, Flip, sorted_delta, sorted_indices, threshold, Idx, Xpl,positive=label_pred)

weights [ 0.73620863 -1.37853165  2.14026006  1.8735927 ] bias [-0.7527494]
instance [0.30555556 0.79166667 0.11864407 0.125     ]
label_pred False
delta [-0.51125599 -1.09133756 -1.8863309  -1.63939361]
threshold -3.997311991743618
sorted_delta [-1.8863309  -1.63939361 -1.09133756 -0.51125599]
sorted_indices [2 3 1 0]


Negative Class


Idx 0, Flip [0, -1, -1, -1], threshold = -3.997311991743618, Delta[Idx] -1.8863309012360157
updated threshold -2.1109810905076025

Idx 1, Flip [0, 0, -1, -1], threshold = -2.1109810905076025, Delta[Idx] -1.639393614418113
updated threshold -0.4715874760894896

Idx 2, Flip [0, 0, 0, -1], threshold = -0.4715874760894896, Delta[Idx] -1.0913375590906547
updated threshold 0.6197500830011651

Explanation: {(1, 0.11864406779661013), (2, 0.3055555555555556), (3, 0.7916666666666665)}


({(1, 0.11864406779661013), (2, 0.3055555555555556), (3, 0.7916666666666665)},
 0.6197500830011651,
 3)

In [None]:
#Get variables from data
w, b = get_svm_classifier_weights(clf)
instances, labels = (df.iloc[:, :-1].values.tolist(),df.iloc[:, -1].values.tolist())  # Exclude the target column
instance = np.asarray(X_test[np.where(y_test == 1)][0]) #Vs, Negative class
label_pred = get_svm_classifier_boolean_pred(clf, instance)
threshold = find_threshold(df,w,b, instance,positive=label_pred)

#Prepare to Generate one explanation
Flip = [-1 for element in instance]
delta = get_delta_values(df,w,instance,positive=label_pred)
sorted_indices = np.argsort(-np.abs(delta))  # Negative sign for descending order
sorted_delta = delta[sorted_indices]
Idx = 0
Xpl = set()
#Flip, sorted_delta, threshold, Idx,Xpl
updated_Xpl,updated_threshold, updated_Idx = oneExplanation(instance, Flip, sorted_delta, sorted_indices, threshold, Idx, Xpl,positive=label_pred)



Positive Class


Idx 0, Flip [0, -1, -1, -1], threshold = 2.13128105719199, Delta[Idx] 1.487299364436089
updated threshold 0.6439816927559008

Idx 1, Flip [0, 0, -1, -1], threshold = 0.6439816927559008, Delta[Idx] 1.4051945266440968
updated threshold -0.761212833888196
{(2, 0.4166666666666665), (3, 0.29166666666666674)}


#### All Explanations for Positive class

In [None]:
def verify_explanations(instances,explanations,sorted_indices,weights,clf,positive=True):
    size = len(instances[0])
    explanations = [{key: value for key, value in explanation} for explanation in explanations]
    for i,instance in enumerate(instances):
        explanation = explanations[i]
        pred = clf.predict(np.atleast_2d(instance))
        score = clf.decision_function(np.atleast_2d(instance))
        print(f'{i} instance {instance}, explanation {explanation}, pred {pred}, {score}')
        for j in range(size):
            #print(f'{j} in {sorted_indices} {j in sorted_indices} {explanation} {j in explanation.keys()}')
            if j in explanation.keys():
                #instance[j] = explanation[j]
                pass
            else:
                if positive:
                    instance[j] = np.where(np.sign(w)*(-1) ==-1,0,1)[j] #Adiciona valores em direção da classe negativa
                else:
                    instance[j] = np.where(np.sign(w) ==-1,0,1)[j] #Adiciona valores em direção da classe positiva
        pred = clf.predict(np.atleast_2d(instance))
        score = clf.decision_function(np.atleast_2d(instance))
        print(f'instance {instance}, pred = {clf.predict(np.atleast_2d(instance))}, {score}\n')
        if positive:
            if pred == 0:
                print(f'-------------ALERT-----------')
        else:
            if pred == 1:
                print(f'-------------ALERT-----------')

In [None]:
#Weights and bias
w, b = get_svm_classifier_weights(clf)

#Positive instances
instances, labels = (df.iloc[:, :-1].values.tolist(),df.iloc[:, -1].values.tolist())  # Exclude the target column
instances = np.asarray(X_test[np.where(y_test == 1)])

positive_Xpls = []
for instance in instances:
    #Prepare to Generate one explanation
    label_pred = get_svm_classifier_boolean_pred(clf,instance)
    delta = get_delta_values(df,w,instance,positive=label_pred)
    sorted_indices = np.argsort(-np.abs(delta))  # Negative sign for descending order
    sorted_delta = delta[sorted_indices]
    threshold = find_threshold(df,w,b, instance,positive=label_pred)
    Flip = [-1 for element in instance]
    sorted_delta = np.array(sorted(delta, key=abs, reverse=True))
    Idx = 0
    Xpl = set()

    updated_Xpl,updated_threshold, updated_Idx = oneExplanation(instance, Flip, sorted_delta, sorted_indices,threshold, Idx, Xpl,positive=label_pred)
    positive_Xpls.append(updated_Xpl)



Positive Class


Idx 0, Flip [0, -1, -1, -1], threshold = 2.13128105719199, Delta[Idx] 1.487299364436089
updated threshold 0.6439816927559008

Idx 1, Flip [0, 0, -1, -1], threshold = 0.6439816927559008, Delta[Idx] 1.4051945266440968
updated threshold -0.761212833888196
{(2, 0.4166666666666665), (3, 0.29166666666666674)}


Positive Class


Idx 0, Flip [0, -1, -1, -1], threshold = 2.1312810571919893, Delta[Idx] 1.197094610399779
updated threshold 0.9341864467922103

Idx 1, Flip [0, 0, -1, -1], threshold = 0.9341864467922103, Delta[Idx] 0.9367963510960647
updated threshold -0.002609904303854438
{(2, 0.5833333333333335), (3, 0.375)}


Positive Class


Idx 0, Flip [0, -1, -1, -1], threshold = 2.13128105719199, Delta[Idx] 1.2696457989088565
updated threshold 0.8616352582831333

Idx 1, Flip [0, 0, -1, -1], threshold = 0.8616352582831333, Delta[Idx] 1.092929076278742
updated threshold -0.23129381799560877
{(2, 0.4722222222222223), (3, 0.375)}


Positive Class


Idx 0, Flip [0, -1, -1, -1], t

In [None]:
verify_explanations(instances=instances.copy(),
                    explanations=positive_Xpls.copy(),
                    weights=w.copy(),
                    clf=clf,
                    sorted_indices = sorted_indices,
                    positive=label_pred)

0 instance [0.41666667 0.29166667 0.69491525 0.75      ], explanation {2: 0.4166666666666665, 3: 0.29166666666666674}, pred [1], [2.04442635]
instance [0.         1.         0.69491525 0.75      ], pred = [1], [0.76121283]

1 instance [0.58333333 0.375      0.55932203 0.5       ], explanation {2: 0.5833333333333335, 3: 0.375}, pred [1], [1.29364722]
instance [0.         1.         0.55932203 0.5       ], pred = [1], [0.0026099]

2 instance [0.47222222 0.375      0.59322034 0.58333333], explanation {2: 0.4722222222222223, 3: 0.375}, pred [1], [1.44053018]
instance [0.         1.         0.59322034 0.58333333], pred = [1], [0.23129382]

3 instance [0.38888889 0.41666667 0.54237288 0.45833333], explanation {1: 0.5423728813559322, 2: 0.38888888888888884, 3: 0.41666666666666674}, pred [1], [0.97871477]
instance [0.         0.41666667 0.54237288 0.45833333], pred = [1], [0.69241141]

4 instance [0.44444444 0.41666667 0.54237288 0.58333333], explanation {2: 0.44444444444444464, 3: 0.416666666

#### All Explanations for Negative class

In [None]:
#Weights and bias
w, b = get_svm_classifier_weights(clf)

#Positive instances
instances, labels = (df.iloc[:, :-1].values.tolist(),df.iloc[:, -1].values.tolist())  # Exclude the target column
instances = np.asarray(X_test[np.where(y_test == 0)])

negative_Xpls = []
for instance in instances:
    #Prepare to Generate one explanation
    label_pred = get_svm_classifier_boolean_pred(clf,instance)
    delta = get_delta_values(df,w,instance,positive=label_pred)
    sorted_indices = np.argsort(-np.abs(delta))  # Negative sign for descending order
    sorted_delta = delta[sorted_indices]
    threshold = find_threshold(df,w,b, instance,positive=label_pred)
    Flip = [-1 for element in instance]
    sorted_delta = np.array(sorted(delta, key=abs, reverse=True))
    Idx = 0
    Xpl = set()

    updated_Xpl,updated_threshold, updated_Idx = oneExplanation(instance, Flip, sorted_delta, sorted_indices,threshold, Idx, Xpl,positive=label_pred)
    negative_Xpls.append(updated_Xpl)



Negative Class


Idx 0, Flip [0, -1, -1, -1], threshold = -3.997311991743618, Delta[Idx] -1.8863309012360157
updated threshold -2.1109810905076025

Idx 1, Flip [0, 0, -1, -1], threshold = -2.1109810905076025, Delta[Idx] -1.639393614418113
updated threshold -0.4715874760894896

Idx 2, Flip [0, 0, 0, -1], threshold = -0.4715874760894896, Delta[Idx] -1.0913375590906547
updated threshold 0.6197500830011651

Explanation: {(1, 0.11864406779661013), (2, 0.3055555555555556), (3, 0.7916666666666665)}


Negative Class


Idx 0, Flip [0, -1, -1, -1], threshold = -3.9973119917436186, Delta[Idx] -1.9226064954905544
updated threshold -2.074705496253064

Idx 1, Flip [0, 0, -1, -1], threshold = -2.074705496253064, Delta[Idx] -1.7955263396007903
updated threshold -0.2791791566522739

Idx 2, Flip [0, 0, 0, -1], threshold = -0.2791791566522739, Delta[Idx] -0.8041434645931139
updated threshold 0.52496430794084

Explanation: {(1, 0.1016949152542373), (2, 0.13888888888888884), (3, 0.5833333333333333)}


Ne

In [None]:
verify_explanations(instances=instances.copy(),
                    explanations=negative_Xpls.copy(),
                    weights=w.copy(),
                    clf=clf,
                    sorted_indices=sorted_indices,
                    positive=label_pred)

0 instance [0.30555556 0.79166667 0.11864407 0.125     ], explanation {1: 0.11864406779661013, 2: 0.3055555555555556, 3: 0.7916666666666665}, pred [0], [-1.13100608]
instance [1.         0.79166667 0.11864407 0.125     ], pred = [0], [-0.61975008]

1 instance [0.13888889 0.58333333 0.10169492 0.04166667], explanation {1: 0.1016949152542373, 2: 0.13888888888888884, 3: 0.5833333333333333}, pred [0], [-1.15892174]
instance [1.         0.58333333 0.10169492 0.04166667], pred = [0], [-0.52496431]

2 instance [0.25       0.875      0.08474576 0.        ], explanation {2: 0.25, 1: 0.0847457627118644, 3: 0.8749999999999998}, pred [0], [-1.59353447]
instance [1.         0.875      0.08474576 0.        ], pred = [0], [-1.041378]

3 instance [0.16666667 0.66666667 0.06779661 0.        ], explanation {2: 0.16666666666666674, 1: 0.06779661016949151, 3: 0.6666666666666667}, pred [0], [-1.40396669]
instance [1.         0.66666667 0.06779661 0.        ], pred = [0], [-0.7904595]

4 instance [0.1388888