In [None]:
#!pip install anchor-exp
#!pip install pulp

In [None]:
from anchor import utils
from anchor import anchor_tabular
from pulp import *
from pulp import LpProblem, LpVariable, LpMinimize, LpInteger, lpSum, value, LpBinary,LpStatusOptimal
import pulp
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore", message="Overwriting previously set objective.")
import sys
sys.path.append('..')
import svm_explainer
import utility

In [None]:
# Label for the dataset that is loaded on the next line.
dataset_name = 'Wine'
# scikit-learn's bundled Wine dataset; later cells read `dataset.data` and `dataset.target`.
dataset = datasets.load_wine()

In [None]:
# Min-max scale every feature column. `fit` returns the fitted scaler, so it
# can be bound directly; the scaler is fitted on the complete feature matrix.
scaler = MinMaxScaler().fit(dataset.data)
normalized_df = scaler.transform(dataset.data)

In [None]:
# Global minimum / maximum over the whole scaled matrix; later cells pass
# these to the svm_explainer calls as `lower_bound` / `upper_bound`.
lower_bound, upper_bound = normalized_df.min(), normalized_df.max()
print(lower_bound, upper_bound)

In [None]:
# Collapse the labels to two classes: samples sharing the label of the first
# sample become 0, every other sample becomes 1. The result is then passed
# through the project helper utility.check_targets (its effect is not visible here).
first_sample_label = dataset.target[0]
targets = utility.check_targets(np.where(dataset.target == first_sample_label, 0, 1))

In [None]:
# Stratified 70/30 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    normalized_df,
    targets,
    test_size=0.3,
    random_state=107,
    stratify=targets,
)
# Full data set re-assembled as "training rows first, then test rows";
# every index list computed later refers to this ordering.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# Linear-kernel SVM fitted on the training split only (`fit` returns the estimator).
clf = svm.SVC(kernel='linear').fit(X_train, y_train)

# Accuracy on the held-out split.
y_pred = clf.predict(X_test)
print("Accuracy Linear:", metrics.accuracy_score(y_test, y_pred))

# Accuracy on the training split.
y_pred_train = clf.predict(X_train)
print("Accuracy on Training:", metrics.accuracy_score(y_train, y_pred_train))

# Accuracy over train + test together.
y_ = clf.predict(X)
print("Accuracy on Total:", metrics.accuracy_score(y_, y))

In [None]:
#Finding Thresholds by minimizing the empiric risk
threshold_upper,threshold_lower = utility.find_thresholds(clf, X_train, y_train, wr=[0.24])

#Labeling patterns based on found thresholds
positive_indexes,negative_indexes,rejected_indexes = utility.find_indexes(clf, X, threshold_upper,threshold_lower)
print(f"Positive patterns = {len(positive_indexes)},\nNegative patterns = {len(negative_indexes)},\nRejected patterns = {len(rejected_indexes)}")

In [None]:
def _accuracy_with_thresholds(features, labels):
    """Forward to utility.calculate_accuracy with the fitted classifier and both thresholds."""
    return utility.calculate_accuracy(clf, threshold_upper, threshold_lower, features, labels)

# Held-out split.
test_accuracy = _accuracy_with_thresholds(X_test, y_test)
print(f"Test Accuracy = {test_accuracy}")

# Training split.
train_accuracy = _accuracy_with_thresholds(X_train, y_train)
print(f"Train Accuracy = {train_accuracy}")

# Train + test together.
all_accuracy = _accuracy_with_thresholds(X, y)
print(f"All Accuracy = {all_accuracy}")

# Anchors 

In [None]:
def generate_ro_target_set(target_set,rejected_indexes):
    """Set the labels at `rejected_indexes` to 0 and return the label container.

    The write happens in place: the object passed as `target_set` is modified
    and that same object is returned; no copy is made.
    """
    rejected_label = 0
    target_set[rejected_indexes] = rejected_label
    return target_set
def svm_decfun(data,classifier=clf):
    """Return the decision value of the first row of `data` as a scalar.

    The value is (dual_coef_ @ support_vectors_) @ data.T + intercept_,
    indexed with [0][0]; when `data` holds several rows only the first row's
    value is returned. `data` must be 2-D (one sample per row).
    """
    weights = classifier.dual_coef_ @ classifier.support_vectors_
    scores = weights @ data.T + classifier.intercept_
    return scores[0][0]
def svm_decfun_class(data,classifier=clf,Threshold_1=threshold_upper,Threshold_2=threshold_lower):
    """Classify every row of `data` into an index of the class list [-1, 0, 1].

    Returned codes:
        2 -> decision value above Threshold_1   (class  1)
        0 -> decision value below Threshold_2   (class -1)
        1 -> anything in between                (class  0)

    Parameters
    ----------
    data : array of shape (n_features,) or (n_samples, n_features)
    classifier : fitted estimator exposing dual_coef_, support_vectors_ and intercept_
    Threshold_1, Threshold_2 : upper and lower thresholds on the decision value

    Returns
    -------
    numpy.ndarray of integers with one code per row of `data`. A single row
    yields a length-1 array.
    """
    samples = np.atleast_2d(data)
    # Evaluate the decision function once, for all rows, with the classifier
    # that was actually passed in. Anchors is expected to call this function
    # on batches of perturbed samples and compare the labels element-wise, so
    # one label per row is required.
    weights = classifier.dual_coef_ @ classifier.support_vectors_
    scores = (weights @ samples.T + classifier.intercept_)[0]
    return np.where(scores > Threshold_1, 2, np.where(scores < Threshold_2, 0, 1))

In [None]:
# generate_ro_target_set writes into the array it receives, so hand it a copy:
# the ground-truth labels in `y` stay intact and re-running earlier cells that
# read `y` keeps giving the same result.
ro_set = generate_ro_target_set(y.copy(), rejected_indexes)
print(np.unique(ro_set))

In [None]:
# Feature names are simply the column positions "0", "1", ... as strings.
feature_list = np.array([str(column) for column in range(len(X[0]))])

# Tabular Anchors explainer over all samples; class names are [-1, 0, 1].
explainer = anchor_tabular.AnchorTabularExplainer(
    [-1,0,1],
    feature_list,
    X)

# Time and Size Comparison

In [None]:
# Per-sample result tables: one row per explained pattern.
times_columns_names = ['Anchors', 'COIN_CBC', 'Class']
sizes_columns_names = ['Anchors', 'COIN_CBC', 'Class']
times_df = pd.DataFrame(columns=times_columns_names)
sizes_df = pd.DataFrame(columns=sizes_columns_names)

# Accumulators filled by the following cells: each cell appends one inner
# list holding the measurements of one pattern class.
anchors_all_times, anchors_all_sizes = [], []
pulp_all_times, pulp_all_sizes = [], []

# Time and Size Comparison

## Singular Sample - Negative Patterns

In [None]:
# Anchors explanations for every pattern classified as negative.
feature_sizes = []          # number of rules in each explanation
anchors_explanation = []    # exp.features() of each explanation
complete_explanations = []  # exp.names() of each explanation
anchors_times = []          # wall-clock seconds per explanation
if len(negative_indexes) > 0:
    for idx in negative_indexes:
        # Time only the explanation call so the measurement is not padded by
        # unrelated work.
        start = time.perf_counter()
        exp = explainer.explain_instance(X[idx], svm_decfun_class, threshold=1)
        end = time.perf_counter()
        anchors_times.append(end - start)
        rule_names = exp.names()
        complete_explanations.append(rule_names)
        feature_sizes.append(len(rule_names))
        anchors_explanation.append(exp.features())
    print(f"Anchors Explanation Time Mean: {np.mean(np.asarray(anchors_times))}")
    print(f"Anchors Explanation Size Mean: {np.mean(np.asarray(feature_sizes))}")
# Appended even when empty so the class keeps its position in the accumulators.
anchors_all_times.append(anchors_times)
anchors_all_sizes.append(feature_sizes)

In [None]:
# CBC (PuLP) explanations for every pattern classified as negative.
pulp_times = []         # wall-clock seconds per explanation
pulp_explanations = []  # last element of each svm_explanation_binary result
pulp_sizes = []         # length of each explanation
# Guarded like the Anchors cell: with no negative patterns there is nothing
# to average or summarize.
if len(negative_indexes) > 0:
    for idx in negative_indexes:
        start = time.perf_counter()
        pulp_exp = svm_explainer.svm_explanation_binary(
            dual_coef = clf.dual_coef_,
            support_vectors = clf.support_vectors_,
            intercept = clf.intercept_,
            data = np.atleast_2d(X[idx]),
            t_lower = threshold_lower,
            t_upper = threshold_upper,
            lower_bound = lower_bound,
            upper_bound = upper_bound,
            show_log = 0,
            classified = "Negative",
            validate = False)
        end = time.perf_counter()
        pulp_times.append((end - start))

        explanation = pulp_exp[-1]
        pulp_explanations.append(explanation)
        pulp_sizes.append(len(explanation))
    print(f"Pulp COIN CBC Time Mean: {np.mean(np.asarray(pulp_times))}")
    frequency = utility.detail_explanation(explanations = pulp_explanations, patterns = X[negative_indexes], number_of_features = len(X[0]), show_explanation = False)
    print(f"Pulp COIN CBC Explanation Mean: {frequency.values.sum() / len(frequency)} feature rules")
# Appended even when empty so the class keeps its position in the accumulators.
pulp_all_times.append(pulp_times)
pulp_all_sizes.append(pulp_sizes)

In [None]:
# One row per negative pattern: Anchors value, CBC value, class label.
for anchors_seconds, cbc_seconds in zip(anchors_times, pulp_times):
    times_df.loc[len(times_df), :] = [anchors_seconds, cbc_seconds, 'Negative']
for anchors_rules, cbc_rules in zip(feature_sizes, pulp_sizes):
    sizes_df.loc[len(sizes_df), :] = [anchors_rules, cbc_rules, 'Negative']

## Singular Sample - Positive Patterns

In [None]:
# Anchors explanations for every pattern classified as positive.
feature_sizes = []          # number of rules in each explanation
anchors_explanation = []    # exp.features() of each explanation
complete_explanations = []  # exp.names() of each explanation
anchors_times = []          # wall-clock seconds per explanation
if len(positive_indexes) > 0:
    for idx in positive_indexes:
        # Time only the explanation call so the measurement is not padded by
        # unrelated work.
        start = time.perf_counter()
        exp = explainer.explain_instance(X[idx], svm_decfun_class, threshold=1)
        end = time.perf_counter()
        anchors_times.append(end - start)
        rule_names = exp.names()
        complete_explanations.append(rule_names)
        feature_sizes.append(len(rule_names))
        anchors_explanation.append(exp.features())
    print(f"Anchors Explanation Time Mean: {np.mean(np.asarray(anchors_times))}")
    print(f"Anchors Explanation Size Mean: {np.mean(np.asarray(feature_sizes))}")
# Appended even when empty so the class keeps its position in the accumulators.
anchors_all_times.append(anchors_times)
anchors_all_sizes.append(feature_sizes)

In [None]:
# CBC (PuLP) explanations for every pattern classified as positive.
pulp_times = []         # wall-clock seconds per explanation
pulp_explanations = []  # last element of each svm_explanation_binary result
pulp_sizes = []         # length of each explanation
# Guarded like the Anchors cell: with no positive patterns there is nothing
# to average or summarize.
if len(positive_indexes) > 0:
    for idx in positive_indexes:
        start = time.perf_counter()
        pulp_exp = svm_explainer.svm_explanation_binary(
            dual_coef = clf.dual_coef_,
            support_vectors = clf.support_vectors_,
            intercept = clf.intercept_,
            data = np.atleast_2d(X[idx]),
            t_lower = threshold_lower,
            t_upper = threshold_upper,
            lower_bound = lower_bound,
            upper_bound = upper_bound,
            show_log = 0,
            classified = "Positive",
            validate = False)
        end = time.perf_counter()
        pulp_times.append((end - start))

        explanation = pulp_exp[-1]
        pulp_explanations.append(explanation)
        pulp_sizes.append(len(explanation))
    print(f"Pulp COIN CBC Time Mean: {np.mean(np.asarray(pulp_times))}")
    frequency = utility.detail_explanation(explanations = pulp_explanations, patterns = X[positive_indexes], number_of_features = len(X[0]), show_explanation = False)
    print(f"Pulp COIN CBC Explanation Mean: {frequency.values.sum() / len(frequency)} feature rules")
# Appended even when empty so the class keeps its position in the accumulators.
pulp_all_times.append(pulp_times)
pulp_all_sizes.append(pulp_sizes)

In [None]:
# One row per positive pattern: Anchors value, CBC value, class label.
for anchors_seconds, cbc_seconds in zip(anchors_times, pulp_times):
    times_df.loc[len(times_df), :] = [anchors_seconds, cbc_seconds, 'Positive']
for anchors_rules, cbc_rules in zip(feature_sizes, pulp_sizes):
    sizes_df.loc[len(sizes_df), :] = [anchors_rules, cbc_rules, 'Positive']

## Singular Sample - Rejected Patterns

In [None]:
# Anchors explanations for every rejected pattern.
feature_sizes = []          # number of rules in each explanation
anchors_explanation = []    # exp.features() of each explanation
complete_explanations = []  # exp.names() of each explanation
anchors_times = []          # wall-clock seconds per explanation
if len(rejected_indexes) > 0:
    for idx in rejected_indexes:
        # Time only the explanation call so the measurement is not padded by
        # unrelated work.
        start = time.perf_counter()
        exp = explainer.explain_instance(X[idx], svm_decfun_class, threshold=1)
        end = time.perf_counter()
        anchors_times.append(end - start)
        rule_names = exp.names()
        complete_explanations.append(rule_names)
        feature_sizes.append(len(rule_names))
        anchors_explanation.append(exp.features())
    print(f"Anchors Explanation Time Mean: {np.mean(np.asarray(anchors_times))}")
    print(f"Anchors Explanation Size Mean: {np.mean(np.asarray(feature_sizes))}")
# Appended even when empty so the class keeps its position in the accumulators.
anchors_all_times.append(anchors_times)
anchors_all_sizes.append(feature_sizes)

In [None]:
# CBC (PuLP) explanations for every rejected pattern.
pulp_times, pulp_explanations, pulp_sizes = [], [], []
if len(rejected_indexes) > 0:
    for idx in rejected_indexes:
        sample = np.atleast_2d(X[idx])
        start = time.perf_counter()
        pulp_exp = svm_explainer.svm_explanation_rejected(
            dual_coef = clf.dual_coef_,
            support_vectors = clf.support_vectors_,
            intercept = clf.intercept_,
            t_lower = threshold_lower,
            t_upper = threshold_upper,
            lower_bound = lower_bound,
            upper_bound = upper_bound,
            data = sample,
            show_log = 0,
            validate = False)
        end = time.perf_counter()
        pulp_times.append(end - start)

        # The explanation is the last element of the returned sequence.
        rejected_explanation = pulp_exp[-1]
        pulp_explanations.append(rejected_explanation)
        pulp_sizes.append(len(rejected_explanation))
    print(f"Pulp COIN CBC Time Mean: {np.mean(np.asarray(pulp_times))}")
    frequency = utility.detail_explanation(
        explanations = pulp_explanations,
        patterns = X[rejected_indexes],
        number_of_features = len(X[0]),
        show_explanation = False)
    print(f"Pulp COIN CBC Explanation Mean: {frequency.values.sum() / len(frequency)} feature rules")
    # Only recorded when rejected patterns exist; the result cells check
    # len(rejected_indexes) before reading the third slot.
    pulp_all_times.append(pulp_times)
    pulp_all_sizes.append(pulp_sizes)

In [None]:
# One row per rejected pattern: Anchors value, CBC value, class label.
for anchors_seconds, cbc_seconds in zip(anchors_times, pulp_times):
    times_df.loc[len(times_df), :] = [anchors_seconds, cbc_seconds, 'Rejected']

for anchors_rules, cbc_rules in zip(feature_sizes, pulp_sizes):
    sizes_df.loc[len(sizes_df), :] = [anchors_rules, cbc_rules, 'Rejected']

# Results

In [None]:
# Mean Anchors explanation time per class, or None when the class has no
# patterns. anchors_all_times is positional: 0 = negative, 1 = positive,
# 2 = rejected (the order in which the cells above appended to it).
anchors_negative_time = (
    sum(anchors_all_times[0]) / len(anchors_all_times[0])
    if len(negative_indexes) > 0 else None
)
anchors_positive_time = (
    sum(anchors_all_times[1]) / len(anchors_all_times[1])
    if len(positive_indexes) > 0 else None
)
anchors_rejected_time = (
    sum(anchors_all_times[2]) / len(anchors_all_times[2])
    if len(rejected_indexes) > 0 else None
)

In [None]:
# Per-class mean explanation time of both methods.
times_multiple_columns_names = ['Anchors_Mean', 'COIN_CBC_Single_Mean', 'Class']
times_multiple_df  = pd.DataFrame(columns=times_multiple_columns_names)

# (class label, pattern indexes of the class, Anchors mean time, slot in pulp_all_times)
class_summaries = [
    ('Negative', negative_indexes, anchors_negative_time, 0),
    ('Positive', positive_indexes, anchors_positive_time, 1),
    ('Rejected', rejected_indexes, anchors_rejected_time, 2),
]
for class_label, class_indexes, anchors_mean, slot in class_summaries:
    # A class without patterns has no timings to average; skipping it avoids
    # a division by zero and applies the same rule to all three classes.
    if len(class_indexes) == 0:
        continue
    pulp_mean = sum(pulp_all_times[slot]) / len(pulp_all_times[slot])
    pattern_row = [anchors_mean, pulp_mean, class_label]
    times_multiple_df.loc[len(times_multiple_df), :] = pattern_row
display(times_multiple_df)


In [None]:
# Per-class mean explanation size of both methods.
sizes_mean_names = ['Anchors_Mean', 'COIN_CBC_Mean', 'Class']
sizes_mean_df  = pd.DataFrame(columns=sizes_mean_names)
classes = ['Negative', 'Positive', 'Rejected']
# The rejected row is reported only when at least one pattern was rejected.
reported_classes = 3 if len(rejected_indexes) > 0 else 2
for i in range(reported_classes):
    anchors_mean_size = sum(anchors_all_sizes[i])/len(anchors_all_sizes[i])
    pulp_mean_size = sum(pulp_all_sizes[i])/len(pulp_all_sizes[i])
    pattern_row = [anchors_mean_size, pulp_mean_size, classes[i]]
    sizes_mean_df.loc[len(sizes_mean_df), :] = pattern_row


In [None]:
# Render the table of per-class mean explanation sizes.
display(sizes_mean_df)