In [None]:
from pulp import *
from pulp import LpProblem, LpVariable, LpMinimize, LpInteger, lpSum, value, LpBinary,LpStatusOptimal
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import warnings
warnings.filterwarnings("ignore", message="Overwriting previously set objective.")
import sys
sys.path.append('..')
import svm_explainer
import utility

# Dataset loading and scaling

In [None]:
# Load the Breast Cancer dataset (30 features) and expose the raw feature
# matrix as a DataFrame whose columns carry the dataset's feature names.
dataset = datasets.load_breast_cancer()
df = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)

In [None]:
# Rescale every feature with a MinMaxScaler fitted on the complete feature
# matrix.
# NOTE(review): the scaler is fitted before the train/test split below, so
# test rows take part in the scaling statistics — confirm this is intended.
scaler = MinMaxScaler().fit(dataset.data)
scaled_df = scaler.transform(dataset.data)

# Sanity check: show the overall minimum and maximum of the scaled values.
print(scaled_df.min(), scaled_df.max())

In [None]:
# Binarise the labels: samples that share the first sample's class become 0,
# every other sample becomes 1. The result is then handed to
# utility.check_targets, which presumably remaps it to the [-1, 1] label
# convention mentioned in the original note — verify against utility.
targets = utility.check_targets(
    np.where(dataset.target == dataset.target[0], 0, 1)
)

In [None]:
# Stratified split with 30% of the samples held out for testing; the seed is
# fixed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    targets,
    test_size=0.3,
    stratify=targets,
    random_state=107,
)

# Re-stack both partitions (training rows first, then test rows) so that X and
# y cover the whole dataset in a single, consistent ordering.
X = np.concatenate((X_train, X_test))
y = np.concatenate((y_train, y_test))

# Training the Support Vector Classifier

In [None]:
# Fit a linear-kernel Support Vector Classifier on the training partition.
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)

# Accuracy on the held-out test partition.
y_pred = clf.predict(X_test)
print("Accuracy Linear:", metrics.accuracy_score(y_test, y_pred))

# Accuracy on the data the model was fitted on.
y_pred_train = clf.predict(X_train)
print("Accuracy on Training:", metrics.accuracy_score(y_train, y_pred_train))

# Accuracy on the re-stacked train + test data. Ground truth goes first to
# match the accuracy_score(y_true, y_pred) signature used by the calls above.
y_ = clf.predict(X)
print("Accuracy on All:", metrics.accuracy_score(y, y_))

# Generating Explanations

## Finding Thresholds and Labeling Patterns

In [None]:
# Find the upper and lower decision thresholds on the training partition
# (per the original note, by minimising the empirical risk).
# NOTE(review): wr=[0.24] is presumably the rejection-cost weight(s) tried by
# utility.find_thresholds — confirm its meaning there.
threshold_upper, threshold_lower = utility.find_thresholds(clf, X_train, y_train, wr=[0.24])

# Accuracy under the found thresholds on the test partition.
test_accuracy = utility.calculate_accuracy(clf, threshold_upper, threshold_lower, X_test, y_test)
print(f"Test Accuracy = {test_accuracy}")

# Accuracy under the found thresholds on the training partition.
train_accuracy = utility.calculate_accuracy(clf, threshold_upper, threshold_lower, X_train, y_train)
print(f"Train Accuracy = {train_accuracy}")

# Accuracy under the found thresholds on the re-stacked train + test data.
# Report all_accuracy here: the label says "All", so the value must be the one
# computed on X / y rather than the training figure.
all_accuracy = utility.calculate_accuracy(clf, threshold_upper, threshold_lower, X, y)
print(f"All Accuracy = {all_accuracy}")

# Label every pattern in X as positive, negative or rejected using the
# thresholds found above.
positive_indexes, negative_indexes, rejected_indexes = utility.find_indexes(clf, X, threshold_upper, threshold_lower)
print(f"Positive patterns = {len(positive_indexes)},\nNegative patterns = {len(negative_indexes)},\nRejected patterns = {len(rejected_indexes)}")

## Explanations for Rejected Class Patterns

In [None]:
# Explain the patterns labelled as rejected; skipped when there are none.
if len(rejected_indexes) > 0:
    explanation = svm_explainer.svm_explanation_rejected(
        data=X[rejected_indexes],
        dual_coef=clf.dual_coef_,
        support_vectors=clf.support_vectors_,
        intercept=clf.intercept_,
        t_lower=threshold_lower,
        t_upper=threshold_upper,
        # Feature bounds are the global min/max of the scaled data.
        lower_bound=scaled_df.min(),
        upper_bound=scaled_df.max(),
        show_log=0,
        n_threads=4,
    )
    utility.detail_explanation(
        explanations=explanation,
        patterns=X[rejected_indexes],
        number_of_features=X.shape[1],
        feature_names=dataset.feature_names,
        show_explanation=True,
    )
    # Average number of entries per explanation.
    print(
        "Mean size of explanation: ",
        sum(len(explanation[i]) for i in range(len(explanation))) / len(explanation),
    )

## Explanations for Negative Class Patterns

In [None]:
# Explain the patterns labelled as negative; skipped when there are none.
if len(negative_indexes) > 0:
    explanation = svm_explainer.svm_explanation_binary(
        data=X[negative_indexes],
        classified="Negative",
        dual_coef=clf.dual_coef_,
        support_vectors=clf.support_vectors_,
        intercept=clf.intercept_,
        t_lower=threshold_lower,
        t_upper=threshold_upper,
        # Feature bounds are the global min/max of the scaled data.
        lower_bound=scaled_df.min(),
        upper_bound=scaled_df.max(),
        show_log=0,
        n_threads=4,
    )
    # This call passes no feature names and sets show_explanation=False.
    utility.detail_explanation(
        explanations=explanation,
        patterns=X[negative_indexes],
        number_of_features=X.shape[1],
        show_explanation=False,
    )
    # Average number of entries per explanation.
    print(
        "Mean size of explanation: ",
        sum(len(explanation[i]) for i in range(len(explanation))) / len(explanation),
    )

## Explanations for Positive Class Patterns

In [None]:
# Explain the patterns labelled as positive; skipped when there are none.
if len(positive_indexes) > 0:
    explanation = svm_explainer.svm_explanation_binary(
        data=X[positive_indexes],
        classified="Positive",
        dual_coef=clf.dual_coef_,
        support_vectors=clf.support_vectors_,
        intercept=clf.intercept_,
        t_lower=threshold_lower,
        t_upper=threshold_upper,
        # Feature bounds are the global min/max of the scaled data.
        lower_bound=scaled_df.min(),
        upper_bound=scaled_df.max(),
        show_log=0,
        n_threads=4,
    )
    utility.detail_explanation(
        explanations=explanation,
        patterns=X[positive_indexes],
        number_of_features=X.shape[1],
        show_explanation=True,
    )
    # Average number of entries per explanation.
    print(
        "Mean size of explanation: ",
        sum(len(explanation[i]) for i in range(len(explanation))) / len(explanation),
    )