In [None]:
from pulp import *
from pulp import LpProblem, LpVariable, LpMinimize, LpInteger, lpSum, value, LpBinary,LpStatusOptimal
import pulp
import numpy as np
import pandas as pd
import time
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings("ignore", message="Overwriting previously set objective.")
import utility
import docplex.mp.model
import docplex
import docplex_explainer
import mymetrics
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns
import joblib
import mymetrics

In [None]:
# Feature (column) names for every dataset, keyed by the dataset name.
# - Iris / Wine / Breast_Cancer: taken from the scikit-learn loaders.
# - Vertebral-Column: spelled out by hand.
# - All others: every header column of the CSV except the last one
#   (presumably the target column - confirm against the files).
column_names = {
    'Iris': datasets.load_iris().feature_names,
    'Wine': datasets.load_wine().feature_names,
    'Breast_Cancer': datasets.load_breast_cancer().feature_names,
    'Vertebral-Column': [
        'pelvic_incidence',
        'pelvic_tilt',
        'lumbar_lordosis_angle',
        'sacral_slope',
        'pelvic_radius',
        'degree_spondylolisthesis',
    ],
    # CSV-backed datasets share the same "header minus last column" rule.
    **{
        csv_dataset: pd.read_csv(csv_path).columns[:-1].values
        for csv_dataset, csv_path in (
            ('Pima', './datasets/diabetes.csv'),
            ('Parkinsons', './datasets/parkinsons.csv'),
            ('Blood_Transfusion', './datasets/blood_transfusion.csv'),
            ('Ionosphere', './datasets/ionosphere.csv'),
            ('Glass', './datasets/glass.csv'),
            ('Climate', './datasets/climate_model_simulation_crashes.csv'),
            ('Modeling', './datasets/User_Knowledge_Modeling.csv'),
            ('Banknote', './datasets/banknote_authentication.csv'),
        )
    },
}

In [None]:
# Sanity-check report. For every dataset: load the stored one-step and two-step
# explanations of each class, keep the smallest one of each, pick a test instance
# covered by both, and verify that the instance lies inside the (inverse-scaled)
# bounds of both explanations.

def _select_smallest_explanation(explanations):
    """Return the explanation that leaves the most features at exactly (0, 1).

    `explanations` is indexed as [explanation, feature, bound]; a feature whose
    two bound values are exactly 0 and 1 is counted as unconstrained (presumably
    the full range in scaled units - confirm). The explanation with the highest
    count therefore has the fewest constrained features, i.e. it is the smallest.

    The leading axis is kept (length-1 slice), so the result is indexed with [0].
    """
    unconstrained = np.sum(
        (explanations[:, :, 0] == 0) & (explanations[:, :, 1] == 1), axis=1
    )
    # argsort is ascending, so the last position holds the largest count.
    return explanations[np.argsort(unconstrained)[-1:]]


def _print_bounds_check(instance, bounds, dataset, sign, method):
    """Print whether `instance` lies inside the per-feature min/max of `bounds`.

    `bounds` holds one row per bound and one column per feature (it is the
    inverse-scaled, transposed explanation built in `_report_class`).
    """
    lower = np.min(bounds, axis=0)  # Minimum values per feature
    upper = np.max(bounds, axis=0)  # Maximum values per feature
    if np.all((instance >= lower) & (instance <= upper)):
        print(f"Great! {dataset}_instance_{sign} is within {method} bounds.")
    else:
        print(f"ERROR {dataset}_instance_{sign} is OUTSIDE {method} bounds.")


def _report_class(dataset, sign, scaler, X_test, onestep, twostep):
    """Display one class's shared instance and explanations, then check bounds.

    Parameters
    ----------
    dataset : key into `column_names`, also used in the printed messages.
    sign : 'pos' or 'neg'; only used to label the printed messages.
    scaler : fitted scaler providing `inverse_transform`.
    X_test : frame passed to `mymetrics.calculate_coverage`; the last column of
        the coverage result is dropped before inverse scaling (presumably the
        predicted label - confirm).
    onestep, twostep : single explanations indexed as [feature, bound].
    """
    feature_names = column_names[dataset]
    methods = (('Onestep', onestep), ('Twostep', twostep))

    # Instances covered by each explanation, mapped back to original units.
    covered_onestep = scaler.inverse_transform(
        mymetrics.calculate_coverage(X_test, onestep).values[:, :-1]
    )
    covered_twostep = scaler.inverse_transform(
        mymetrics.calculate_coverage(X_test, twostep).values[:, :-1]
    )
    shared = [
        row for row in covered_onestep
        if any(np.all(row == covered_twostep, axis=1))
    ]

    # The intersection can be empty; report it instead of dying on shared[0].
    instance = shared[0] if shared else None
    if instance is None:
        print(f"ERROR no {dataset}_instance_{sign} is covered by both Onestep and Twostep.")
    else:
        display(pd.DataFrame(data=instance.reshape(1, -1), columns=feature_names))

    # Show both explanations in original units (one row per bound).
    bounds_by_method = []
    for method, explanation in methods:
        bounds = scaler.inverse_transform(explanation.T)
        bounds_by_method.append((method, bounds))
        # NOTE(review): "relevant" means the feature's bounds differ from the
        # explanation's own global (min, max), not from the literal (0, 1) used
        # when selecting the smallest explanation - confirm this is intended.
        print(f'{method}_{sign} - relevant features: {np.where((explanation != [explanation.min(), explanation.max()]).any(axis=1))}')
        display(pd.DataFrame(data=bounds, columns=feature_names))

    if instance is not None:
        for method, bounds in bounds_by_method:
            _print_bounds_check(instance, bounds, dataset, sign, method)
    print('----------------------------------------------------------------------------------------------')
    print('----------------------------------------------------------------------------------------------')


# Load Dataset
dataset_name = ['Iris', 'Wine', 'Vertebral-Column', 'Pima', 'Parkinsons', 'Breast_Cancer', 'Blood_Transfusion', 'Ionosphere', 'Glass', 'Climate', 'Modeling', 'Banknote']#, Sonar
for dataset in dataset_name:
    # NOTE(review): joblib.load unpickles the file - only load scaler files from a trusted source.
    scaler = joblib.load(f'models/{dataset}_scaler.pkl')

    # np.load keeps the .npz archive open until closed, so read inside a `with`.
    with np.load(f'{dataset}_results/pos_explanations_0.25.npz') as loaded_data:
        onestep_pos = loaded_data['pos_exp_onestep'].round(12)
        twostep_pos = loaded_data['pos_exp_twostep'].round(12)

    with np.load(f'{dataset}_results/neg_explanations_0.25.npz') as loaded_data:
        onestep_neg = loaded_data['neg_exp_onestep'].round(12)
        twostep_neg = loaded_data['neg_exp_twostep'].round(12)
    X_test = pd.read_csv(f'{dataset}_results/{dataset}_X_test_predicted.csv').round(12)

    print(f'\n\n#################### {dataset.upper()} ####################')
    # Keep only the smallest explanation of each kind (leading axis preserved).
    onestep_pos = _select_smallest_explanation(onestep_pos)
    onestep_neg = _select_smallest_explanation(onestep_neg)
    twostep_pos = _select_smallest_explanation(twostep_pos)
    twostep_neg = _select_smallest_explanation(twostep_neg)

    # Positive instance covered by both Onestep and Twostep
    _report_class(dataset, 'pos', scaler, X_test, onestep_pos[0], twostep_pos[0])

    # Negative instance covered by both Onestep and Twostep
    _report_class(dataset, 'neg', scaler, X_test, onestep_neg[0], twostep_neg[0])
    