In [1]:
import docplex
import pandas as pd
import tensorflow as tf
import numpy as np
import utility
import copy
import mlp_explainer
import mymetrics
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from milp import codify_network
from teste import get_minimal_explanation
from sklearn import metrics
import time
import dataframe_image as dfi

In [2]:
#Column Dataset
df = pd.read_csv('./datasets/column_2C.dat', sep=" ", names=['pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis','target'])
df['target']=np.where(df['target']=='AB',1,0)

scaler = MinMaxScaler()
scaler.fit(df.values[:, :-1])
scaled_df = scaler.transform(df.values[:, :-1])
lower_bound = scaled_df.min()
upper_bound = scaled_df.max()
print(lower_bound, upper_bound)
df_scaled = pd.DataFrame(scaled_df, columns=df.columns[:-1])
targets = (utility.check_targets_0_1(df.values[:,-1])).astype(np.int32)
df_scaled['target'] = targets
columns = df_scaled.columns
dir_path = 'Column'
dataset_name = 'Column'
display(df_scaled)

0.0 1.0000000000000002
Original Targets:  [0. 1.] 
Desired Targets: [0,1]
Is original the desired [0, 1]?  True


Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,target
0,0.355710,0.519829,0.229193,0.250879,0.307452,0.025163,1
1,0.124518,0.296713,0.098622,0.144642,0.476718,0.036359,1
2,0.411651,0.513934,0.322982,0.307607,0.386171,0.017528,1
3,0.416184,0.557342,0.271255,0.289376,0.341865,0.051839,1
4,0.227238,0.289389,0.128155,0.246992,0.409614,0.044181,1
...,...,...,...,...,...,...,...
305,0.209780,0.360307,0.196886,0.193596,0.509410,0.015852,0
306,0.268036,0.487138,0.136209,0.183694,0.476288,0.024767,0
307,0.340471,0.522329,0.287900,0.234870,0.597806,0.019437,0
308,0.184221,0.272240,0.246823,0.214603,0.521239,0.026234,0


In [3]:
X_train, X_test, y_train, y_test = train_test_split(scaled_df, targets, test_size=0.3,random_state=50,stratify=targets)
X = np.concatenate((X_train,X_test),axis=0)
y = np.concatenate((y_train,y_test),axis=0)

training_data = pd.DataFrame(X_train, columns = columns[:-1])
training_data[columns[-1]] = y_train
testing_data = pd.DataFrame(X_test, columns = columns[:-1])
testing_data[columns[-1]] = y_test
dataframe = pd.concat([training_data, testing_data])
data = dataframe.to_numpy()
n_classes = dataframe['target'].nunique()

original_bounds = [[dataframe[dataframe.columns[i]].min(),dataframe[dataframe.columns[i]].max()] for i in range(len(dataframe.columns[:-1]))]
keras_model = tf.keras.models.load_model(f'new_models/{dataset_name}.h5')

In [4]:
mp_model, output_bounds = codify_network(keras_model, dataframe, 'fischetti', relax_constraints=False)

In [5]:
predictions = []
negative_predictions = []
positive_predictions = []
negative_indexes = []
positive_indexes = []
data = testing_data.to_numpy()
for i in range(len(data)):
    predictions.append(mlp_explainer.model_classification_output(k_model=keras_model, net_input=data[i, :-1])[1].numpy())
    if predictions[-1] == 0:
        negative_indexes.append(i)
        negative_predictions.append(data[i, :-1])
    else:
        positive_indexes.append(i)
        positive_predictions.append(data[i, :-1])
print("Accuracy Test Data:", metrics.accuracy_score(testing_data.to_numpy()[:, -1], predictions))

Accuracy Test Data: 0.8602150537634409


In [6]:
cols = list(testing_data.columns)
if 'target' not in cols:
    cols.append('target')
predicted_dataset = []
classes = np.array([0, 1],dtype='int')
for instance in (testing_data.to_numpy()[:, :-1][negative_indexes]):
    instance = np.append(instance, classes[0].astype('int'))
    predicted_dataset.append(instance)
for instance in (testing_data.to_numpy()[:, :-1][positive_indexes]):
    instance = np.append(instance, classes[1])
    predicted_dataset.append(instance)
predicted_dataset = np.asarray(predicted_dataset)
pred_dataset_df = pd.DataFrame(predicted_dataset, columns=cols)
pred_dataset_df['target'] = pred_dataset_df['target'].astype('int')

In [7]:
metrics_dataframes = []
times_direct = []
sizes_direct = []
diagonals_direct = []
rsum_direct = []
n_feat_ranges_direct = []
coverage_direct = []
accuracy_direct = []
pos_exp_direct = []
neg_exp_direct = []

In [8]:
original_bounds

[[0.0, 1.0],
 [0.0, 1.0000000000000002],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0]]

In [9]:
negative_direct_explanations = []
coverage_direct = []
accuracy_direct = []
for i, sample in enumerate((testing_data.to_numpy()[:, :-1][negative_indexes])):
    start = time.perf_counter()
    explanation, minimal = mlp_explainer.run_explanation(sample = sample, n_classes=n_classes, kmodel=keras_model, model=mp_model, output_bounds=output_bounds, og_bounds=original_bounds, enable_log=False,
                                                         )
    end = time.perf_counter()
    negative_direct_explanations.append(explanation)
    times_direct.append(end-start)
    sizes_direct.append(len(minimal))
    diagonals_direct.append(mymetrics.calculate_hypersolid_diagonal(negative_direct_explanations[-1]))
    rsum_direct.append(mymetrics.range_sum(negative_direct_explanations[-1]))
    coverage_direct.append(len(mymetrics.calculate_coverage(testing_data, negative_direct_explanations[-1])))
    accuracy_direct.append(mymetrics.calculate_accuracy(pred_dataset_df, negative_direct_explanations[-1], 0))
    n_feat_ranges_direct.append(mymetrics.get_num_features_with_ranges(negative_direct_explanations[-1]))

In [10]:
positive_direct_explanations = []
for i, sample in enumerate((testing_data.to_numpy()[:, :-1][positive_indexes])):
    start = time.perf_counter()
    explanation, minimal = mlp_explainer.run_explanation(sample = sample, n_classes=n_classes, kmodel=keras_model, model=mp_model, output_bounds=output_bounds, og_bounds=original_bounds, enable_log=False,
                                                         )
    end = time.perf_counter()
    positive_direct_explanations.append(explanation)
    times_direct.append(end-start)
    sizes_direct.append(len(minimal))
    diagonals_direct.append(mymetrics.calculate_hypersolid_diagonal(positive_direct_explanations[-1]))
    rsum_direct.append(mymetrics.range_sum(positive_direct_explanations[-1]))
    coverage_direct.append(len(mymetrics.calculate_coverage(testing_data, positive_direct_explanations[-1])))
    accuracy_direct.append(mymetrics.calculate_accuracy(pred_dataset_df, positive_direct_explanations[-1], 1))


In [11]:
p_values = [0.50] #[0.25, 0.50, 0.75]
for p_value in p_values:
    print(f"p = {p_value}")
    times_twostep = []
    sizes_twostep = []
    diagonals_twostep = []
    rsum_twostep = []
    n_feat_ranges_twostep = []
    coverage_twostep = []
    accuracy_twostep = []
    pos_exp_twostep = []
    neg_exp_twostep = []
    negative_twostep_explanations = []
    
    for i, sample in enumerate((testing_data.to_numpy()[:, :-1][negative_indexes])):
        start = time.perf_counter()
        explanation, minimal = mlp_explainer.run_explanation_doublestep(sample = sample, n_classes=n_classes, kmodel=keras_model, model=mp_model, output_bounds=output_bounds, og_bounds=original_bounds, p=p_value)
        end = time.perf_counter()
        negative_twostep_explanations.append(explanation)
        times_twostep.append(end-start)
        sizes_twostep.append(len(minimal))
        diagonals_twostep.append(mymetrics.calculate_hypersolid_diagonal(negative_twostep_explanations[-1]))
        rsum_twostep.append(mymetrics.range_sum(negative_twostep_explanations[-1]))
        coverage_twostep.append(len(mymetrics.calculate_coverage(testing_data, negative_twostep_explanations[-1])))
        accuracy_twostep.append(mymetrics.calculate_accuracy(pred_dataset_df, negative_twostep_explanations[-1], 0))
        n_feat_ranges_twostep.append(mymetrics.get_num_features_with_ranges(negative_direct_explanations[-1]))

    
    positive_twostep_explanations = []
    for i, sample in enumerate((testing_data.to_numpy()[:, :-1][positive_indexes])):
        start = time.perf_counter()
        explanation, minimal = mlp_explainer.run_explanation_doublestep(sample = sample, n_classes=n_classes, kmodel=keras_model, model=mp_model, output_bounds=output_bounds, og_bounds=original_bounds, p=p_value, enable_log=False)
        end = time.perf_counter()
        positive_twostep_explanations.append(explanation)
        times_twostep.append(end-start)
        sizes_twostep.append(len(minimal))
        diagonals_twostep.append(mymetrics.calculate_hypersolid_diagonal(positive_twostep_explanations[-1]))
        rsum_twostep.append(mymetrics.range_sum(positive_twostep_explanations[-1]))
        coverage_twostep.append(len(mymetrics.calculate_coverage(testing_data, positive_twostep_explanations[-1])))
        accuracy_twostep.append(mymetrics.calculate_accuracy(pred_dataset_df, positive_twostep_explanations[-1], 1))
        n_feat_ranges_twostep.append(mymetrics.get_num_features_with_ranges(positive_direct_explanations[-1]))

    time_mean_twostep = sum(times_twostep)/len(times_twostep)
    time_std_twostep = np.std(times_twostep)
    sizes_mean_twostep = sum(sizes_twostep)/len(sizes_twostep)
    sizes_std_twostep = np.std(sizes_twostep)
    diagonals_mean_twostep = sum(diagonals_twostep)/len(diagonals_twostep)
    diagonals_std_twostep = np.std(diagonals_twostep)
    rsum_mean_twostep = sum(rsum_twostep)/len(rsum_twostep)
    rsum_std_twostep = np.std(rsum_twostep)
    n_feat_ranges_mean_twostep = sum(n_feat_ranges_twostep)/len(n_feat_ranges_twostep)
    n_feat_ranges_std_twostep = np.std(n_feat_ranges_twostep)
    coverage_mean_twostep = sum(coverage_twostep)/len(coverage_twostep)
    coverage_std_twostep = np.std(coverage_twostep)
    accuracy_mean_twostep = sum(accuracy_twostep)/len(accuracy_twostep)
    accuracy_std_twostep = np.std(accuracy_twostep)

    time_mean_direct = sum(times_direct)/len(times_direct)
    time_std_direct = np.std(times_direct)
    sizes_mean_direct = sum(sizes_direct)/len(sizes_direct)
    sizes_std_direct = np.std(sizes_direct)
    diagonals_mean_direct = sum(diagonals_direct)/len(diagonals_direct)
    diagonals_std_direct = np.std(diagonals_direct)
    rsum_mean_direct = sum(rsum_direct)/len(rsum_direct)
    rsum_std_direct = np.std(rsum_direct)
    n_feat_ranges_mean_direct = sum(n_feat_ranges_direct)/len(n_feat_ranges_direct)
    n_feat_ranges_std_direct = np.std(n_feat_ranges_direct)
    coverage_mean_direct = sum(coverage_direct)/len(coverage_direct)
    coverage_std_direct = np.std(coverage_direct)
    accuracy_mean_direct = sum(accuracy_direct)/len(accuracy_direct)
    accuracy_std_direct = np.std(accuracy_direct)

    all_metrics_names = ['Metric','Direct_MEAN','Direct_STD','Twostep_MEAN','Twostep_STD']

    all_metrics_mean_df  = pd.DataFrame(columns=all_metrics_names)
    pattern_row = ['Time',time_mean_direct, time_std_direct, time_mean_twostep,time_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row

    pattern_row = ['Size', sizes_mean_direct, sizes_std_direct, sizes_mean_twostep,sizes_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row


    pattern_row = ['Diagonal', diagonals_mean_direct, diagonals_std_direct, diagonals_mean_twostep,diagonals_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row


    pattern_row = ['Ranges_Sum', rsum_mean_direct, rsum_std_direct, rsum_mean_twostep,rsum_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row


    pattern_row = ['N_Feature_Ranges', n_feat_ranges_mean_direct, n_feat_ranges_std_direct, n_feat_ranges_mean_twostep,n_feat_ranges_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row


    pattern_row = ['Coverage', coverage_mean_direct, coverage_std_direct, coverage_mean_twostep,coverage_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row

    pattern_row = ['Accuracy', accuracy_mean_direct, accuracy_std_direct, accuracy_mean_twostep,accuracy_std_twostep]
    all_metrics_mean_df.loc[len(all_metrics_mean_df), :] = pattern_row
    metrics_dataframes.append(all_metrics_mean_df)

    display(all_metrics_mean_df)

p = 0.75


Unnamed: 0,Metric,Direct_MEAN,Direct_STD,Twostep_MEAN,Twostep_STD
0,Time,0.213291,0.030036,0.305952,0.050138
1,Size,4.817204,1.106639,4.817204,1.106639
2,Diagonal,1.624365,0.370107,1.613928,0.368416
3,Ranges_Sum,3.599785,1.019856,3.588056,1.011401
4,N_Feature_Ranges,5.969697,0.17142,6.0,0.0
5,Coverage,1.817204,1.367411,2.0,1.586231
6,Accuracy,1.0,0.0,1.0,0.0


In [12]:
# dfi.export(all_metrics_mean_df, './saved_dataframe_image/'+dataset_name +"_"+ str(p_values[0])+'.jpg')
# all_metrics_mean_df.to_csv('./saved_dataframe_csv/'+dataset_name +"_"+ str(p_values[0])+'.csv',index=False)