In [None]:
import docplex
import pandas as pd
import tensorflow as tf
import numpy as np
import utility
import copy
import mlp_explainer
import mymetrics
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from milp import codify_network
from teste import get_minimal_explanation
from sklearn import metrics
import time
import dataframe_image as dfi

In [None]:
# Column dataset: space-separated file with six feature columns followed by a label column.
column_names = [
    'pelvic_incidence',
    'pelvic_tilt',
    'lumbar_lordosis_angle',
    'sacral_slope',
    'pelvic_radius',
    'degree_spondylolisthesis',
    'target',
]
df = pd.read_csv('./datasets/column_2C.dat', sep=" ", names=column_names)
# Binary-encode the label: 'AB' becomes 1, every other value becomes 0.
df['target'] = np.where(df['target'] == 'AB', 1, 0)

# Min-max scale every column except the last one (the label).
raw_values = df.values
scaler = MinMaxScaler()
scaled_df = scaler.fit_transform(raw_values[:, :-1])
lower_bound, upper_bound = scaled_df.min(), scaled_df.max()
print(lower_bound, upper_bound)

# Scaled features plus the label, as a frame with the original column names.
df_scaled = pd.DataFrame(scaled_df, columns=df.columns[:-1])
# NOTE(review): check_targets_0_1 is a project helper; presumably it normalises labels to {0, 1} - confirm.
targets = utility.check_targets_0_1(raw_values[:, -1]).astype(np.int32)
df_scaled['target'] = targets
columns = df_scaled.columns
dir_path = dataset_name = 'Column'
display(df_scaled)

In [None]:
# Stratified 70/30 train/test split with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    targets,
    test_size=0.3,
    random_state=50,
    stratify=targets,
)
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

# Rebuild labelled frames for each split; the label is the last column.
feature_columns, target_column = columns[:-1], columns[-1]
training_data = pd.DataFrame(X_train, columns=feature_columns).assign(**{target_column: y_train})
testing_data = pd.DataFrame(X_test, columns=feature_columns).assign(**{target_column: y_test})

# Train rows first, then test rows (the index is not reset).
dataframe = pd.concat([training_data, testing_data])
data = dataframe.to_numpy()
n_classes = dataframe['target'].nunique()

# Per-feature [min, max] over the combined frame, label column excluded.
original_bounds = [
    [dataframe[name].min(), dataframe[name].max()]
    for name in dataframe.columns[:-1]
]
keras_model = tf.keras.models.load_model(f'new_models/{dataset_name}.h5')

In [None]:
# Encode the loaded Keras network with the project's `milp.codify_network` helper.
# NOTE(review): 'fischetti' presumably selects the encoding method - confirm in milp.py.
mp_model, output_bounds = codify_network(
    keras_model,
    dataframe,
    'fischetti',
    relax_constraints=False,
)

In [None]:
# Classify every test row with the network and bucket the rows by predicted class.
predictions = []
negative_indexes, negative_predictions = [], []
positive_indexes, positive_predictions = [], []
data = testing_data.to_numpy()
for i, row in enumerate(data):
    features = row[:-1]  # drop the label column
    # Element [1] of the helper's return value is what this notebook uses as the predicted label.
    outcome = mlp_explainer.model_classification_output(k_model=keras_model, net_input=features)
    predicted = outcome[1].numpy()
    predictions.append(predicted)
    if predicted == 0:
        index_bucket, row_bucket = negative_indexes, negative_predictions
    else:
        index_bucket, row_bucket = positive_indexes, positive_predictions
    index_bucket.append(i)
    row_bucket.append(features)

# Compare the predictions with the true labels stored in the last column.
true_labels = data[:, -1]
print("Accuracy Test Data:", metrics.accuracy_score(true_labels, predictions))

In [None]:
# Rebuild the test set with the model's predicted label in place of the true label:
# rows predicted 0 come first, followed by rows predicted 1.
cols = list(testing_data.columns)
if 'target' not in cols:
    cols.append('target')
classes = np.array([0, 1], dtype='int')
test_features = testing_data.to_numpy()[:, :-1]

predicted_dataset = []
for label, row_indexes in zip(classes, (negative_indexes, positive_indexes)):
    for features in test_features[row_indexes]:
        predicted_dataset.append(np.append(features, label))
predicted_dataset = np.asarray(predicted_dataset)

pred_dataset_df = pd.DataFrame(predicted_dataset, columns=cols)
# np.append promoted the label to float alongside the features; restore an integer label column.
pred_dataset_df['target'] = pred_dataset_df['target'].astype('int')

In [None]:
# Empty accumulators for the direct-explanation metrics; each name gets its own list.
metrics_dataframes = []
(
    times_direct,
    sizes_direct,
    rsum_direct,
    coverage_direct,
    pos_exp_direct,
    neg_exp_direct,
) = ([] for _ in range(6))

In [None]:
# Show the per-feature [min, max] bounds that are passed to the explainers as `og_bounds`.
original_bounds

In [None]:
# Direct explanations (mlp_explainer.run_explanation) for the test rows predicted as class 0.
#
# All four per-sample metric lists are reset together. Resetting only `coverage_direct`
# left the lists with different lengths (and duplicated entries) whenever this cell was
# re-run without re-running the cell that first created them.
negative_direct_explanations = []
times_direct = []
sizes_direct = []
rsum_direct = []
coverage_direct = []

test_features = testing_data.to_numpy()[:, :-1]  # label column dropped
for sample in test_features[negative_indexes]:
    # Only the explainer call itself is timed.
    start = time.perf_counter()
    explanation, minimal = mlp_explainer.run_explanation(
        sample=sample,
        n_classes=n_classes,
        kmodel=keras_model,
        model=mp_model,
        output_bounds=output_bounds,
        og_bounds=original_bounds,
        enable_log=False,
    )
    end = time.perf_counter()

    negative_direct_explanations.append(explanation)
    times_direct.append(end - start)
    sizes_direct.append(len(minimal))
    rsum_direct.append(mymetrics.range_sum(explanation))
    # NOTE(review): presumably the number of test rows covered by the explanation - confirm in mymetrics.
    coverage_direct.append(len(mymetrics.calculate_coverage(testing_data, explanation)))

In [None]:
# Direct explanations (mlp_explainer.run_explanation) for the test rows predicted as class 1.
positive_direct_explanations = []

# The metric lists are shared with the negative-class cell, which (in notebook order)
# contributes the first len(negative_indexes) entries. Trim anything beyond those so that
# re-running this cell replaces the positive-class results instead of appending duplicates.
n_negative = len(negative_indexes)
for accumulator in (times_direct, sizes_direct, rsum_direct, coverage_direct):
    del accumulator[n_negative:]

test_features = testing_data.to_numpy()[:, :-1]  # label column dropped
for sample in test_features[positive_indexes]:
    # Only the explainer call itself is timed.
    start = time.perf_counter()
    explanation, minimal = mlp_explainer.run_explanation(
        sample=sample,
        n_classes=n_classes,
        kmodel=keras_model,
        model=mp_model,
        output_bounds=output_bounds,
        og_bounds=original_bounds,
        enable_log=False,
    )
    end = time.perf_counter()

    positive_direct_explanations.append(explanation)
    times_direct.append(end - start)
    sizes_direct.append(len(minimal))
    rsum_direct.append(mymetrics.range_sum(explanation))
    # NOTE(review): presumably the number of test rows covered by the explanation - confirm in mymetrics.
    coverage_direct.append(len(mymetrics.calculate_coverage(testing_data, explanation)))

In [None]:
def _mean_std(values, label):
    """Return ``(mean, population standard deviation)`` of ``values`` as floats.

    Raises:
        ValueError: if ``values`` is empty, i.e. the cell that fills it has not run.
    """
    if len(values) == 0:
        raise ValueError(f"no samples recorded for {label}; run the explanation cells first")
    return float(np.mean(values)), float(np.std(values))


def _explain_twostep(samples, p):
    """Run ``mlp_explainer.run_explanation_doublestep`` on every row of ``samples``.

    Returns five parallel lists, one entry per sample:
    ``(explanations, times, sizes, range_sums, coverages)``.
    Logging is disabled for every call so the timings are comparable with each
    other and with the direct runs, which also pass ``enable_log=False``.
    """
    explanations, times, sizes, range_sums, coverages = [], [], [], [], []
    for sample in samples:
        # Only the explainer call itself is timed.
        start = time.perf_counter()
        explanation, minimal = mlp_explainer.run_explanation_doublestep(
            sample=sample,
            n_classes=n_classes,
            kmodel=keras_model,
            model=mp_model,
            output_bounds=output_bounds,
            og_bounds=original_bounds,
            p=p,
            enable_log=False,
        )
        end = time.perf_counter()

        explanations.append(explanation)
        times.append(end - start)
        sizes.append(len(minimal))
        range_sums.append(mymetrics.range_sum(explanation))
        coverages.append(len(mymetrics.calculate_coverage(testing_data, explanation)))
    return explanations, times, sizes, range_sums, coverages


p_values = [0.50] #[0.25, 0.50, 0.75]
all_metrics_names = ['Metric','Direct_MEAN','Direct_STD','Twostep_MEAN','Twostep_STD']
test_features = testing_data.to_numpy()[:, :-1]  # label column dropped

# The direct-method statistics do not depend on p, so compute them once up front.
# This also fails fast if the direct-explanation cells have not been run.
time_mean_direct, time_std_direct = _mean_std(times_direct, 'direct time')
sizes_mean_direct, sizes_std_direct = _mean_std(sizes_direct, 'direct size')
rsum_mean_direct, rsum_std_direct = _mean_std(rsum_direct, 'direct ranges sum')
coverage_mean_direct, coverage_std_direct = _mean_std(coverage_direct, 'direct coverage')

for p_value in p_values:
    print(f"p = {p_value}")
    # Not populated in this cell; kept so the names still exist for any later cell.
    pos_exp_twostep = []
    neg_exp_twostep = []

    # Negative-class rows first, then positive-class rows, matching the direct runs.
    (negative_twostep_explanations,
     neg_times, neg_sizes, neg_rsum, neg_coverage) = _explain_twostep(test_features[negative_indexes], p_value)
    (positive_twostep_explanations,
     pos_times, pos_sizes, pos_rsum, pos_coverage) = _explain_twostep(test_features[positive_indexes], p_value)

    times_twostep = neg_times + pos_times
    sizes_twostep = neg_sizes + pos_sizes
    rsum_twostep = neg_rsum + pos_rsum
    coverage_twostep = neg_coverage + pos_coverage

    time_mean_twostep, time_std_twostep = _mean_std(times_twostep, 'two-step time')
    sizes_mean_twostep, sizes_std_twostep = _mean_std(sizes_twostep, 'two-step size')
    rsum_mean_twostep, rsum_std_twostep = _mean_std(rsum_twostep, 'two-step ranges sum')
    coverage_mean_twostep, coverage_std_twostep = _mean_std(coverage_twostep, 'two-step coverage')

    # Build the summary table in one go so the numeric columns get a float dtype
    # instead of the object dtype produced by growing an empty frame row by row.
    all_metrics_mean_df = pd.DataFrame(
        [
            ['Time', time_mean_direct, time_std_direct, time_mean_twostep, time_std_twostep],
            ['Size', sizes_mean_direct, sizes_std_direct, sizes_mean_twostep, sizes_std_twostep],
            ['Ranges_Sum', rsum_mean_direct, rsum_std_direct, rsum_mean_twostep, rsum_std_twostep],
            ['Coverage', coverage_mean_direct, coverage_std_direct, coverage_mean_twostep, coverage_std_twostep],
        ],
        columns=all_metrics_names,
    )

    display(all_metrics_mean_df)

In [None]:
# Optional export of the summary table as an image and as CSV (currently disabled).
# The file name uses p_values[0], so it only identifies the table when a single p value is run.
# dfi.export(all_metrics_mean_df, './saved_dataframe_image/'+dataset_name +"_"+ str(p_values[0])+'.jpg')
# all_metrics_mean_df.to_csv('./saved_dataframe_csv/'+dataset_name +"_"+ str(p_values[0])+'.csv',index=False)