In [1]:
import docplex
import pandas as pd
import tensorflow as tf
import numpy as np
import utility
import copy
import mlp_explainer
import mymetrics
import time
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from milp import codify_network
from teste import get_minimal_explanation
from sklearn.metrics import classification_report

In [2]:
# Load the Iris data set from scikit-learn and min-max scale its features.
dataset = datasets.load_iris()
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)

# Fit the scaler and transform the raw feature matrix in a single step.
scaler = MinMaxScaler()
scaled_df = scaler.fit_transform(dataset.data)

# Global extrema over the whole scaled matrix (not per feature).
lower_bound, upper_bound = scaled_df.min(), scaled_df.max()
print(lower_bound, upper_bound)

# Scaled features plus the class label as the last column.
df_scaled = pd.DataFrame(scaled_df, columns=df.columns)
targets = dataset.target
df_scaled['target'] = targets
columns = df_scaled.columns

# Every artefact produced by this notebook is written below this directory.
dataset_name = 'Iris'
result_path = f'{dataset_name}_results'
if os.path.exists(result_path):
    print(f"Directory already exists: {result_path}")
else:
    os.makedirs(result_path)
    print(f"Created directory: {result_path}")

display(df_scaled)

0.0 1.0
Directory already exists: Iris_results


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,0.222222,0.625000,0.067797,0.041667,0
1,0.166667,0.416667,0.067797,0.041667,0
2,0.111111,0.500000,0.050847,0.041667,0
3,0.083333,0.458333,0.084746,0.041667,0
4,0.194444,0.666667,0.067797,0.041667,0
...,...,...,...,...,...
145,0.666667,0.416667,0.711864,0.916667,2
146,0.555556,0.208333,0.677966,0.750000,2
147,0.611111,0.416667,0.711864,0.791667,2
148,0.527778,0.583333,0.745763,0.916667,2


In [3]:
# Seed NumPy and draw a stratified split; test_size=0.75 keeps 25% of the rows for training.
np.random.seed(50)
X_train, X_test, y_train, y_test = train_test_split(
    scaled_df,
    targets,
    test_size=0.75,
    random_state=50,
    stratify=targets,
)
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

# Column selectors: every column but the last is a feature, the last is the label.
feature_names = columns[:-1]
target_name = columns[-1]

training_data = pd.DataFrame(X_train, columns=feature_names)
training_data[target_name] = y_train
testing_data = pd.DataFrame(X_test, columns=feature_names)
testing_data[target_name] = y_test

# Train rows followed by test rows; the original row indexes of both frames are kept.
dataframe = pd.concat([training_data, testing_data])
data = dataframe.to_numpy()
n_classes = dataframe['target'].nunique()

# One [min, max] pair per feature column (the label column is excluded).
original_bounds = [
    [dataframe[name].min(), dataframe[name].max()]
    for name in dataframe.columns[:-1]
]
keras_model = tf.keras.models.load_model(f'new_models/{dataset_name}.h5')



In [4]:
# Encode the loaded Keras network with the project's `codify_network` helper
# ('fischetti' method, constraints not relaxed).
mp_model, output_bounds = codify_network(
    keras_model,
    dataframe,
    'fischetti',
    relax_constraints=False,
)

In [5]:
# Predict every test sample and group the samples by their predicted class.
predictions = []
possible_classes = np.unique(y_test)

# One bucket per class: row positions in the test set and the matching feature vectors.
class_indexes = [[] for _ in range(n_classes)]
class_predictions = [[] for _ in range(n_classes)]

data = testing_data.to_numpy()
for i, row in enumerate(data):
    features = row[:-1]  # drop the trailing label column
    predicted = mlp_explainer.model_classification_output(k_model=keras_model, net_input=features)[1].numpy()
    predictions.append(predicted)
    for j, p_class in enumerate(possible_classes):
        if predicted == p_class:
            class_indexes[j].append(i)
            class_predictions[j].append(features)

# Compare the true labels (last column) against the model predictions.
print("Metrics:", classification_report(data[:, -1], predictions, digits=4))

Metrics:               precision    recall  f1-score   support

         0.0     1.0000    1.0000    1.0000        38
         1.0     1.0000    0.3243    0.4898        37
         2.0     0.6032    1.0000    0.7525        38

    accuracy                         0.7788       113
   macro avg     0.8677    0.7748    0.7474       113
weighted avg     0.8666    0.7788    0.7497       113



In [6]:
# Rebuild the test set with the *predicted* class as the label column,
# ordered class by class following `class_indexes`.
cols = testing_data.columns.tolist()
if 'target' not in cols:
    cols.append('target')

test_features = testing_data.to_numpy()[:, :-1]
predicted_dataset = []
for i, pos_class in enumerate(np.unique(y_test)):
    predicted_label = pos_class.astype('int')
    for instance in test_features[class_indexes[i]]:
        instance = np.append(instance, predicted_label)
        predicted_dataset.append(instance)

predicted_dataset = np.asarray(predicted_dataset)
pred_dataset_df = pd.DataFrame(predicted_dataset, columns=cols)
# np.append promotes the label to the feature dtype, so cast it back to int.
pred_dataset_df['target'] = pred_dataset_df['target'].astype('int')

In [7]:
# Accumulators for the one-step experiment; each name gets its own fresh list.
metrics_dataframes = []
times_onestep, sizes_onestep, rsum_onestep, coverage_onestep = [], [], [], []
pos_exp_onestep, neg_exp_onestep = [], []

In [8]:
# Display the per-feature [min, max] pairs computed from the combined train/test frame.
original_bounds

[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]

In [9]:
# One-step experiment: explain every test sample, grouped by predicted class,
# and record runtime, explanation size, range sum and coverage per sample.
onestep_explanations = []

# Reset the metric accumulators together with the explanations so that
# re-running this cell cannot leave them out of sync (or fail on `.append`
# after a later cell has rebound them to NumPy arrays).
times_onestep = []
sizes_onestep = []
rsum_onestep = []
coverage_onestep = []

# Feature matrix of the test split (the last column holds the label).
test_features = testing_data.to_numpy()[:, :-1]

# Iterate over the class buckets themselves. Looping over the number of
# distinct predicted labels would visit the wrong buckets, and skip samples,
# whenever some class is never predicted by the model.
for sample_indexes in class_indexes:
    for sample in test_features[sample_indexes]:
        start = time.perf_counter()
        explanation, minimal = mlp_explainer.run_explanation(
            sample=sample,
            n_classes=n_classes,
            kmodel=keras_model,
            model=mp_model,
            output_bounds=output_bounds,
            og_bounds=original_bounds,
            enable_log=False,
        )
        end = time.perf_counter()

        onestep_explanations.append(explanation)
        times_onestep.append(end - start)
        sizes_onestep.append(len(minimal))
        rsum_onestep.append(mymetrics.range_sum(explanation))
        # Coverage is the length of whatever `calculate_coverage` returns for the test frame.
        coverage_onestep.append(len(mymetrics.calculate_coverage(testing_data, explanation)))

In [10]:
def compute_mean_std(arr):
    """Return ``(mean, std)`` of ``arr`` using NumPy's default reductions.

    The standard deviation is ``np.std`` with its defaults (population form).
    """
    average = np.mean(arr)
    spread = np.std(arr)
    return average, spread

def relative_percentage_diff(new, old):
    """Return the percentage change of ``new`` relative to ``old``.

    Computes ``(new - old) / old * 100`` element-wise (scalars are accepted too).

    Args:
        new: Value(s) being compared; scalar or array-like.
        old: Baseline value(s); scalar or array-like broadcastable with ``new``.

    Returns:
        The relative difference in percent. When any baseline entry is zero a
        warning line is printed and those entries are returned as ``np.nan``.
    """
    new = np.asarray(new)
    old = np.asarray(old)
    zero_baseline = old == 0

    if not np.any(zero_baseline):
        return ((new - old) / old) * 100

    print('Warning: found possible division by zero')
    # The division is still evaluated for every element, so silence NumPy's
    # divide/invalid warnings here; the affected entries are replaced by NaN below.
    with np.errstate(divide='ignore', invalid='ignore'):
        percent = ((new - old) / old) * 100
    return np.where(zero_baseline, np.nan, percent)

# Two-step experiment: for every p, explain each test sample with the
# double-step routine, compare its metrics against the one-step run and
# write summary/raw CSV files plus the explanations to `result_path`.
p_values = [0.75]  # other candidate values: 0.25, 0.50

# The one-step metrics do not depend on p, so convert them to arrays once
# instead of on every iteration of the p loop.
times_onestep = np.array(times_onestep)
sizes_onestep = np.array(sizes_onestep)
rsum_onestep = np.array(rsum_onestep)
coverage_onestep = np.array(coverage_onestep)

# Feature matrix of the test split (the last column holds the label).
test_features = testing_data.to_numpy()[:, :-1]

for p_value in p_values:
    print(f"p = {p_value}")
    times_twostep = []
    sizes_twostep = []
    rsum_twostep = []
    coverage_twostep = []
    twostep_explanations = []

    # Iterate over the class buckets themselves. Looping over the number of
    # distinct predicted labels would visit the wrong buckets, and skip
    # samples, whenever some class is never predicted by the model.
    for sample_indexes in class_indexes:
        for sample in test_features[sample_indexes]:
            start = time.perf_counter()
            explanation, minimal = mlp_explainer.run_explanation_doublestep(
                sample=sample,
                n_classes=n_classes,
                kmodel=keras_model,
                model=mp_model,
                output_bounds=output_bounds,
                og_bounds=original_bounds,
                p=p_value,
            )
            end = time.perf_counter()

            twostep_explanations.append(explanation)
            times_twostep.append(end - start)
            sizes_twostep.append(len(minimal))
            rsum_twostep.append(mymetrics.range_sum(explanation))
            coverage_twostep.append(len(mymetrics.calculate_coverage(testing_data, explanation)))

    times_twostep = np.array(times_twostep)
    sizes_twostep = np.array(sizes_twostep)
    rsum_twostep = np.array(rsum_twostep)
    coverage_twostep = np.array(coverage_twostep)

    # The pointwise comparison below is element-wise, so both runs must have
    # explained exactly the same samples in the same order.
    if len(times_twostep) != len(times_onestep):
        raise ValueError(
            f"one-step run has {len(times_onestep)} samples but two-step run has "
            f"{len(times_twostep)}; re-run the one-step cell before this one"
        )

    # Compute means and standard deviations
    (time_mean_onestep, time_std_onestep) = compute_mean_std(times_onestep)
    (time_mean_twostep, time_std_twostep) = compute_mean_std(times_twostep)

    (sizes_mean_onestep, sizes_std_onestep) = compute_mean_std(sizes_onestep)
    (sizes_mean_twostep, sizes_std_twostep) = compute_mean_std(sizes_twostep)

    (rsum_mean_onestep, rsum_std_onestep) = compute_mean_std(rsum_onestep)
    (rsum_mean_twostep, rsum_std_twostep) = compute_mean_std(rsum_twostep)

    (coverage_mean_onestep, coverage_std_onestep) = compute_mean_std(coverage_onestep)
    (coverage_mean_twostep, coverage_std_twostep) = compute_mean_std(coverage_twostep)

    # Compute relative percentage differences (Mean & Std)
    time_mean_diff = relative_percentage_diff(time_mean_twostep, time_mean_onestep)
    sizes_mean_diff = relative_percentage_diff(sizes_mean_twostep, sizes_mean_onestep)
    rsum_mean_diff = relative_percentage_diff(rsum_mean_twostep, rsum_mean_onestep)
    coverage_mean_diff = relative_percentage_diff(coverage_mean_twostep, coverage_mean_onestep)

    time_std_diff = relative_percentage_diff(time_std_twostep, time_std_onestep)
    sizes_std_diff = relative_percentage_diff(sizes_std_twostep, sizes_std_onestep)
    rsum_std_diff = relative_percentage_diff(rsum_std_twostep, rsum_std_onestep)
    coverage_std_diff = relative_percentage_diff(coverage_std_twostep, coverage_std_onestep)

    # Compute pointwise relative differences (two-step vs. one-step, per sample)
    time_relative_pointwise = relative_percentage_diff(times_twostep, times_onestep)
    sizes_relative_pointwise = relative_percentage_diff(sizes_twostep, sizes_onestep)
    rsum_relative_pointwise = relative_percentage_diff(rsum_twostep, rsum_onestep)
    coverage_relative_pointwise = relative_percentage_diff(coverage_twostep, coverage_onestep)

    # Compute pointwise means. Entries whose one-step baseline is zero come
    # back as NaN; the NaN-aware reductions keep one such sample from turning
    # the whole aggregate into NaN.
    time_relative_mean = np.nanmean(time_relative_pointwise)
    sizes_relative_mean = np.nanmean(sizes_relative_pointwise)
    rsum_relative_mean = np.nanmean(rsum_relative_pointwise)
    coverage_relative_mean = np.nanmean(coverage_relative_pointwise)

    # Compute pointwise standard deviations (same NaN handling as the means)
    time_relative_std = np.nanstd(time_relative_pointwise)
    sizes_relative_std = np.nanstd(sizes_relative_pointwise)
    rsum_relative_std = np.nanstd(rsum_relative_pointwise)
    coverage_relative_std = np.nanstd(coverage_relative_pointwise)

    # Organize Data: one row per metric, one column per statistic
    all_metrics_data = {
        'Metric': ['Time', 'Size', 'Ranges_Sum', 'Coverage'],
        'ONESTEP_MEAN': [time_mean_onestep, sizes_mean_onestep, rsum_mean_onestep, coverage_mean_onestep],
        'ONESTEP_STD': [time_std_onestep, sizes_std_onestep, rsum_std_onestep, coverage_std_onestep],
        'TWOSTEP_MEAN': [time_mean_twostep, sizes_mean_twostep, rsum_mean_twostep, coverage_mean_twostep],
        'TWOSTEP_STD': [time_std_twostep, sizes_std_twostep, rsum_std_twostep, coverage_std_twostep],
        'MEAN_DIFF_%': [time_mean_diff, sizes_mean_diff, rsum_mean_diff, coverage_mean_diff],
        'STD_DIFF_%': [time_std_diff, sizes_std_diff, rsum_std_diff, coverage_std_diff],
        'POINTWISE_MEAN_%': [time_relative_mean, sizes_relative_mean, rsum_relative_mean, coverage_relative_mean],
        'POINTWISE_STD_%': [time_relative_std, sizes_relative_std, rsum_relative_std, coverage_relative_std]
    }

    # Display and save
    all_metrics_df = pd.DataFrame(all_metrics_data)
    display(all_metrics_df)
    all_metrics_df.to_csv(f'{result_path}/results_{p_value}.csv', index=False)

    # Save Raw Metric Data (one row per explained sample)
    raw_df = pd.DataFrame({
        "times_onestep": times_onestep,
        "times_twostep": times_twostep,
        "sizes_onestep": sizes_onestep,
        "sizes_twostep": sizes_twostep,
        "rsum_onestep": rsum_onestep,
        "rsum_twostep": rsum_twostep,
        "coverage_onestep": coverage_onestep,
        "coverage_twostep": coverage_twostep,
        "time_relative_%": time_relative_pointwise,
        "sizes_relative_%": sizes_relative_pointwise,
        "rsum_relative_%": rsum_relative_pointwise,
        "coverage_relative_%": coverage_relative_pointwise
    })

    # Save to CSV
    raw_df.to_csv(f"{result_path}/raw_metric_data_{p_value}.csv", index=False)

    # Save onestep explanations
    np.savez(f'{result_path}/onestep_explanations_{p_value}.npz',
             onestep_explanations=onestep_explanations)

    # Save twostep explanations
    # NOTE(review): this file name has no underscore before the p value, unlike
    # the one-step file above. Kept as-is so existing result files and loaders
    # keep matching; confirm before renaming.
    np.savez(f'{result_path}/twostep_explanations{p_value}.npz',
             twostep_explanations=twostep_explanations)

p = 0.75


Unnamed: 0,Metric,ONESTEP_MEAN,ONESTEP_STD,TWOSTEP_MEAN,TWOSTEP_STD,MEAN_DIFF_%,STD_DIFF_%,POINTWISE_MEAN_%,POINTWISE_STD_%
0,Time,0.166112,0.031777,0.210011,0.035523,26.427442,11.79008,29.287164,27.288008
1,Size,2.831858,0.677092,2.831858,0.677092,0.0,0.0,0.0,0.0
2,Ranges_Sum,2.092496,0.691916,2.134213,0.652762,1.993634,-5.658714,3.265488,5.600234
3,Coverage,19.566372,14.482138,22.646018,12.947765,15.739484,-10.594937,73.727591,163.165583
