# Study – Australian

In [None]:
import pandas as pd
import tensorflow as tf

from milp import codify_network
from teste import get_minimal_explanation

In [None]:
# For type annotations
import numpy as np

In [None]:
# Name of the dataset; every path below is derived from it.
dataset_name = 'australian'

# Pre-trained Keras network that the rest of the notebook explains.
keras_model = tf.keras.models.load_model(f'datasets/{dataset_name}/model_4layers_{dataset_name}.h5')

# Read both splits and stack them into a single frame, training rows first.
training_data, testing_data = (
	pd.read_csv(f'datasets/{dataset_name}/{split}.csv')
	for split in ('train', 'test')
)
dataframe = pd.concat([training_data, testing_data])

# Number of distinct values in the 'target' column, and the frame as a plain array.
n_classes = dataframe['target'].nunique()
data = dataframe.to_numpy()

In [None]:
# Encode the Keras network with the project's codify_network helper, using the
# 'fischetti' method and without relaxing constraints. The helper returns the
# encoded model together with what it calls the output bounds.
mp_model, output_bounds = codify_network(
	keras_model,
	dataframe,
	'fischetti',
	relax_constraints=False,
)

### Printing What the Model Predicted

_Aka_ printing the network output.

In [None]:
# Index of the dataset row that is explained throughout the notebook.
i = 5
print('i =', i)

# All columns of row i except the last one (presumably the target), shaped as a
# batch containing a single sample.
network_input = tf.reshape(tf.constant(data[i, :-1]), [1, -1])

# Run the network once and reuse the result; network_input is already a tensor,
# so it does not need to be wrapped in tf.constant again.
raw_predictions = keras_model.predict(network_input)

# First output value of the single sample; used for the printout below.
predictions = raw_predictions[0, 0]

# Index of the largest output. It is kept as a tensor because a later cell passes
# network_output on unchanged.
network_output = tf.argmax(raw_predictions[0])

print(f'Predictions: (ndarray[ndarray[{type(predictions)}]])', predictions)
classification: np.int64 = network_output.numpy()
print(f'Network output: ({type(classification)})', classification)

### Printing the Minimal Explanation

A minimal explanation only indicates which inputs are relevant to reach a conclusion.

**Note:** The explanation happens _after_ the keras_model makes its prediction.

In [None]:
# The explanation search is given a clone of the encoded network model rather
# than mp_model itself.
mdl_aux = mp_model.clone()

minimal_explanation = get_minimal_explanation(
	mdl_aux,
	network_input,
	network_output,
	n_classes,
	'fischetti',
	output_bounds,
)

# Last expression of the cell: show the result.
minimal_explanation

> The keras_model predicted $C_1$ because:
>
> $x_0 = 2.967691214515491$,
>
> $x_3 = -1.408120229258977$,
>
> $x_5 = -0.790702170757714$, 
>
> $x_6 = 4.24127975754059$, 
>
> $x_7 = -0.3615292659832898$ and 
>
> $x_8 = -0.6037614142464092$.

### Trying to Improve the Explanation

Given a minimal explanation, can we improve it?

Constraints of type $x = c$ are equivalent to $x \le c \land x \ge c$.

Therefore, we need to substitute each $x = c$ constraint by the $x \le c$ and $x \ge c$ constraints.

Then, we try stretching the interval by substituting $x \le c$ by $x \le c + \Delta x$ and see if our prediction changes. If the prediction stays the same, then we substitute and try stretching it again. If the prediction changes, then this new interval isn't valid and we don't substitute: we have found the upper bound of the interval, i.e. the last right-hand side that kept the prediction unchanged.

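Then we try to stretch the interval to find the lower bound. Analogously, we try substituting $x \ge c$ by $x \ge c - \Delta x$ and see if our prediction changes, repeating the step for as long as the prediction stays the same.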


We will end up with a pair of constraints that looks like $c - k_l \cdot \Delta{x} \le x$ and $x \le c + k_u \cdot \Delta{x}$, i.e. this pair represents $c - k_l \cdot \Delta{x} \le x \le c + k_u \cdot \Delta{x}$.

### Setting Up

In [None]:
import docplex

In [None]:
# Second name for the model object that was handed to get_minimal_explanation.
minimal_model = mdl_aux
# Separate clone on which the interval-stretching experiments below are run.
testing_model = minimal_model.clone()

#### Quick Scratch

In [None]:
# Look up the linear constraints matched by the pattern 'input' and display them.
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

In [None]:
# Replace every equality constraint matched by 'input' with an equivalent pair of
# inequalities (lhs <= rhs and lhs >= rhs) so each side can be moved separately.
linear_constraints = testing_model.find_matching_linear_constraints('input')

for constraint in linear_constraints:
	# The pattern 'input' also matches the 'input LE' / 'input GE' constraints this
	# cell creates. Skipping anything that is not an equality keeps the cell safe
	# to re-run: inequalities that already exist are left as they are.
	if constraint.sense != docplex.mp.constants.ComparisonType.EQ:
		continue

	testing_model.remove_constraint(constraint)
	# The right-hand side is cloned so the two new constraints do not share one
	# expression object.
	testing_model.add_constraint(constraint.lhs <= constraint.rhs.clone(), 'input LE')
	testing_model.add_constraint(constraint.lhs >= constraint.rhs.clone(), 'input GE')

In [None]:
# Fetch the constraints matched by 'input' again and display them; the list
# stored here is the one the widening loop further down iterates over.
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

Seeing the bounds of the variables

In [None]:
# One line per variable matched by 'x': lower bound, the variable, upper bound.
for x_var in testing_model.find_matching_vars('x'):
	print(' '.join(str(part) for part in (x_var.lb, x_var, x_var.ub)))

In [13]:
# Amount by which a bound is moved on every widening attempt.
epsilon = 0.1

variables = testing_model.find_matching_vars('x')

# Widen each inequality in steps of epsilon for as long as solve() finds no
# solution. NOTE(review): this reads as "no solution means the prediction is still
# guaranteed" -- confirm against get_minimal_explanation.
for constraint in linear_constraints:
	testing_model.solve()
	print('Initial constraint:' + '\t', constraint)

	variable = constraint.lhs
	# True only when the most recent widening step is the one that made the model
	# solvable, i.e. the only situation in which a step has to be rolled back.
	last_step_made_feasible = False
	while testing_model.solution is None:
		if constraint.sense == docplex.mp.constants.ComparisonType.LE:
			# Stop once the right-hand side has moved past the variable's upper bound.
			if constraint.rhs.constant <= variable.ub:
				constraint.rhs += epsilon
			else:
				break
		elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
			# Stop once the right-hand side has moved past the variable's lower bound.
			if constraint.rhs.constant >= variable.lb:
				constraint.rhs -= epsilon
			else:
				break
		else:
			raise Exception('Constraint sense was neither LE nor GE')

		testing_model.solve()
		last_step_made_feasible = testing_model.solution is not None

	# Undo the last operation, but only when that operation is what produced a
	# solution. If the loop never ran, or stopped at a variable bound, nothing is
	# rolled back: undoing there would tighten a constraint that was never widened
	# or discard a step that had already been checked.
	if last_step_made_feasible:
		if constraint.sense == docplex.mp.constants.ComparisonType.LE:
			constraint.rhs -= epsilon
		elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
			constraint.rhs += epsilon

	print('Final constraint:' + '\t', constraint)
	print()

**TODO:** Rewrite each pair of constraints of the form $x \le c$ and $x \ge c$ as $x = c$:

In [None]:
# Re-read the constraints matched by 'input' from the model and display them.
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

Note that `x_6` is actually equal to `4.24127975754059`

In [None]:
# Collapse a pair of constraints on the same input back into one equality when
# both right-hand sides hold the same value.
number_of_inputs = len(dataframe.columns.drop('target'))

# The loop variable is deliberately not called `i`: this is notebook-level code, so
# a loop over `i` would overwrite the index of the instance being explained, and
# every later cell that reads `i` would silently work on another row.
for input_index in range(number_of_inputs):
	constraints = [c for c in linear_constraints if c.lhs.name == f'x_{input_index}']

	if len(constraints) == 2:
		# Compare with a tolerance: a bound that was moved by epsilon and moved back
		# is not guaranteed to be bit-identical to its starting value.
		if np.isclose(constraints[0].rhs.constant, constraints[1].rhs.constant):
			testing_model.remove_constraints(constraints)
			testing_model.add_constraint(constraints[0].lhs == constraints[0].rhs, 'input')

In [None]:
# The constraints matched by 'input' that remain in the model are kept as the
# improved explanation and displayed.
improved_explanation = testing_model.find_matching_linear_constraints('input')
improved_explanation

### Pretty Printing the Explanation

In [None]:
def get_variable_index(variable: 'docplex.mp.dvar.Var') -> int:
	"""Return the integer index embedded in a variable's name.

	The name is split on '_' and the second piece is converted to ``int``, so a
	variable named ``'x_3'`` yields ``3``.

	Args:
		variable: Object with a ``name`` attribute of the form ``'<prefix>_<index>'``.

	Returns:
		The index as an ``int``.

	Raises:
		IndexError: If the name contains no ``'_'``.
		ValueError: If the piece after the first ``'_'`` is not an integer.
	"""
	# The annotation above is quoted so that defining this function does not need
	# the docplex.mp.dvar submodule to be loaded already.
	index = variable.name.split('_')[1]
	return int(index)

In [None]:
def print_explanation(explanation: 'list[docplex.mp.constr.LinearConstraint]'):
	"""Print each constraint of an explanation using dataset column names.

	For every constraint, the index taken from the name of its left-hand side is
	used to look up a column of the notebook-level ``dataframe``. One line is then
	printed with that column name, the operator symbol of the constraint's sense,
	and its right-hand side.

	Args:
		explanation: Constraints whose left-hand side is a variable named
			``'<prefix>_<index>'``.
	"""
	# The annotation above is quoted so that it is not evaluated when the function
	# is defined: no docplex submodule has to be loaded at that point, and the
	# subscripted built-in ``list`` is not required at runtime.
	for e in explanation:
		feature_name = dataframe.columns[get_variable_index(e.lhs)]
		print(feature_name, e.sense.operator_symbol, e.rhs)

In [None]:
# Print the improved explanation with dataset column names instead of variable names.
print_explanation(improved_explanation)

## Comparing with Anchor

In [None]:
from anchor import utils

### Loading the Dataset

In [None]:
# Load the test split with Anchor's CSV helper. The last column is the target,
# the columns are named A1..A14 plus 'target', and the first line of the file is
# skipped.
d = utils.load_csv_dataset(
	data=f'datasets/{dataset_name}/test.csv',
	target_idx=-1,
	feature_names=[f'A{n}' for n in range(1, 15)] + ['target'],
	skip_first=True,
	# Other keyword options, not passed here:
	# categorical_features=None, features_to_use=None, feature_transformations=None,
	# discretize=False, balance=False, fill_na='-1', filter_fn=None
)

### Explainer

In [None]:
from anchor import anchor_tabular

In [None]:
# Build the Anchor explainer from the attributes of the loaded dataset object.
explainer = anchor_tabular.AnchorTabularExplainer(
	d.class_names,
	d.feature_names,
	d.train,
	d.categorical_names,
)

In [None]:
def predict_fn(x):
	"""Return the predicted class index for every row of ``x``.

	Anchor calls its classifier function on whole batches of samples and compares
	the returned labels element-wise with the label of the explained instance, so
	one label per input row is needed. The arg-max is therefore taken along
	axis 1 of the network output, not just over the first row.

	Args:
		x: 2-D array of samples, one row per sample.

	Returns:
		1-D array of class indices with one entry per row of ``x``.
	"""
	return np.argmax(keras_model.predict(x), axis=1)

In [None]:
# Check whether the row selected by `i` also appears among the rows Anchor uses as
# training data. Each Anchor row is compared with the leading columns of data[i]
# of the same length, since data[i] still carries the target column and the
# Anchor rows presumably do not. A tolerance is used because the two sides were
# parsed from CSV by different readers.
instance_in_anchor_train = any(
	np.allclose(row, data[i, :len(row)])
	for row in d.train
)
instance_in_anchor_train

In [None]:
# Ask Anchor to explain row i of `data` (every column except the last), with
# predict_fn as the classifier.
exp = explainer.explain_instance(data[i, :-1], predict_fn)

In [None]:
# Display what the Anchor explanation reports as its names (presumably the
# feature conditions that make up the anchor).
exp.names()

In [None]:
# Print the improved explanation again, next to Anchor's result, for comparison.
print_explanation(improved_explanation)

In [None]:
# Display the precision reported by the Anchor explanation.
exp.precision()

In [None]:
# Display the coverage reported by the Anchor explanation.
exp.coverage()

In [None]:
# explainer.explain_instance(
# 	data_row,
# 	classifier_fn,
# 	threshold=0.95,
# 	delta=0.1,
# 	tau=0.15,
# 	batch_size=100,
# 	max_anchor_size=None,
# 	desired_label=None,
# 	**kwargs)