# Imports

In [25]:
import numpy as np
import pandas as pd

from anchor import utils
from anchor import anchor_tabular
import re
import sklearn
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from pmlb import fetch_data
from z3 import *

# Model

## Functions

In [49]:
def feature_constraints_expression(X, round = 0):
    """Build a Z3 formula that keeps every feature inside its observed range.

    For each column ``i`` of ``X`` the values are multiplied by ``10**round``
    and the Z3 real variable ``x{i}`` is constrained to lie between the
    smallest and the largest scaled value of that column (both inclusive).

    Args:
        X: Two-dimensional array, indexed as ``X[:, i]``; one column per
            feature.
        round: Exponent of the power of ten the column values are multiplied
            by before the bounds are taken.

    Returns:
        The conjunction (``And``) of one range constraint per column.
    """
    column_bounds = []

    for column_index in range(X.shape[1]):
        scaled_column = X[:, column_index] * 10**round

        variable = Real(f"x{column_index}")
        lower_bound = RealVal(scaled_column.min())
        upper_bound = RealVal(scaled_column.max())

        column_bounds.append(And(lower_bound <= variable, variable <= upper_bound))

    return And(*column_bounds)

In [27]:
def tree_paths_expression(tree, tree_index, class_index, round = 0):
    """Encode every root-to-leaf path of one tree as Z3 implications.

    Each leaf yields ``Implies(And(path conditions), o == leaf value)`` where
    ``o`` is the real variable ``o_{tree_index}_{class_index}`` and the path
    conditions are the ``x{feature} <= threshold`` / ``x{feature} > threshold``
    tests taken on the way down from the root.

    Args:
        tree: Object exposing a ``tree_`` attribute with ``feature``,
            ``threshold``, ``value``, ``children_left`` and ``children_right``
            arrays indexed by node id.
        tree_index: Index used in the name of the tree's output variable.
        class_index: Class index used in the name of the output variable.
        round: Not used by this function.

    Returns:
        The conjunction (``And``) of one implication per leaf.
    """
    structure = tree.tree_
    split_feature = structure.feature
    split_threshold = structure.threshold
    node_value = structure.value
    left_child = structure.children_left
    right_child = structure.children_right

    implications = []
    output = Real(f"o_{tree_index}_{class_index}")

    # Explicit depth-first walk. The right child is pushed before the left one
    # so the left subtree is popped first and leaves come out left to right.
    pending = [(0, [])]
    while pending:
        node, conditions = pending.pop()

        # A feature index of -2 is what this code treats as "node is a leaf".
        if split_feature[node] == -2:
            leaf_output = node_value[node][0][0]
            implications.append(Implies(And(conditions), output == leaf_output))
            continue

        variable = Real(f"x{split_feature[node]}")
        go_left = variable <= split_threshold[node]
        go_right = variable > split_threshold[node]
        pending.append((right_child[node], conditions + [go_right]))
        pending.append((left_child[node], conditions + [go_left]))

    return And(*implications)

In [28]:
def model_trees_expression(model):
    """Conjoin the path encodings of every tree stored in ``model.estimators_``.

    ``model.estimators_`` is iterated as a sequence of stages, each stage
    being a sequence of trees (one per class index). Every tree is encoded
    with ``tree_paths_expression`` using its stage index and class index.

    Args:
        model: Fitted ensemble exposing ``estimators_``.

    Returns:
        The conjunction (``And``) of all per-tree formulas.
    """
    return And(*[
        tree_paths_expression(tree, stage_index, class_index)
        for stage_index, stage in enumerate(model.estimators_)
        for class_index, tree in enumerate(stage)
    ])

In [29]:
def decision_function_expression(model, x):
    """Express "the model keeps its prediction for ``x``" as a Z3 formula.

    The raw score of each class is written symbolically as
    ``Sum(o_{stage}_{class}) * learning_rate + init`` where the ``o`` variables
    are the per-tree outputs and ``init`` is recovered numerically as the
    model's decision value minus the learning-rate-scaled sum of the trees'
    predictions for ``x``.

    Args:
        model: Fitted gradient-boosting classifier exposing ``learning_rate``,
            ``estimators_``, ``n_classes_``, ``classes_``,
            ``decision_function`` and ``predict``.
        x: Sequence of samples accepted by the model; only the first sample's
            prediction and scores are used.

    Returns:
        ``And`` of the constraints stating that the predicted class wins:
        for a single score (two classes or fewer) the score is ``< 0`` when
        the first class was predicted and ``> 0`` otherwise; with more
        classes, the predicted class score is strictly greater than every
        other class score.
    """
    learning_rate = model.learning_rate
    estimators = model.estimators_
    n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_

    decision = model.decision_function(x)
    predicted_label = model.predict(x)[0]
    # predict() returns a label, not a column index. Look the label up in
    # classes_ so that label sets other than 0..K-1 (strings, 1-based ids, ...)
    # select the right score.
    predicted_index = int(np.flatnonzero(model.classes_ == predicted_label)[0])

    # Numeric output of every tree for x, grouped per stage and per class.
    stage_outputs = [[tree.predict(x) for tree in stage] for stage in estimators]
    estimator_sum = np.sum(stage_outputs, axis=0) * learning_rate
    # Whatever the decision value contains beyond the trees' contribution.
    init_value = decision - estimator_sum.T

    equation_list = []
    for class_number in range(n_classes):
        tree_outputs = [
            Real(f"o_{estimator_number}_{class_number}")
            for estimator_number in range(len(estimators))
        ]
        equation_list.append(
            Sum(tree_outputs) * learning_rate + init_value[0][class_number]
        )

    if n_classes <= 2:
        if predicted_index == 0:
            final_equation = equation_list[0] < 0
        else:
            final_equation = equation_list[0] > 0
    else:
        final_equation = [
            equation_list[predicted_index] > equation_list[class_number]
            for class_number in range(n_classes)
            if class_number != predicted_index
        ]

    return And(final_equation)

### Anchor-to-expression functions

In [43]:
def make_expression(feature, operator, value):
    """Build the Z3 comparison ``feature <operator> value``.

    Args:
        feature: Name of the Z3 real variable on the left-hand side.
        operator: One of ``"<="``, ``">="``, ``"<"``, ``">"``, ``"=="`` or
            ``"="`` (the last two both mean equality).
        value: Right-hand side; converted with ``float``.

    Returns:
        The Z3 boolean expression comparing the variable with the value.

    Raises:
        ValueError: If ``operator`` is not one of the supported operators, or
            if ``value`` cannot be converted to ``float``.
    """
    comparisons = {
        "<=": lambda lhs, rhs: lhs <= rhs,
        ">=": lambda lhs, rhs: lhs >= rhs,
        "<": lambda lhs, rhs: lhs < rhs,
        ">": lambda lhs, rhs: lhs > rhs,
        "==": lambda lhs, rhs: lhs == rhs,
        "=": lambda lhs, rhs: lhs == rhs,
    }
    # Reject unknown operators explicitly; previously they fell through every
    # branch and surfaced as an UnboundLocalError on the return statement.
    if operator not in comparisons:
        raise ValueError(f"unsupported comparison operator: {operator!r}")
    return comparisons[operator](Real(feature), float(value))


def anchor_z3_expression(exp):
    """Translate textual anchor conditions into Z3 expressions.

    Each condition is split on single spaces and must mention a feature named
    ``x<digits>``. Three layouts are recognised:

    * ``feature op value``          -> one expression;
    * ``value op feature``          -> one expression with the operator
      mirrored so the feature ends up on the left;
    * ``value op feature op value`` -> two expressions (lower and upper part).

    Conditions without a feature name are ignored. Conditions with a feature
    name but an unrecognised layout print ``expression error`` and are
    skipped.

    Args:
        exp: Iterable of condition strings.

    Returns:
        A tuple ``(expressions, features)``: the list of Z3 expressions and
        the list of feature names, one name per accepted condition (a
        two-sided condition contributes two expressions but one name).
    """
    feature_pattern = re.compile(r"x\d+")
    # Operator to use once the feature is moved from the right-hand side to
    # the left-hand side of a comparison.
    operator_map = {"<": ">", ">": "<", "<=": ">=", ">=": "<=", "=": "=", "==": "=="}

    expressions = []
    features = []
    for name in exp:
        match = feature_pattern.search(name)
        if not match:
            continue

        feature = match.group()
        tokens = name.split(" ")
        n_tokens = len(tokens)

        # Length is checked before indexing so that short or malformed
        # conditions reach the "expression error" branch instead of raising
        # IndexError.
        if n_tokens >= 3 and tokens[0] == feature:
            expressions.append(make_expression(feature, tokens[1], tokens[2]))
        elif 3 <= n_tokens < 5 and tokens[2] == feature:
            mirrored = operator_map[tokens[1]]
            expressions.append(make_expression(feature, mirrored, tokens[0]))
        elif n_tokens == 5:
            lower_operator = operator_map[tokens[1]]
            upper_operator = tokens[3]
            expressions.append(make_expression(feature, lower_operator, tokens[0]))
            expressions.append(make_expression(feature, upper_operator, tokens[4]))
        else:
            print("expression error")
            continue
        features.append(feature)

    return expressions, features

## Class

In [None]:
def explain_when_delta_zero(self, instance, verbose=False):
    """Search for the smallest delta around the anchored features that flips the model.

    Every feature that appears in ``self.anchor_expressions`` is allowed to
    move within ``instance[i] +/- delta``; the solver minimises ``delta``
    subject to the model encoding ``self.T`` and the negation of the decision
    constraint for ``instance``. The outcome is printed.

    NOTE(review): this function takes ``self`` but is defined at module level;
    presumably it is meant to become a method of ``ExplainerCompleter`` -
    confirm. It reads ``self.anchor_expressions``, ``self.model`` and
    ``self.T`` and overwrites ``self.D``.

    Args:
        instance: One-dimensional array of feature values (``instance.shape[0]``
            features).
        verbose: Currently unused.

    Returns:
        None. The delta and the resulting per-feature intervals are printed.
    """
    print("explain_when_delta_zero:")
    opt = Optimize()

    anchor_variables = {str(formula.arg(0)) for formula in self.anchor_expressions}

    # delta was never defined in the original (NameError on first use). It is
    # declared as a real because the result is read with as_decimal() below.
    delta = Real("delta")
    opt.add(delta >= 0)

    # (index, z3 variable) for every feature that is part of the anchor.
    anchored = [
        (i, Real(f"x{i}"))
        for i in range(instance.shape[0])
        if f"x{i}" in anchor_variables
    ]
    for i, z3_var in anchored:
        opt.add((instance[i]) - delta <= z3_var, z3_var <= (instance[i]) + delta)

    # not D
    self.D = decision_function_expression(self.model, [instance])

    # model
    opt.add(self.T)
    opt.add(Not(self.D))

    # minimize delta
    opt.minimize(delta)

    if opt.check() == sat:
        model = opt.model()
        # Drop the '?' that can appear in the decimal rendering.
        delta_value = model.eval(delta).as_decimal(3).replace('?', '')
        print(f'delta = {delta_value}')

        radius = float(delta_value)
        constraints = [
            simplify(
                And(
                    RealVal(instance[i] - radius) <= z3_var,
                    z3_var <= RealVal(instance[i] + radius),
                )
            )
            for i, z3_var in anchored
        ]
        print(constraints)
    else:
        print("Problema inviável!")
        # The original referenced an undefined name here; the anchor
        # expressions live on the instance.
        print(self.anchor_expressions)
    return

In [47]:
class ExplainerCompleter:
    """Check an anchor explanation against a Z3 encoding of a tree ensemble.

    On construction the model's trees and the per-feature value ranges of
    ``data`` are encoded once into ``self.T``. ``explain_instance`` then asks
    the solver whether the model's decision can change while the anchor
    conditions hold and the remaining features stay within ``delta`` of the
    instance, minimising ``delta``.
    """

    def __init__(self, model, data, round=None):
        """Encode the model and the feature ranges.

        Args:
            model: Fitted ensemble passed to ``model_trees_expression``.
            data: Array passed to ``feature_constraints_expression``.
            round: Accepted but not used.
        """
        self.model = model

        # T = tree encoding AND feature range constraints
        self.T_constraints = feature_constraints_expression(data)
        self.T_model = model_trees_expression(self.model)
        self.T = And(self.T_model, self.T_constraints)

    def explain_instance(self, instance, exp, verbose=False, delta_fix = True):
        """Look for the smallest perturbation that breaks the anchor.

        Args:
            instance: One-dimensional array of feature values.
            exp: Anchor explanation; its ``names()`` are parsed into Z3
                expressions.
            verbose: When true and a solution exists, print every variable of
                the solver model.
            delta_fix: When truthy ``delta`` is an integer variable, otherwise
                a real one.

        Returns:
            The minimal ``delta`` found by the solver as a Z3 value, or
            ``None`` when the problem is unsatisfiable (no counterexample
            exists under the anchor). The same information is printed.
        """
        opt = Optimize()
        self.exp = exp

        # anchor expressions
        anchor_expressions, anchor_features = anchor_z3_expression(exp.names())
        self.anchor_expressions = anchor_expressions
        self.anchor_features = anchor_features
        opt.add(anchor_expressions)

        # Names of the variables constrained by the anchor (left-hand side of
        # each expression).
        anchor_variables = {str(formula.arg(0)) for formula in anchor_expressions}

        delta = Int("delta") if delta_fix else Real("delta")
        opt.add(delta >= 0)

        # Every feature that is NOT in the anchor may move at most delta away
        # from its value in the instance.
        for i in range(instance.shape[0]):
            var = f"x{i}"
            if var in anchor_variables:
                continue
            z3_var = Real(var)
            opt.add((instance[i]) - delta <= z3_var, z3_var <= (instance[i]) + delta)

        # not D
        self.D = decision_function_expression(self.model, [instance])

        # model
        opt.add(self.T)
        opt.add(Not(self.D))

        # minimize delta
        opt.minimize(delta)
        if opt.check() != sat:
            print("(unsat == correct)")
            return None

        # Fetch the model once instead of on every access.
        solution = opt.model()
        if verbose:
            for var in solution:
                print(var, "=", solution[var])
        delta_value = solution.eval(delta)
        print("delta =", delta_value)
        return delta_value

# Main

In [32]:
# Global Z3 display option: show rational values in decimal notation.
set_option(rational_to_decimal=True)

## iris

In [33]:
# Load the iris data and hold out 20% of it as a test split (fixed seed).
iris = load_iris()
X_iris, y_iris = iris.data, iris.target
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(
    X_iris, y_iris, test_size=0.2, random_state=101
)

# Fit the gradient-boosting model on the training split only.
gb_iris = GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=101)
gb_iris.fit(X_iris_train, y_iris_train)

# Predictions for the whole dataset (training and test rows).
y_pred = gb_iris.predict(X_iris)

In [48]:
# Z3-based checker built from the fitted model and the full iris data.
explainercomp = ExplainerCompleter(gb_iris, X_iris)

# Features are exposed to Anchor as x0, x1, ... so its conditions can be
# parsed back into Z3 variables with the same names.
iris_features_x = [f"x{i}" for i, _ in enumerate(iris.feature_names)]
anchor_explainer = anchor_tabular.AnchorTabularExplainer(
    gb_iris.classes_, iris_features_x, X_iris, categorical_names={}
)

possible_lens = []
possible_exp = []

# Explain every training instance with Anchor, then check that explanation.
for i, instance in enumerate(X_iris_train):
    exp = anchor_explainer.explain_instance(
        instance, gb_iris.predict, threshold=0.95
    )
    explainercomp.explain_instance(instance, exp)
    # Debug aid: print exp.names() and anchor_z3_expression(exp.names()) here
    # to inspect how an anchor is translated.

delta = 1
delta = 0
delta = 1
delta = 0
(unsat == correct)
delta = 0
delta = 0
delta = 0
delta = 0
delta = 1
delta = 1
(unsat == correct)
(unsat == correct)
delta = 0
delta = 0
(unsat == correct)
delta = 1
delta = 0
(unsat == correct)
delta = 0
delta = 0
delta = 0
(unsat == correct)
(unsat == correct)
delta = 0
delta = 1
delta = 0
(unsat == correct)
(unsat == correct)
delta = 0
(unsat == correct)
delta = 0
delta = 1
delta = 1
delta = 0
(unsat == correct)
delta = 0
(unsat == correct)
(unsat == correct)
(unsat == correct)
delta = 0
delta = 1
delta = 0
delta = 1
(unsat == correct)
(unsat == correct)
(unsat == correct)
delta = 0
(unsat == correct)
(unsat == correct)
delta = 1
delta = 1
(unsat == correct)
delta = 0
delta = 0
delta = 0
(unsat == correct)
(unsat == correct)
delta = 1
delta = 1
(unsat == correct)
delta = 1
delta = 0
delta = 1
(unsat == correct)
(unsat == correct)
delta = 0
delta = 1
delta = 1
delta = 0
delta = 0
delta = 0
delta = 1
(unsat == correct)
delta = 1
delta = 0
delta 

## wine

In [None]:
# The original cell under this "wine" heading was a verbatim copy of the iris
# cell. It now loads the wine dataset under its own names, so the iris
# variables defined earlier are left untouched.
from sklearn.datasets import load_wine

gb_wine = GradientBoostingClassifier(n_estimators=100, max_depth=3, random_state=101)

wine = load_wine()
X_wine, y_wine = wine.data, wine.target

# Same split proportions and seed as the iris experiment.
X_wine_train, X_wine_test, y_wine_train, y_wine_test = train_test_split(
    X_wine, y_wine, test_size=0.2, random_state=101
)

gb_wine.fit(X_wine_train, y_wine_train)
y_pred_wine = gb_wine.predict(X_wine)