# Imports

In [136]:
%pip install --upgrade z3-solver



In [137]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_wine
from z3 import *
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree as sk_tree
from matplotlib import pyplot as plt

In [138]:
# Z3 pretty-printer setting: show rational numerals in decimal notation
# (e.g. the printed explanation reads `x22 == 85.56` instead of a fraction).
set_option(rational_to_decimal=True)

# Z3 Functions

$(I \land T) \rightarrow D$

## function I (instance expression)

In [139]:
def instance_expression(instance):
    """Encode a concrete instance as a list of Z3 equalities.

    Feature ``i`` is represented by the real-valued Z3 variable ``x{i}``.

    Args:
        instance: iterable of feature values, in feature order.

    Returns:
        A Python list ``[x0 == instance[0], x1 == instance[1], ...]``.
    """
    equalities = []
    for index, feature_value in enumerate(instance):
        feature_var = Real(f'x{index}')
        equalities.append(feature_var == feature_value)
    return equalities

## function T (tree leaves and constraints expression)

$T = T_{model} \land T_{constraints}$

In [140]:
def feature_constraints_expression(X):
    """Build the Z3 domain constraints for every feature column of ``X``.

    Each feature variable ``x{i}`` is bounded by the smallest and largest
    value found in column ``i`` of ``X``.

    Args:
        X: 2-D array indexed as ``X[:, i]`` and exposing ``shape[1]``.

    Returns:
        A single Z3 conjunction of ``lower_i <= x{i} <= upper_i`` for all columns.
    """
    def column_bounds(column_index):
        # Bounds come from the observed data, not from any a-priori domain.
        column = X[:, column_index]
        lowest, highest = column.min(), column.max()

        feature_var = Real(f'x{column_index}')
        lower_bound = RealVal(lowest)
        upper_bound = RealVal(highest)
        return And(lower_bound <= feature_var, feature_var <= upper_bound)

    per_feature = [column_bounds(i) for i in range(X.shape[1])]
    return And(*per_feature)

In [141]:
def tree_paths_expression(tree, tree_index, class_index):
    """Encode one fitted regression tree as Z3 implications, one per leaf.

    For every root-to-leaf path the formula states: if all split conditions
    on the path hold, then the tree output variable
    ``o_{tree_index}_{class_index}`` equals the leaf value.

    Args:
        tree: fitted tree exposing ``tree_`` (feature, threshold, value,
            children_left, children_right arrays).
        tree_index: boosting-stage index used in the output variable name.
        class_index: class index used in the output variable name.

    Returns:
        Z3 conjunction of all path implications, in left-first depth-first order.
    """
    structure = tree.tree_
    split_feature = structure.feature
    split_threshold = structure.threshold
    node_value = structure.value

    output_var = Real(f'o_{tree_index}_{class_index}')
    implications = []

    # Explicit stack instead of recursion; the right child is pushed first so
    # the left child is popped first, giving left-first pre-order.
    pending = [(0, [])]
    while pending:
        node, conditions = pending.pop()

        # A feature index of -2 marks a node without a split, i.e. a leaf.
        if split_feature[node] == -2:
            leaf_output = node_value[node][0][0]
            implications.append(Implies(And(conditions), output_var == leaf_output))
            continue

        feature_var = Real(f'x{split_feature[node]}')
        go_left = feature_var <= split_threshold[node]
        go_right = feature_var > split_threshold[node]
        pending.append((structure.children_right[node], conditions + [go_right]))
        pending.append((structure.children_left[node], conditions + [go_left]))

    return And(*implications)

In [142]:
def model_trees_expression(model):
    """Encode every tree of a fitted boosting model as one Z3 conjunction.

    ``model.estimators_`` is walked stage by stage and, within a stage, tree by
    tree; the position of a tree inside its stage is used as its class index.

    Args:
        model: fitted ensemble exposing ``estimators_`` as a 2-level iterable.

    Returns:
        Z3 conjunction of the per-tree path formulas.
    """
    per_tree = [
        tree_paths_expression(stage_tree, stage_index, class_index)
        for stage_index, stage in enumerate(model.estimators_)
        for class_index, stage_tree in enumerate(stage)
    ]
    return And(*per_tree)

## function D (decision function result expression)

In [143]:
def decision_function_expression(model, x):
    """Build the Z3 formula D stating that the model keeps its prediction.

    The raw score of each class is written symbolically as
    ``learning_rate * sum_t o_{t}_{c} + init_c`` where ``o_{t}_{c}`` is the
    output variable of tree ``t`` for class ``c``. The intercept ``init_c`` is
    recovered numerically as ``decision_function(x) - learning_rate * tree sums``.

    Fixes over the previous version:
      * The per-class tree sums are transposed to ``(n_samples, n_outputs)``
        before being subtracted from the decision values. Previously the
        shapes ``(1, K)`` and ``(K, 1)`` broadcast to ``(K, K)`` and every
        multiclass intercept was computed with class 0's tree sum.
      * The predicted label is mapped to its position in ``model.classes_``
        instead of being used directly as an index, so label sets other than
        ``0..K-1`` work.

    Args:
        model: fitted gradient boosting classifier (uses ``learning_rate``,
            ``estimators_``, ``n_classes_``, ``classes_``, ``predict`` and
            ``decision_function``).
        x: 2-D array-like of samples; only the first sample is explained.

    Returns:
        Z3 conjunction. Binary: the single score is ``< 0`` (first class) or
        ``> 0`` (second class). Multiclass: the predicted class score is
        strictly greater than every other class score.
    """
    learning_rate = model.learning_rate
    estimators = model.estimators_
    n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_

    # Position of the predicted label within classes_, not the label itself.
    predicted_label = model.predict(x)[0]
    predicted_class = int(np.flatnonzero(model.classes_ == predicted_label)[0])

    # Normalise the decision values to (n_samples, n_outputs).
    decision = np.asarray(model.decision_function(x))
    if decision.ndim == 1:
        decision = decision[:, np.newaxis]

    # Each stage yields (n_outputs, n_samples); transpose so it lines up with
    # `decision` instead of broadcasting against it.
    stage_outputs = [
        np.array([tree.predict(x) for tree in stage]).T
        for stage in estimators
    ]
    estimator_sum = np.sum(stage_outputs, axis=0) * learning_rate
    init_value = decision - estimator_sum

    equation_list = []
    for class_number in range(n_classes):
        tree_outputs = [
            Real(f'o_{estimator_number}_{class_number}')
            for estimator_number in range(len(estimators))
        ]
        equation_list.append(
            Sum(tree_outputs) * learning_rate + init_value[0][class_number]
        )

    if n_classes <= 2:
        # Binary models expose a single score whose sign selects the class.
        if predicted_class == 0:
            constraints = [equation_list[0] < 0]
        else:
            constraints = [equation_list[0] > 0]
    else:
        constraints = [
            equation_list[predicted_class] > equation_list[class_number]
            for class_number in range(n_classes)
            if class_number != predicted_class
        ]

    return And(constraints)

# Explanation Functions

In [144]:
def is_proved(f):
    """Return True when the Z3 formula ``f`` is valid.

    Validity is checked by refutation: ``f`` is proved exactly when its
    negation is reported ``unsat``. Any other solver answer yields False.
    """
    solver = Solver()
    solver.add(Not(f))
    return solver.check() == unsat

In [145]:
def explain(I, T, D):
    """Greedily shrink the instance ``I`` to a subset that still entails ``D``.

    Each formula of ``I`` is tentatively dropped in order. If the remaining
    formulas together with ``T`` still prove ``D``, the formula is discarded
    for good; otherwise it is kept.

    The candidate list is built by position rather than with
    ``list.remove``/``append``, which avoids element comparisons through Z3's
    overloaded ``==`` and leaves no unused locals. The conjunct order handed
    to the solver (not-yet-visited formulas first, then the kept ones) is the
    same as before.

    Args:
        I: list of Z3 formulas describing the instance (not modified).
        T: Z3 formula for the model and its domain constraints.
        D: Z3 formula for the decision to preserve.

    Returns:
        New list with the formulas of ``I`` that were kept, in original order.
    """
    kept = []

    for position, formula in enumerate(I):
        # Everything still to be visited, plus what was already found necessary.
        candidate = list(I[position + 1:]) + kept

        if not is_proved(Implies(And(And(candidate), T), D)):
            # Without `formula` the decision is no longer entailed: keep it.
            kept.append(formula)

    return kept

In [146]:
# Fit a 10-stage gradient boosting classifier on the breast-cancer dataset,
# holding out 10% of the samples (fixed split seed) as instances to explain.
# NOTE(review): the classifier itself is created without a random_state.
gb = GradientBoostingClassifier(n_estimators = 10)
breast_cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(breast_cancer.data, breast_cancer.target, test_size=0.1, random_state=101)
gb.fit(X_train, y_train)

In [147]:
# Domain constraints: each feature is bounded by its min/max over the full
# dataset (train and test rows together).
T_constraints = feature_constraints_expression(breast_cancer.data)

In [148]:
# Symbolic encoding of every tree in the fitted binary model.
T_model = model_trees_expression(gb)

In [149]:
# T = T_model AND T_constraints: the full theory used when checking entailment.
T = And(T_model, T_constraints)

In [150]:
# Instance to explain: the first held-out sample, as one equality per feature.
I = instance_expression(X_test[0])

In [151]:
# Decision formula for the same sample; it is wrapped in a list because the
# model API is called on a collection of samples.
D = decision_function_expression(gb, [X_test[0]])

In [152]:
# Reduce the instance to the feature equalities that explain() keeps, then show them.
test = explain(I, T, D)
print(test)

[x22 == 85.56, x23 == 544.1, x24 == 0.1184, x27 == 0.08442]


## outro

In [153]:
def multiclass_initial_prediction(y, n_classes):
    """Return centred log class frequencies for integer labels ``y``.

    Args:
        y: 1-D array of non-negative integer class labels.
        n_classes: minimum number of classes to report (classes absent from
            ``y`` get a frequency of zero).

    Returns:
        Array with one value per class: ``log(frequency + 1e-15)`` minus the
        mean of those logs, so the values sum to zero.
    """
    frequencies = np.bincount(y, minlength=n_classes) / len(y)

    # The small offset keeps the log finite for classes that never occur.
    log_frequencies = np.log(frequencies + 1e-15)
    centre = log_frequencies.mean()
    return log_frequencies - centre

In [154]:
def binary_initial_prediction(y):
    """Return the log-odds of the positive class for 0/1 labels ``y``.

    Computed as ``log(p / (1 - p))`` where ``p`` is the mean of ``y``.
    """
    positive_rate = np.mean(y)
    return np.log(positive_rate / (1 - positive_rate))

In [155]:
def compare_decision_function(model, X, initial_prediction=None):
    """Recompute the model's decision values from its trees and check them.

    The raw score is rebuilt as ``learning_rate * sum of tree predictions``
    plus ``initial_prediction`` (unless ``model.init_ == 'zero'``) and compared
    with ``model.decision_function(X)``. A message is printed on mismatch.

    Fixes over the previous version:
      * ``initial_prediction`` is tested with identity/``np.isscalar`` instead
        of ``!= None``.
      * The reference is reshaped to the recomputed array's shape when both
        hold the same number of values. Previously an ``(n, 1)`` result was
        compared with an ``(n,)`` reference, which broadcasts to ``(n, n)``
        and reports a false mismatch.

    Args:
        model: fitted boosting model (uses ``learning_rate``, ``estimators_``,
            ``init_`` and ``decision_function``).
        X: 2-D array-like of samples.
        initial_prediction: scalar or per-class array added to the tree sums;
            required unless ``model.init_ == 'zero'``.

    Returns:
        The recomputed decision values (flattened when ``initial_prediction``
        is a scalar), or ``None`` when a required ``initial_prediction`` is
        missing.
    """
    uses_init = model.init_ != 'zero'

    if initial_prediction is None and uses_init:
        print("Error - Missing initial_prediction")
        return None

    # Each stage gives (n_outputs, n_samples); transpose to (n_samples, n_outputs).
    stage_outputs = [
        np.array([tree.predict(X) for tree in stage]).T
        for stage in model.estimators_
    ]
    final_predictions = np.sum(stage_outputs, axis=0) * model.learning_rate

    if uses_init:
        final_predictions += initial_prediction

    # np.isscalar(None) is False, so no separate None check is needed.
    if np.isscalar(initial_prediction):
        final_predictions = final_predictions.flatten()

    reference = np.asarray(model.decision_function(X))
    if (reference.shape != final_predictions.shape
            and reference.size == final_predictions.size):
        # Align layouts so the comparison is element-wise, not broadcast.
        reference = reference.reshape(final_predictions.shape)

    if not np.allclose(final_predictions, reference):
        print("Error - Deicison Function does not match")

    return final_predictions

In [156]:
def print_init_decision_function(model, X, sample_index=1):
    """Print and return the part of the decision value not explained by trees.

    For one sample, prints ``model.decision_function`` and the scaled sum of
    all tree predictions, then returns their difference (the model's initial
    prediction as seen by that sample).

    Args:
        model: fitted boosting model (uses ``learning_rate``, ``estimators_``
            and ``decision_function``).
        X: indexable collection of samples.
        sample_index: position in ``X`` of the sample to inspect. Defaults to
            1, the index that was previously hard-coded.

    Returns:
        ``decision - learning_rate * tree sums`` for the selected sample.
    """
    sample = [X[sample_index]]

    decision = model.decision_function(sample)
    print(decision)

    # Each stage gives (n_outputs, 1); transpose to (1, n_outputs).
    stage_outputs = [
        np.array([tree.predict(sample) for tree in stage]).T
        for stage in model.estimators_
    ]
    final_predictions = np.sum(stage_outputs, axis=0) * model.learning_rate
    print(final_predictions)

    return decision - final_predictions

# Test Function
explicar todas as instancias - tamanho medio da explicação & desvio padrão, porcentagem de redução de features:
 - para binario
 - para multiclasse
 - dataset de imagens
 - reordenar as features e reavaliar

## Multiclass

In [157]:
# Deliberately small multiclass model (2 boosting stages).
gb_multiclass = GradientBoostingClassifier(n_estimators = 2)

In [158]:
# Iris data for the multiclass case; 10% held out with a fixed split seed.
iris = load_iris()
X_iris, y_iris = iris.data, iris.target
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris, y_iris, test_size=0.1, random_state=101)

In [159]:
# Train the multiclass model on the iris training split.
gb_multiclass.fit(X_iris_train, y_iris_train)

## Binary

In [160]:
# Binary model with 10 boosting stages (same size as `gb` above).
gb_binary = GradientBoostingClassifier(n_estimators = 10)

In [161]:
# Breast-cancer data for the binary case; only 1% is held out here
# (test_size=0.01), unlike the 10% split used for `gb` above.
cancer = load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(X_cancer, y_cancer, test_size=0.01, random_state=101)

In [162]:
# Train the binary model on the breast-cancer training split.
gb_binary.fit(X_cancer_train, y_cancer_train)