In [1]:
from xgboost import XGBClassifier
from sklearn.datasets import load_iris, load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split
from xgboost import plot_tree
from z3 import *
import numpy as np
import pandas as pd
import pickle

In [2]:
set_option(rational_to_decimal=True)

# model

In [3]:
# import sys
# import os
# sys.path.append(os.path.abspath('../../../'))

# from model.xai_xgb_z3 import XGBoostExplainer

In [38]:
from z3 import *
import numpy as np


class XGBoostExplainer:
    """Apenas classificação binária e base_score = None
    data = X. labels = y
    """

    def __init__(self, model, data):
        """_summary_

        Args:
            model (XGBoost): xgboost model fited
            data (DataFrame): dataframe (X or X_train)
            labels (array): y (targets)
        """
        self.model = model
        self.data = data.values
        self.columns = model.feature_names_in_.tolist()
        self.max_categories = 2

    def fit(self):
        """Initialize Z3 expressions from model and categoric features from data.
        z3 expressions are built here for pkl compatibility (use fit after export pkl)
        """
        self.categoric_features = self.get_categoric_features(self.data)
        self.T_model = self.model_trees_expression(self.model)
        self.T = self.T_model

    def explain(self, instance, reorder="asc"):
        self.I = self.instance_expression(instance)
        self.D = self.decision_function_expression(self.model, [instance])

        return self.explain_expression(self.I, self.T, self.D, self.model, reorder)

    def get_categoric_features(self, data: np.ndarray):
        """
        Recebe um dataset e retorna uma fórmula no z3 com:
        - Restrições de valor máximo e mínimo para features contínuas.
        - Restrições de igualdade para features categóricas binárias.
        """
        categoric_features = []
        for i in range(data.shape[1]):
            feature_values = data[:, i]
            unique_values = np.unique(feature_values)
            if len(unique_values) <= self.max_categories:
                categoric_features.append(self.columns[i])

        return categoric_features

    def model_trees_expression(self, model):
        """
        Constrói expressões lógicas para todas as árvores de decisão em um dataframe de XGBoost.
        Para árvores que são apenas folhas, gera diretamente um And com o valor da folha.

        Args:
            df (pd.DataFrame): Dataframe contendo informações das árvores.
            class_index (int): Índice da classe atual.

        Returns:
            z3.ExprRef: Fórmula representando todos os caminhos de todas as árvores.
        """
        df = model.get_booster().trees_to_dataframe()
        df["Split"] = df["Split"].round(4)
        self.booster_df = df
        class_index = 0  # if model.n_classes_ == 2:
        all_tree_formulas = []

        for tree_index in df["Tree"].unique():
            tree_df = df[df["Tree"] == tree_index]
            o = Real(f"o_{tree_index}_{class_index}")

            if len(tree_df) == 1 and tree_df.iloc[0]["Feature"] == "Leaf":
                leaf_value = tree_df.iloc[0]["Gain"]
                all_tree_formulas.append(And(o == leaf_value))
                continue
            path_formulas = []

            def get_conditions(node_id):
                conditions = []
                current_node = tree_df[tree_df["ID"] == node_id]
                if current_node.empty:
                    return conditions

                parent_node = tree_df[
                    (tree_df["Yes"] == node_id) | (tree_df["No"] == node_id)
                ]
                if not parent_node.empty:
                    parent_data = parent_node.iloc[0]
                    feature = parent_data["Feature"]
                    split_value = parent_data["Split"]
                    x = Real(feature)
                    if parent_data["Yes"] == node_id:
                        conditions.append(x < split_value)
                    else:
                        conditions.append(x >= split_value)
                    conditions = get_conditions(parent_data["ID"]) + conditions

                return conditions

            for _, node in tree_df[tree_df["Feature"] == "Leaf"].iterrows():
                leaf_value = node["Gain"]
                leaf_id = node["ID"]
                conditions = get_conditions(leaf_id)
                path_formula = And(*conditions)
                implication = Implies(path_formula, o == leaf_value)
                path_formulas.append(implication)

            all_tree_formulas.append(And(*path_formulas))
        return And(*all_tree_formulas)

    def decision_function_expression(self, model, x):
        n_classes = 1 if model.n_classes_ <= 2 else model.n_classes_
        predicted_class = model.predict(x)[0]
        n_estimators = len(model.get_booster().get_dump())

        estimator_pred = Solver()
        estimator_pred.add(self.I)
        estimator_pred.add(self.T)
        variables = [Real(f"o_{i}_0") for i in range(n_estimators)]
        if estimator_pred.check() == sat:
            solvermodel = estimator_pred.model()
            total_sum = sum(
                float(solvermodel.eval(var).as_fraction()) for var in variables
            )
        else:
            total_sum = 0
            print("estimator error")
        init_value = model.predict(x, output_margin=True)[0] - total_sum
        print(init_value)

        equation_list = []
        for class_number in range(n_classes):
            estimator_list = []
            for estimator_number in range(
                int(len(model.get_booster().get_dump()) / n_classes)
            ):
                o = Real(f"o_{estimator_number}_{class_number}")
                estimator_list.append(o)
            equation_o = Sum(estimator_list) + init_value
            equation_list.append(equation_o)

        if n_classes <= 2:
            if predicted_class == 0:
                final_equation = equation_list[0] < 0
            else:
                final_equation = equation_list[0] > 0
        else:
            compare_equation = []
            for class_number in range(n_classes):
                if predicted_class != class_number:
                    compare_equation.append(
                        equation_list[predicted_class] > equation_list[class_number]
                    )
            final_equation = And(compare_equation)

        return final_equation

    def instance_expression(self, instance):
        formula = [Real(self.columns[i]) == value for i, value in enumerate(instance)]
        return formula

    def explain_expression(self, I, T, D, model, reorder):
        i_expression = I.copy()
        T_s = T
        D_s = D

        importances = model.feature_importances_
        non_zero_indices = np.where(importances != 0)[0]

        if reorder == "asc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]
        elif reorder == "desc":
            sorted_feature_indices = non_zero_indices[
                np.argsort(-importances[non_zero_indices])
            ]
            i_expression = [i_expression[i] for i in sorted_feature_indices]

        for feature in i_expression.copy():

            i_expression.remove(feature)

            # prove(Implies(And(And(i_expression), T), D))
            if self.is_proved(Implies(And(And(i_expression), T_s), D_s)):
                continue
                # print('proved')
            else:
                # print('not proved')
                i_expression.append(feature)
        # print(self.is_proved(Implies(And(And(i_expression), T_s), D_s)))
        return i_expression

    def is_proved(self, f):
        s = Solver()
        s.add(Not(f))
        if s.check() == unsat:
            return True
        else:
            return False

    def delta_expression(self, exp):
        expressions = []
        delta = Real("delta")

        self.delta_features = []
        for name in exp:
            tokens = name.split(" == ")
            z3feature = Real(tokens[0])
            self.delta_features.append(str(z3feature))
            value = tokens[1]

            if tokens[0] in self.categoric_features:
                expressions.append(z3feature == float(value))
            else:
                expressions.append(z3feature >= float(value) - delta)
                expressions.append(z3feature <= float(value) + delta)

        expressions.append(delta >= 0)
        self.deltaexp = expressions
        return expressions

    def get_delta(self, exp):
        expstr = []
        for expression in exp:
            expstr.append(str(expression))
        self.delta_expressions = self.delta_expression(expstr)
        
        opt = Optimize()
        opt.add(self.delta_expressions)
        opt.add(self.T)
        opt.add(Not(self.D))

        delta = Real("delta")
        expmin = opt.minimize(delta)
        opt.check()

        # rangemodel = opt.model()

        value = str(expmin.value())

        if "+ epsilon" in value:
            delta_value = float(value.split(" + ")[0])
        elif "epsilon" == value:
            delta_value = 0
        else:
            delta_value = float(value) - 0.01
        return delta_value

    def explain_range(self, instance, reorder="asc", dataset_bounds=True, ):
        exp = self.explain(instance, reorder)
        if exp != []:
            delta_value = self.get_delta(exp)
            
            if delta_value == 0:
                expstr = []
                for exppart in exp:
                    expstr.append(str(exppart))
                return expstr
            # save_deltas for model comparison

            range_exp = []
            for item in exp:
                name = str(item.arg(0))
                if name not in self.categoric_features:
                    itemvalue = float(item.arg(1).as_fraction())
                    lower = itemvalue - delta_value
                    upper = itemvalue + delta_value

                    if dataset_bounds == True:
                        idx = list(self.columns).index(name)
                        min_idx = np.min(self.data[:, idx])
                        max_idx = np.max(self.data[:, idx])
                        if lower < min_idx:
                            lower = min_idx
                        if upper > max_idx:
                            upper = max_idx

                    range_exp.append(f"{lower} <= {name} <= {upper}")
                else:
                    range_exp.append(f"{name} == {item.arg(1)}")

            return range_exp
        else:
            return exp
        
    def explain_range_other():
        pass

# pred

In [39]:
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = wine.target

y = np.where(y == 0, 0, 1)  # converte em binario
# X = X.iloc[:, :2] # corta colunas do df

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=101
)

xgbc = XGBClassifier(n_estimators=20, max_depth=5, learning_rate=0.1)
xgbc.fit(X_train, y_train)

preds = xgbc.predict(X_test)
preds

array([0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0])

In [40]:
booster = xgbc.get_booster()
booster

<xgboost.core.Booster at 0x1569240d7c0>

In [41]:
# for i in range(3):
#        plot_tree(xgbc, num_trees=i)

In [42]:
trees = booster.get_dump(with_stats=True)
for i, tree in enumerate(trees):
    print(f"Tree {i}:")
    print(tree)

Tree 0:
0:[proline<770] yes=1,no=2,missing=2,gain=98.9444351,cover=31.1348114
	1:leaf=0.140779421,cover=19.2948132
	2:[flavanoids<2.28999996] yes=3,no=4,missing=4,gain=21.9696541,cover=11.8399982
		3:leaf=0.0646421313,cover=1.97333312
		4:leaf=-0.270458966,cover=9.86666584

Tree 1:
0:[proline<770] yes=1,no=2,missing=2,gain=80.0747452,cover=30.8850746
	1:leaf=0.134412304,cover=18.2871494
	2:[flavanoids<2.17000008] yes=3,no=4,missing=4,gain=16.8105927,cover=12.5979252
		3:leaf=0.0870177448,cover=1.49905205
		4:[magnesium<126] yes=5,no=6,missing=6,gain=1.48054123,cover=11.0988731
			5:leaf=-0.236294851,cover=9.9362278
			6:leaf=-0.0528627001,cover=1.16264558

Tree 2:
0:[proline<770] yes=1,no=2,missing=2,gain=66.4681015,cover=30.2188606
	1:leaf=0.129026279,cover=17.2411041
	2:[flavanoids<2.17000008] yes=3,no=4,missing=4,gain=13.5625763,cover=12.9777565
		3:leaf=0.0835888311,cover=1.44813657
		4:[color_intensity<3.74000001] yes=5,no=6,missing=6,gain=1.46408463,cover=11.5296202
			5:leaf=-0.

In [43]:
booster.get_dump()

['0:[proline<770] yes=1,no=2,missing=2\n\t1:leaf=0.140779421\n\t2:[flavanoids<2.28999996] yes=3,no=4,missing=4\n\t\t3:leaf=0.0646421313\n\t\t4:leaf=-0.270458966\n',
 '0:[proline<770] yes=1,no=2,missing=2\n\t1:leaf=0.134412304\n\t2:[flavanoids<2.17000008] yes=3,no=4,missing=4\n\t\t3:leaf=0.0870177448\n\t\t4:[magnesium<126] yes=5,no=6,missing=6\n\t\t\t5:leaf=-0.236294851\n\t\t\t6:leaf=-0.0528627001\n',
 '0:[proline<770] yes=1,no=2,missing=2\n\t1:leaf=0.129026279\n\t2:[flavanoids<2.17000008] yes=3,no=4,missing=4\n\t\t3:leaf=0.0835888311\n\t\t4:[color_intensity<3.74000001] yes=5,no=6,missing=6\n\t\t\t5:leaf=-0.0430838913\n\t\t\t6:leaf=-0.20804821\n',
 '0:[proline<770] yes=1,no=2,missing=2\n\t1:leaf=0.124404095\n\t2:[flavanoids<2.17000008] yes=3,no=4,missing=4\n\t\t3:leaf=0.080420211\n\t\t4:[color_intensity<3.74000001] yes=5,no=6,missing=6\n\t\t\t5:leaf=-0.0405692905\n\t\t\t6:leaf=-0.186384767\n',
 '0:[proline<770] yes=1,no=2,missing=2\n\t1:leaf=0.120385185\n\t2:[flavanoids<2.28999996] yes=

In [44]:
print(type(booster))

<class 'xgboost.core.Booster'>


In [45]:
X_test.values[0]

array([1.305e+01, 1.650e+00, 2.550e+00, 1.800e+01, 9.800e+01, 2.450e+00,
       2.430e+00, 2.900e-01, 1.440e+00, 4.250e+00, 1.120e+00, 2.510e+00,
       1.105e+03])

# explain

In [46]:
explainer = XGBoostExplainer(xgbc, X)
explainer.fit()

In [47]:
i = 2
print(explainer.explain(X_test.values[i]))
print("delta=", explainer.explain_range(X_test.values[i]))

0.7323942
[proline == 650]
0.7323942
delta= ['530.01 <= proline <= 769.99']


In [48]:
deltatest = 120
valormaior = xgbc.predict(
    np.array(
        [
            1.305e01,
            1.650e00,
            2.550e00,
            1.800e01,
            9.800e01,
            2.450e00,
            2.430e00,
            2.900e-01,
            1.440e00,
            4.250e00,
            1.120e00,
            2.510e00,
            650 + deltatest,
        ]
    ).reshape(1, -1)
)
valormenor = xgbc.predict(
    np.array(
        [
            1.305e01,
            1.650e00,
            2.550e00,
            1.800e01,
            9.800e01,
            2.450e00,
            2.430e00,
            2.900e-01,
            1.440e00,
            4.250e00,
            1.120e00,
            2.510e00,
            650 - deltatest,
        ]
    ).reshape(1, -1)
)

print(valormaior, valormenor)

[0] [1]


In [49]:
# for i in range(len(X_test)):
#     print('features', len(explainer.explain(X_test.values[i])), '| delta=', explainer.explain_range(X_test.values[i]))

In [50]:
# count = 0
# for i in range(len(X_test)):
#     delt1 = explainer.explain_range(X_test.values[i])
#     valor = float(X_test['petal length (cm)'].values[i])

#     valormaior = xgbc.predict(np.array([0 , 0, valor + delt1, 0]).reshape(1, -1))
#     valormenor = xgbc.predict(np.array([0 , 0, valor - delt1, 0]).reshape(1, -1))

#     if valormenor != valormaior:
#         count += 1
# print(count)

In [51]:
X.columns

Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium',
       'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
       'proanthocyanins', 'color_intensity', 'hue',
       'od280/od315_of_diluted_wines', 'proline'],
      dtype='object')

In [52]:
print(X_test.values[0])
print(y_test[0])
print(xgbc.predict([X_test.values[0]]))

[1.305e+01 1.650e+00 2.550e+00 1.800e+01 9.800e+01 2.450e+00 2.430e+00
 2.900e-01 1.440e+00 4.250e+00 1.120e+00 2.510e+00 1.105e+03]
0
[0]


In [53]:
# filter_condition = X["petal length (cm)"] == 1.4
# filtered_X = X[filter_condition]
# filtered_y = y[filter_condition]
# print("Entradas em X:")
# print(filtered_X)

# print("\nCorrespondências em y:")
# print(filtered_y)

# print("\n Predictions")
# print(xgbc.predict(filtered_X))

## print expresions

In [54]:
xgbc.get_booster().trees_to_dataframe()

Unnamed: 0,Tree,Node,ID,Feature,Split,Yes,No,Missing,Gain,Cover,Category
0,0,0,0-0,proline,770.00,0-1,0-2,0-2,98.944435,31.134811,
1,0,1,0-1,Leaf,,,,,0.140779,19.294813,
2,0,2,0-2,flavanoids,2.29,0-3,0-4,0-4,21.969654,11.839998,
3,0,3,0-3,Leaf,,,,,0.064642,1.973333,
4,0,4,0-4,Leaf,,,,,-0.270459,9.866666,
...,...,...,...,...,...,...,...,...,...,...,...
127,19,2,19-2,Leaf,,,,,-0.094247,3.920163,
128,19,3,19-3,Leaf,,,,,0.088128,5.392917,
129,19,4,19-4,alcalinity_of_ash,19.50,19-5,19-6,19-6,0.587271,2.180460,
130,19,5,19-5,Leaf,,,,,-0.053039,1.015403,


In [55]:
print(explainer.T_model)

And(And(Implies(And(proline < 770), o_0_0 == 0.140779421),
        Implies(And(proline >= 770, flavanoids < 2.29),
                o_0_0 == 0.0646421313),
        Implies(And(proline >= 770, flavanoids >= 2.29),
                o_0_0 == -0.270458966)),
    And(Implies(And(proline < 770), o_1_0 == 0.134412304),
        Implies(And(proline >= 770, flavanoids < 2.17),
                o_1_0 == 0.0870177448),
        Implies(And(proline >= 770,
                    flavanoids >= 2.17,
                    magnesium < 126),
                o_1_0 == -0.236294851),
        Implies(And(proline >= 770,
                    flavanoids >= 2.17,
                    magnesium >= 126),
                o_1_0 == -0.0528627001)),
    And(Implies(And(proline < 770), o_2_0 == 0.129026279),
        Implies(And(proline >= 770, flavanoids < 2.17),
                o_2_0 == 0.0835888311),
        Implies(And(proline >= 770,
                    flavanoids >= 2.17,
                    color_intensity < 3.74),
     

In [56]:
xgbc.predict([X_test.values[0]], output_margin=True)

array([-1.9658402], dtype=float32)

In [57]:
-0.265196383 + -0.225890428 + -0.199127138 + y.mean()

np.float64(-0.021674623157303397)

In [58]:
print(explainer.D)

o_0_0 +
o_1_0 +
o_2_0 +
o_3_0 +
o_4_0 +
o_5_0 +
o_6_0 +
o_7_0 +
o_8_0 +
o_9_0 +
o_10_0 +
o_11_0 +
o_12_0 +
o_13_0 +
o_14_0 +
o_15_0 +
o_16_0 +
o_17_0 +
o_18_0 +
o_19_0 +
0.7323942 >
0


In [59]:
print(explainer.I)

[alcohol == 13.32, malic_acid == 3.24, ash == 2.38, alcalinity_of_ash == 21.5, magnesium == 92, total_phenols == 1.93, flavanoids == 0.76, nonflavanoid_phenols == 0.45, proanthocyanins == 1.25, color_intensity == 8.42, hue == 0.55, od280/od315_of_diluted_wines == 1.62, proline == 650]


In [60]:
if explainer.delta_expressions:
    print(explainer.delta_expressions)

[proline >= 650 - delta, proline <= 650 + delta, delta >= 0]


# check explainer correct

In [61]:
def check_correct_explanation(exp, explainer):
    opt = Optimize()

    exprange_z3 = []
    exptokens = []
    for item in exp:
        item = str(item)
        if "<=" in item:
            tokens = item.split(" <= ")
            exprange_z3.append((tokens[0]) <= Real(tokens[1]))
            exprange_z3.append(Real(tokens[1]) <= (tokens[2]))
            exptokens.append(tokens[1])
        else:
            tokens = item.split(" == ")
            exprange_z3.append(Real(tokens[0]) == (tokens[1]))
            exptokens.append(tokens[0])
    opt.add(exprange_z3)

    inst = explainer.I
    deltaexp = []
    for item in inst:
        item = str(item)
        tokens = item.split(" == ")
        if tokens[0] not in exptokens:
            if tokens[0] in explainer.categoric_features:
                deltaexp.append(Real(tokens[0]) == (tokens[1]))
            else:
                deltaexp.append(Real(tokens[0]) >= (tokens[1]) - Real("delta"))
                deltaexp.append(Real(tokens[0]) <= (tokens[1]) + Real("delta"))
    opt.add(deltaexp)

    opt.add(explainer.T_model)

    opt.add(Not(explainer.D))

    opt.add(Real("delta") >= 0)

    delta = Real("delta")
    expmin = opt.minimize(delta)

    printlist = []
    if opt.check() == sat:
        for var in opt.model():
            if str(var) in explainer.columns:
                printlist.append(f"{var} = {opt.model()[var]}")
        printlist.append(f"delta = {opt.model().eval(delta)}")
    else:
        printlist.append("unsat == correct")
    #   print(printlist)
    value = str(expmin.value())
    #   print(value)
    return printlist, exprange_z3, deltaexp, explainer.T, Not(explainer.D)

In [62]:
explainer = XGBoostExplainer(xgbc, X)
explainer.fit()

In [63]:
exprange = explainer.explain_range(X.values[3])
print(exprange)

0.73239446
['2.33 <= flavanoids <= 4.65', '1478.84 <= proline <= 1481.16']


In [64]:
ans, ansrange, ansdelta, anst, ansnotd = check_correct_explanation(exprange, explainer)
print(ans)

['unsat == correct']


In [None]:
count = 0
explanationstest = []
for i in range(0, len(X)):
    exprange = explainer.explain_range(X.values[i])
    explanationstest.append(exprange)
    ans, ansrange, ansdelta, anst, ansnotd = check_correct_explanation(
        exprange, explainer
    )
    if ans[0] == ("unsat == correct"):
        count += 1
    else:
        print(ans)
count, len(X)

0.7323941
0.732394
14
['hue = 1.25', 'flavanoids = 2.3299999999?', 'color_intensity = 3.7399999999?', 'proanthocyanins = -24.72', 'od280/od315_of_diluted_wines = 2.6899999999?', 'alcalinity_of_ash = 20.4', 'nonflavanoid_phenols = -25.74', 'alcohol = 13.0299999999?', 'magnesium = 126', 'proline = 1049.57', 'malic_acid = 2.31', 'ash = -23.86', 'total_phenols = 3.27', 'delta = 26']
0.7323941
0.73239446
0.73239434
0.73239446
0.732394
0.732394
14
['hue = 1.25', 'flavanoids = 2.3299999999?', 'color_intensity = 3.7399999999?', 'proanthocyanins = -3.75', 'od280/od315_of_diluted_wines = 2.6899999999?', 'alcalinity_of_ash = 20.4', 'nonflavanoid_phenols = -4.69', 'alcohol = 13.0299999999?', 'magnesium = 126', 'proline = 1294.82', 'malic_acid = 2.31', 'ash = -2.39', 'total_phenols = 3.27', 'delta = 5']
0.732394
0.732394
0.73239446
0.732394
0.732394
14
['hue = 1.25', 'flavanoids = 2.3299999999?', 'color_intensity = 3.7399999999?', 'proanthocyanins = -35.19', 'od280/od315_of_diluted_wines = 2.689999

(173, 178)

In [66]:
exprange = explainer.explain_range(X.values[170])
print(exprange)

0.7323942
['278.0 <= proline <= 769.99']


In [67]:
import re
from collections import defaultdict


def extract_ranges(explanations):
    feature_ranges = defaultdict(lambda: [float("inf"), float("-inf")])

    for explanation in explanations:
        # print(explanation)
        for rule in explanation:
            # print(f"Processing rule: {rule}")  # Debugging line
            match = re.search(r"([\d\.]+) <= ([a-zA-Z_]+) <= ([\d\.]+)", rule)
            eq_match = re.search(r"([a-zA-Z_]+) == ([\d\.]+)", rule)

            if match:
                min_val, feature, max_val = match.groups()
                min_val, max_val = float(min_val), float(max_val)
                feature_ranges[feature][0] = min(feature_ranges[feature][0], min_val)
                feature_ranges[feature][1] = max(feature_ranges[feature][1], max_val)
                # print(f"Updated range for {feature}: {feature_ranges[feature]}")  # Debugging line

            if eq_match:
                feature, value = eq_match.groups()
                value = float(value)
                feature_ranges[feature][0] = min(feature_ranges[feature][0], value)
                feature_ranges[feature][1] = max(feature_ranges[feature][1], value)
                # print(f"Updated exact value for {feature}: {feature_ranges[feature]}")  # Debugging line

    return dict(feature_ranges)


feature_ranges = extract_ranges(explanationstest)
print(feature_ranges)

{'flavanoids': [0.34, 5.08], 'proline': [278.0, 1680.0], 'alcohol': [12.46, 14.83], 'magnesium': [93.98, 139.0], 'color_intensity': [3.35, 3.97], 'hue': [1.31, 1.31], 'alcalinity_of_ash': [30.0, 30.0], 'total_phenols': [3.3, 3.3]}


In [68]:
X.describe().loc[["min", "max"]]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
min,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.41,1.28,0.48,1.27,278.0
max,14.83,5.8,3.23,30.0,162.0,3.88,5.08,0.66,3.58,13.0,1.71,4.0,1680.0


In [69]:
def compare_with_describe(feature_ranges, df):
    describe_stats = df.describe().loc[["min", "max"]]

    for feature, (min_val, max_val) in feature_ranges.items():
        if feature in describe_stats.columns:
            min_desc = describe_stats.loc["min", feature]
            max_desc = describe_stats.loc["max", feature]

            if min_desc <= min_val <= max_desc and min_desc <= max_val <= max_desc:
                print(f"{feature}: Values are within range")
            else:
                print(
                    f"{feature}: Values are OUT of range ({min_val}, {max_val}) vs ({min_desc}, {max_desc})"
                )
        else:
            print(f"{feature}: Not found in DataFrame")


compare_with_describe(feature_ranges, X.describe().loc[["min", "max"]])

flavanoids: Values are within range
proline: Values are within range
alcohol: Values are within range
magnesium: Values are within range
color_intensity: Values are within range
hue: Values are within range
alcalinity_of_ash: Values are within range
total_phenols: Values are within range


In [70]:
for expp in explanationstest:
    print(expp)

['2.33 <= flavanoids <= 3.79', '1064.27 <= proline <= 1065.73']
['2.3299999999999996 <= flavanoids <= 3.19', '1049.57 <= proline <= 1050.43']
['2.33 <= flavanoids <= 4.15', '1184.09 <= proline <= 1185.91']
['2.33 <= flavanoids <= 4.65', '1478.84 <= proline <= 1481.16']
['700.01 <= proline <= 769.99']
['2.33 <= flavanoids <= 4.45', '1448.94 <= proline <= 1451.06']
['2.33 <= flavanoids <= 2.71', '1289.81 <= proline <= 1290.19']
['2.3299999999999996 <= flavanoids <= 2.69', '1294.82 <= proline <= 1295.18']
['2.33 <= flavanoids <= 3.63', '1044.35 <= proline <= 1045.65']
['2.33 <= flavanoids <= 3.9699999999999998', '1044.18 <= proline <= 1045.82']
['2.33 <= flavanoids <= 4.31', '1509.01 <= proline <= 1510.99']
['2.33 <= flavanoids <= 2.5300000000000002', '1279.9 <= proline <= 1280.1']
['2.3299999999999996 <= flavanoids <= 3.19', '1319.57 <= proline <= 1320.43']
['2.33 <= flavanoids <= 5.05', '1148.64 <= proline <= 1151.36']
['2.33 <= flavanoids <= 4.95', '1545.69 <= proline <= 1548.31']
['2.