# Converting a RuleTreeClassifier to a matrix

Reference: https://blog.dailydoseofds.com/p/transform-decision-tree-into-matrix

In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [22]:
# Load Iris: every column except the last is a feature, the last one is the label.
df = pd.read_csv("../datasets/CLF/iris.csv")
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Hold out 30% of the rows for testing, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [23]:
from RuleTree import RuleTreeClassifier

# Fit a depth-2 rule tree with a fixed seed on the training split.
rt = RuleTreeClassifier(max_depth=2, random_state=42).fit(X_train, y_train)

# Reference report from the tree's own predict(); the matrix-based prediction
# computed later in the notebook can be compared against it.
rt_test_pred = rt.predict(X_test)
print(classification_report(y_test, rt_test_pred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       0.86      0.80      0.83        15
 Iris-virginica       0.81      0.87      0.84        15

       accuracy                           0.89        45
      macro avg       0.89      0.89      0.89        45
   weighted avg       0.89      0.89      0.89        45



In [24]:
from IPython.display import display_pdf

# Export the fitted tree through graphviz.
# NOTE(review): the next step assumes this call writes 'matrix_demo_tree.pdf'
# into the working directory — confirm against RuleTree's export_graphviz.
rt.export_graphviz(filename="matrix_demo_tree")

# Read the rendered PDF as raw bytes, then show it inline in the notebook.
with open("matrix_demo_tree.pdf", "rb") as pdf_file:
    pdf_bytes = pdf_file.read()
display_pdf(pdf_bytes, raw=True)

In [25]:
# Collect the tree's predicate (evaluation) nodes as a dict keyed by node id;
# these are the inputs used later to build the A and B matrices.
predicates = rt.get_predicates()
# Bare expression: shows the dict as the cell output.
predicates

{'R': <RuleTree.tree.RuleTreeNode.RuleTreeNode at 0x307594ec0>,
 'Rr': <RuleTree.tree.RuleTreeNode.RuleTreeNode at 0x306cf8c50>}

In [26]:
# Collect the tree's leaf nodes as a dict keyed by node id;
# these are the inputs used later to build the C and E matrices.
leaf_nodes = rt.get_leaf_nodes()
# Bare expression: shows the dict as the cell output.
leaf_nodes

{'Rl': <RuleTree.tree.RuleTreeNode.RuleTreeNode at 0x3068db0e0>,
 'Rrl': <RuleTree.tree.RuleTreeNode.RuleTreeNode at 0x306cf8aa0>,
 'Rrr': <RuleTree.tree.RuleTreeNode.RuleTreeNode at 0x305a1b920>}

`RuleTree.utils.tree_utils` implements useful functions to convert a RuleTree into a matrix representation.

We are going to use 5 matrices:
1. *A*: This matrix captures the relationship between input features (rows) and evaluation nodes/predicates (columns)
2. *B*: The entries of matrix B are the threshold value at each predicate
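3. *C*: This matrix relates every evaluation predicate (rows) to every leaf node (columns): the entry is 1 if the leaf is reached when the predicate holds, -1 if it is reached when the predicate fails, and 0 if the predicate is not on the path to that leaf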
4. *D*: The entries of vector D are the sum of non-negative entries in every column of Matrix C
5. *E*: This matrix holds the mapping between leaf nodes and their corresponding output labels

In [27]:
from RuleTree.utils.tree_utils import (
    get_feature_node_matrix,
    get_thresholds_matrix,
    get_leaf_internal_node_matrix,
    get_leaf_prediction_matrix,
)

# A: features x predicates, B: one threshold per predicate.
A = get_feature_node_matrix(predicates)
B = get_thresholds_matrix(predicates)

# C: predicates x leaves; the second returned value is not needed here.
C, _ = get_leaf_internal_node_matrix(leaf_nodes)

# D: per leaf (column of C), the sum of the entries of C with negatives zeroed out.
D = np.maximum(C, 0).sum(axis=0)

# E: leaves x classes, holding class probabilities (return_proba=True).
E = get_leaf_prediction_matrix(leaf_nodes, return_proba=True)

# Print every matrix under its own header.
for header, mat in (
    ("A:\r\n", A),
    ("\r\nB:\r\n", B),
    ("\r\nC:\r\n", C),
    ("\r\nD:\r\n", D),
    ("\r\nE:\r\n", E),
):
    print(header, mat)

A:
 [[0 0]
 [0 0]
 [1 0]
 [0 1]]

B:
 [2.44999999 1.55000001]

C:
 [[ 1 -1 -1]
 [ 0  1 -1]]

D:
 [1 1 0]

E:
 [[1.         0.         0.        ]
 [0.         0.97058824 0.02941176]
 [0.         0.05555556 0.94444444]]


In [28]:
# Step 1: evaluate every predicate on every test row (rows x predicates, boolean).
# NOTE(review): this uses a strict `<` against the thresholds; confirm it matches
# the comparison the fitted tree itself applies at a split (ties would differ).
predicate_holds = X_test @ A < B

# Step 2: a row lands in a leaf exactly when its score for that leaf equals D.
leaf_is_reached = (predicate_holds @ C) == D

# Step 3: map the selected leaf to its class probabilities, then to the class label.
class_proba = leaf_is_reached @ E
y_pred = rt.classes_[class_proba.argmax(axis=1)]

print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       0.86      0.80      0.83        15
 Iris-virginica       0.81      0.87      0.84        15

       accuracy                           0.89        45
      macro avg       0.89      0.89      0.89        45
   weighted avg       0.89      0.89      0.89        45

