In [1]:
# Install the dependency once (uncomment to run it inside the notebook kernel):
# %pip install -U ruletree

In [2]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from IPython.display import IFrame, JSON
from RuleTree import RuleTreeClassifier
from RuleTree.encoding.ruletree_to_jakowski_tree_encoder import ruletree_to_jakowski
from RuleTree.encoding.ruletree_to_jakowski_tree_encoder import deshift_jakowski_encoding

In [3]:
def print_np(arr, r=1):
    """Print a 2-D array one bracketed row per line, rounding each value.

    Every value is written as ``<value>,<TAB>``. From the second row on, a
    value is preceded by a single space whenever its product with the value
    directly above it is not ``>= 0``.

    Parameters
    ----------
    arr : array indexable as ``arr[i, j]`` and exposing ``shape``
        The data to print; both ``shape[0]`` and ``shape[1]`` are read.
    r : int, default 1
        Number of decimals passed to ``round``.
    """
    for row in range(arr.shape[0]):
        print('[', end='')
        for col in range(arr.shape[1]):
            # Only rows after the first have a value above them to compare with.
            needs_pad = row != 0 and not (arr[row - 1, col] * arr[row, col] >= 0)
            pad = ' ' if needs_pad else ''
            print(pad + f"{round(arr[row, col], r)},\t", end='')
        print(']')

In [4]:
# Load Iris into a DataFrame: one column per feature plus a trailing 'target' column.
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)

# Separate features from labels, then hold out 30% of the rows (fixed seed) for testing.
X = iris_df.drop(columns='target').to_numpy()
y = iris_df['target'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [5]:
# Fit the reference tree on the training split.
rt = RuleTreeClassifier(max_depth=8, prune_useless_leaves=False, random_state=0)
rt.fit(X_train, y_train)
#rt.root.simplify()

# Predict once and keep the results: they feed the report below and are the
# baseline the round-tripped tree is compared against later in the notebook.
y_pred_before_encoding = rt.predict(X_test)
y_pred_proba_before_encoding = rt.predict_proba(X_test)

print(classification_report(y_pred=y_pred_before_encoding, y_true=y_test))

# Render the fitted tree and embed the resulting PDF as the cell output.
rt.export_graphviz(filename="demo")#, columns_names=iris_df.columns[:-1])
IFrame("demo.pdf", width=600, height=300)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [6]:
# Export the fitted tree with `to_dict`; the result is what gets encoded in the next cell.
# NOTE(review): `json` here is a plain variable holding that export, not the stdlib module.
# Presumably the `filename` argument also writes the export to "demo.json" — confirm.
json = rt.to_dict(filename="demo.json")



In [7]:
# Encode the exported tree as a 2-row matrix and print it rounded to 3 decimals.
enc = ruletree_to_jakowski(json)
print_np(enc, 3)

[4.0,	4.0,	3.0,	4.0,	4.0,	4.0,	4.0,	4.0,	4.0,	4.0,	4.0,	4.0,	2.0,	4.0,	4.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	-1.0,	]
[0.75,	0.75,	4.95,	0.75,	0.75,	1.65,	1.75,	0.75,	0.75,	0.75,	0.75,	1.65,	3.1,	1.65,	1.75,	 1.0,	 1.0,	 1.0,	 1.0,	 1.0,	 1.0,	 1.0,	 1.0,	 2.0,	 2.0,	 3.0,	 2.0,	 3.0,	 2.0,	 3.0,	 3.0,	]


In [8]:
# Show the same encoding with NumPy's default repr (unrounded values).
enc

array([[ 4.        ,  4.        ,  3.        ,  4.        ,  4.        ,
         4.        ,  4.        ,  4.        ,  4.        ,  4.        ,
         4.        ,  4.        ,  2.        ,  4.        ,  4.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        , -1.        , -1.        , -1.        , -1.        ,
        -1.        ],
       [ 0.75      ,  0.75      ,  4.95000005,  0.75      ,  0.75      ,
         1.65000004,  1.75      ,  0.75      ,  0.75      ,  0.75      ,
         0.75      ,  1.65000004,  3.10000002,  1.65000004,  1.75      ,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
         1.        ,  1.        ,  1.        ,  2.        ,  2.        ,
         3.        ,  2.        ,  3.        ,  2.        ,  3.        ,
         3.        ]])

In [9]:
def get_col(n):
    """Return the path label of node ``n`` in a level-order numbered binary tree.

    The root (``n == 0``) is ``"R"``. Every other node is ``"R"`` followed by
    one letter per step from the root: ``"l"`` when the index on that step is
    odd and ``"r"`` when it is even, with the parent of index ``k`` being
    ``(k - 1) // 2``.
    """
    steps = []
    # Walk up towards the root, recording the branch taken at each level.
    while n > 0:
        steps.append("l" if n % 2 == 1 else "r")
        n = (n - 1) // 2
    # Steps were collected leaf-to-root; the label reads root-to-leaf.
    return "R" + "".join(reversed(steps))

def generate_node_list(matrix):
    """Turn a 2-row encoding matrix into a list of node dictionaries.

    Each column of ``matrix`` is read as a ``(feature, value)`` pair and its
    position is converted to a node id with ``get_col``. A column becomes an
    internal node when ``feature`` differs from both ``value`` and ``-1``;
    otherwise it becomes a leaf whose prediction is ``value``.

    Parameters
    ----------
    matrix : 2-D array with exactly two rows
        Row 0 holds the feature entries, row 1 the thresholds / predictions.

    Returns
    -------
    list of dict
        One dictionary per column, in column order.
    """
    split_stump = 'RuleTree.stumps.classification.DecisionTreeStumpClassifier'

    def describe(position, feature, value):
        # One dictionary per column; children ids extend the parent's id.
        name = get_col(position)
        if not (feature != value and feature != -1):
            return {
                'node_id': name,
                'prediction': value,
                'stump_type': '',
                'is_leaf': True,
            }
        return {
            'node_id': name,
            'stump_type': split_stump,
            'feature_idx': feature,
            'threshold': value,
            'is_leaf': False,
            'left_node': name + 'l',
            'right_node': name + 'r',
            'is_categorical': False,
        }

    return [
        describe(position, feature, value)
        for position, (feature, value) in enumerate(matrix.T)
    ]

In [10]:


# Rebuild a tree dictionary from the de-shifted encoding and show it as
# an interactive JSON widget.
es_tree = dict(
    tree_type='RuleTree.tree.RuleTreeClassifier',
    nodes=generate_node_list(deshift_jakowski_encoding(enc)),
)

JSON(es_tree)

<IPython.core.display.JSON object>

In [11]:
# Import under an alias: an earlier cell binds the name `json` to the exported
# tree dictionary, and a plain `import json` would overwrite that variable, so
# re-running the encoding cell afterwards would receive the module instead.
import json as json_lib

# Write the rebuilt tree to disk so it can be loaded back with `from_dict`.
with open("demo_import.json", 'w') as f:
    json_lib.dump(es_tree, f)

In [12]:
# Load the rebuilt tree back from the JSON file written in the previous cell.
rt_2 = RuleTreeClassifier.from_dict("demo_import.json")
# The hand-built dictionary only has 'tree_type' and 'nodes', so the class
# labels are copied over from the fitted model.
# NOTE(review): presumably `from_dict` does not set `classes_` here — confirm.
rt_2.classes_ = rt.classes_

RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier
RuleTree.stumps.classification.DecisionTreeStumpClassifier


In [13]:
# Render the reloaded tree for visual comparison with the original one.
# NOTE(review): this reuses the "demo" name already used when exporting `rt`,
# so the earlier rendering is presumably overwritten — confirm.
rt_2.export_graphviz(filename="demo")
IFrame("demo.pdf", width=600, height=300)

In [14]:
# rt_2.root = rt_2.root.simplify()

In [15]:
# Export and display `rt_2` again. With the `simplify()` call in the preceding
# cell commented out, this repeats the previous export unchanged.
rt_2.export_graphviz(filename="demo")
IFrame("demo.pdf", width=600, height=300)

In [16]:
# Evaluate the reloaded tree on the same held-out split used for `rt`.
print(classification_report(y_pred=rt_2.predict(X_test), y_true=y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [17]:
# Predictions of the round-tripped tree, to be compared with the
# `*_before_encoding` values taken from the original model.
y_pred_after_encoding = rt_2.predict(X_test)
y_pred_proba_after_encoding = rt_2.predict_proba(X_test)

In [18]:
# Round-trip check: (labels match, probabilities match).
# The recorded output is (True, False): labels agree, probabilities do not.
# NOTE(review): the rebuilt leaves carry only a 'prediction' value, which may be
# why the probabilities differ — confirm.
np.allclose(y_pred_before_encoding, y_pred_after_encoding), np.allclose(y_pred_proba_before_encoding, y_pred_proba_after_encoding)

(True, False)

In [20]:
# Display the class probabilities of the original model for inspection.
# NOTE(review): the execution count jumps from 18 to 20 here, so a cell was
# removed or cells were run out of order — re-run from a fresh kernel to confirm.
y_pred_proba_before_encoding

array([[0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.]])