In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from RuleTree import RuleTreeClassifier
from sklearn.ensemble import BaggingClassifier
from RuleTree.encoding.dict_utils import make_complete_rule_tree

In [2]:
from sklearn import datasets
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

X = iris_df.drop('target', axis=1).values
y = iris_df['target'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
rf = BaggingClassifier(RuleTreeClassifier(max_depth=5, random_state=42), n_estimators=10, random_state=42)

rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9333333333333333

In [3]:
rf.n_classes_, rf.n_features_in_

(3, 4)

In [4]:
i = 0
rf.estimators_[i].export_graphviz(filename=f"rf_{i}")

In [5]:
from RuleTree.encoding.rf_utils import complete_forest
rf_complete = complete_forest(rf)
rf_complete.estimators_[i].export_graphviz(filename=f"rf_complete_{i}")
rf_complete.score(X_test, y_test)

0.9333333333333333

In [6]:
from RuleTree.encoding.rf_utils import simplify_forest
rf_simplified = simplify_forest(rf_complete)
rf_simplified.estimators_[i].export_graphviz(filename=f"rf_simplified_{i}")
rf_simplified.score(X_test, y_test)

0.9333333333333333

In [7]:
from RuleTree.encoding.jankowski import jankowski_to_list_of_dicts, list_of_dicts_to_jankowski
from RuleTree.encoding.rf_utils import random_forest_to_list_of_dicts, list_of_dicts_to_random_forest

In [8]:
dicts_complete = random_forest_to_list_of_dicts(rf_complete)

In [9]:
encodings = list_of_dicts_to_jankowski(dicts_complete)
encodings.shape

(10, 2, 63)

In [10]:
dicts_complete_recon = jankowski_to_list_of_dicts(encodings, dicts_complete)
dicts_complete_recon

[{'tree_type': 'RuleTree.tree.RuleTreeClassifier',
  'nodes': [{'node_id': 'R',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 3.0,
    'threshold': 1.550000011920929,
    'is_leaf': False,
    'left_node': 'Rl',
    'right_node': 'Rr',
    'is_categorical': False,
    'prediction_probability': [nan, nan, nan]},
   {'node_id': 'Rl',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 3.0,
    'threshold': 0.7000000029802322,
    'is_leaf': False,
    'left_node': 'Rll',
    'right_node': 'Rlr',
    'is_categorical': False,
    'prediction_probability': [nan, nan, nan]},
   {'node_id': 'Rr',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 0.0,
    'threshold': 6.099999904632568,
    'is_leaf': False,
    'left_node': 'Rrl',
    'right_node': 'Rrr',
    'is_categorical': False,
    'prediction_probability': [nan, nan, nan]},
   {'node_id': 

In [11]:
dicts_complete_recon[0].keys()

dict_keys(['tree_type', 'nodes', 'args', 'classes_', 'n_classes_'])

In [12]:
dicts_complete[0].keys()

dict_keys(['tree_type', 'args', 'classes_', 'n_classes_', 'nodes'])

In [13]:
rf_complete_recon = list_of_dicts_to_random_forest(dicts_complete_recon, rf_complete)
rf_complete_recon.estimators_[i].export_graphviz(filename=f"rf_complete_recon_{i}")
rf_complete_recon.score(X_test, y_test)

0.9333333333333333

In [14]:
dicts_complete_recon[0]["nodes"]

[{'node_id': 'R',
  'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
  'feature_idx': 3.0,
  'threshold': 1.550000011920929,
  'is_leaf': False,
  'left_node': 'Rl',
  'right_node': 'Rr',
  'is_categorical': False,
  'prediction_probability': [nan, nan, nan]},
 {'node_id': 'Rl',
  'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
  'feature_idx': 3.0,
  'threshold': 0.7000000029802322,
  'is_leaf': False,
  'left_node': 'Rll',
  'right_node': 'Rlr',
  'is_categorical': False,
  'prediction_probability': [nan, nan, nan]},
 {'node_id': 'Rr',
  'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
  'feature_idx': 0.0,
  'threshold': 6.099999904632568,
  'is_leaf': False,
  'left_node': 'Rrl',
  'right_node': 'Rrr',
  'is_categorical': False,
  'prediction_probability': [nan, nan, nan]},
 {'node_id': 'Rll',
  'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
  'feature_idx': 3.0,
  'thresh

In [35]:
dicts_complete[0]["nodes"][0]["prediction_probability"]

list

In [16]:
rf_complete_recon.predict(X_test)

ValueError: shape mismatch: value array of shape (15,3) could not be broadcast to indexing result of shape (15,1)

In [14]:
rf_complete_recon.predict(X_test)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0])

In [15]:
rf_complete.predict(X_test)

array([2, 1, 1, 1, 2, 2, 1, 1, 0, 2, 0, 0, 2, 2, 0, 2, 1, 0, 0, 0, 1, 0,
       1, 2, 2, 1, 1, 1, 1, 0, 1, 2, 1, 0, 2, 0, 0, 0, 0, 2, 1, 0, 2, 2,
       1])

In [16]:
rf_complete_recon.estimators_[0].classes_

In [17]:
rf_complete.estimators_[0].classes_

[0, 1, 2]

In [19]:
dicts_complete_recon

[{'tree_type': 'RuleTree.tree.RuleTreeClassifier',
  'nodes': [{'node_id': 'R',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 3.0,
    'threshold': 1.550000011920929,
    'is_leaf': False,
    'left_node': 'Rl',
    'right_node': 'Rr',
    'is_categorical': False},
   {'node_id': 'Rl',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 2.0,
    'threshold': 2.449999988079071,
    'is_leaf': False,
    'left_node': 'Rll',
    'right_node': 'Rlr',
    'is_categorical': False},
   {'node_id': 'Rr',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 0.0,
    'threshold': 6.099999904632568,
    'is_leaf': False,
    'left_node': 'Rrl',
    'right_node': 'Rrr',
    'is_categorical': False},
   {'node_id': 'Rll',
    'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'feature_idx': 2.0,
    'threshold': 2.449999988079

In [27]:
dicts_complete[5]["args"]

{'max_leaf_nodes': inf,
 'min_samples_split': 2,
 'max_depth': 5,
 'prune_useless_leaves': False,
 'stump_selection': 'random',
 'random_state': 1286572245,
 'base_stumps': [(1.0,
   {'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
    'args': {'max_depth': 1}})]}

In [None]:
from RuleTree.encoding._deprecated.ruletree_to_jakowski_tree_encoder import ruletree_to_jakowski
from RuleTree.encoding.jankowski import dict_to_jankowski