In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from RuleTree import RuleTreeClassifier
from sklearn.ensemble import BaggingClassifier
from RuleTree.utils.dict_utils import make_complete_rule_tree, get_tree_depth
import numpy as np

In [5]:
from sklearn.preprocessing import MinMaxScaler
import warnings
from RuleTree.stumps.classification import PartialPivotTreeStumpClassifier, DecisionTreeStumpClassifier, PivotTreeStumpClassifier
from sklearn import datasets
iris = datasets.load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['target'] = iris.target

minmax_scaler = MinMaxScaler()
X = minmax_scaler.fit_transform(iris_df.drop('target', axis=1).values)
y = iris_df['target'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42, stratify=y_train)
rf = BaggingClassifier(
    RuleTreeClassifier(
        max_depth=3,
        base_stumps=[
            PartialPivotTreeStumpClassifier(n_shapelets=np.inf, n_features_strategy='all', selection='all', n_jobs=10, random_state=42),
            DecisionTreeStumpClassifier(max_depth=1, random_state=42),
            PivotTreeStumpClassifier(max_depth=1, random_state=42),
        ],
        random_state=42, stump_selection='best'),
    n_estimators=1000, random_state=42, n_jobs=1 #MUST BE 1
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    rf.fit(X_train, y_train)
y_pred = rf.predict(X_val)
print("F1 Score (val):", f1_score(y_val, y_pred, average='macro'))
y_pred = rf.predict(X_test)
print("F1 Score:", f1_score(y_test, y_pred, average='macro'))

F1 Score (val): 1.0
F1 Score: 0.9581699346405229


In [3]:
rf.estimators_[0].to_dict("dict")

{'tree_type': 'RuleTree.tree.RuleTreeClassifier',
 'args': {'max_leaf_nodes': inf,
  'min_samples_split': 2,
  'max_depth': 3,
  'prune_useless_leaves': False,
  'stump_selection': 'best',
  'random_state': 1952926171,
  'base_stumps': [(np.float64(0.3333333333333333),
    {'stump_type': 'RuleTree.stumps.classification.PartialPivotTreeStumpClassifier',
     'args': {'max_depth': 1,
      'n_shapelets': inf,
      'n_ts_for_selection': 100,
      'n_features_strategy': 'all',
      'selection': 'all',
      'distance': 'euclidean',
      'scaler': None,
      'use_combination': True,
      'random_state': 42,
      'n_jobs': 10},
     'n_shapelets': inf,
     'n_ts_for_selection': 100,
     'n_features_strategy': 'all',
     'selection': 'all',
     'distance': 'euclidean',
     'random_state': 42,
     'use_combination': True,
     'n_jobs': 10}),
   (np.float64(0.6666666666666666),
    {'stump_type': 'RuleTree.stumps.classification.DecisionTreeStumpClassifier',
     'args': {'max_dept

In [14]:
def get_mask(node: dict):
    mask = np.zeros((1, node['n_features']))

    if 'DecisionTreeStumpClassifier' in node['stump_type']:
        mask[0, node['feature_idx']] = 1
    if 'PivotTreeStumpClassifier' in node['stump_type']:
        mask[:, :] = 1
    if 'PartialPivotTreeStumpClassifier' in node['stump_type']:
        mask = (~np.isnan(np.array(node['shapelets']))).astype(float)

    return mask

    

def create_matrix(nodes:dict, max_depth:int, curr_node_idx='R', idx = 0, prediction_classes_=None, thr_matrix=None, mask=None, pred_matrix=None):
    if curr_node_idx not in nodes:
        return thr_matrix, mask, pred_matrix
    
    n_features = nodes[curr_node_idx]['n_features']
    if prediction_classes_ is None:
        prediction_classes_ = nodes[curr_node_idx]['prediction_classes_']
    predict_proba_dict = dict(zip(nodes[curr_node_idx]['prediction_classes_'], nodes[curr_node_idx]['prediction_probability']))

    if thr_matrix is None:
        thr_matrix = -np.ones((2**max_depth-1, 1))
        mask = np.zeros((2**max_depth-1, n_features))
        pred_matrix = np.zeros(((2**max_depth-1, len(prediction_classes_))))


    if not nodes[curr_node_idx]['is_leaf']:
        thr_matrix[idx] = nodes[curr_node_idx]['threshold']
        mask[idx] = get_mask(nodes[curr_node_idx])

    for i, el in enumerate(prediction_classes_):
        pred_matrix[idx, i] = predict_proba_dict[el]
        
    
    thr_matrix, mask, pred_matrix = create_matrix(nodes, max_depth, curr_node_idx+'l', 2*idx+1, prediction_classes_, thr_matrix, mask, pred_matrix)
    thr_matrix, mask, pred_matrix = create_matrix(nodes, max_depth, curr_node_idx+'r', 2*idx+2, prediction_classes_, thr_matrix, mask, pred_matrix)

    return thr_matrix, mask, pred_matrix

rf.estimators_[0].to_dict("dict")
dizionario = make_complete_rule_tree("dict")
nodes = {el['node_id']: el for el in dizionario['nodes']}
create_matrix(nodes=nodes, max_depth=get_tree_depth(dizionario)+1)

(array([[ 0.36252324],
        [ 0.36252324],
        [ 0.48586059],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.36252324],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [ 0.48586059],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1.        ],
        [-1

In [17]:
from tqdm.auto import tqdm

res_matrices = []

for tree in tqdm(rf.estimators_):
    tree.to_dict("dict")
    dizionario = make_complete_rule_tree("dict")
    nodes = {el['node_id']: el for el in dizionario['nodes']}
    res_matrices.append(create_matrix(nodes=nodes, max_depth=get_tree_depth(dizionario)+1))

  0%|          | 0/1000 [00:00<?, ?it/s]

Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 4
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to complete: 3
Leaves to 