In [2]:
from pathlib import Path
import os
import sys
sys.path.insert(0,str(Path(Path(os.getcwd()).parent, "3rdparty/rulecosi")))
import rulecosi

In [3]:

import tree_diff
# import importlib
# importlib.reload(tree_diff.tree_similarity)
# importlib.reload(tree_diff.stages.baseline_models)
from tree_diff.tree_similarity import rule_set_similarity
from tree_diff.stages.baseline_models import oneHotToDense, oneHot
from tree_diff.training_utils import report_metrics, SCORING, rule_overlap, rule_sparsity, train_model
from tree_diff.rule_entities import *
import tree_diff.config
config = tree_diff.config.Config()



import pandas as pd

import sklearn
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import balanced_accuracy_score, f1_score
from sklearn.tree import DecisionTreeClassifier


import imodels

from river import tree

In [4]:
# Column names assigned, in file order, to the header-less Adult CSV files (see load_batch).
# NOTE(review): FEATURES is rebound later in the notebook by the Android dataset cell.
FEATURES = ["age","workclass","fnlwgt","education","education-num","marital","occupation","relationship","race","sex","capital-gain","capital-loss","hours","native"]
# Name given to the last CSV column; used as the prediction target.
Y_COLUMN = "income"
# Subset of FEATURES that is handed to oneHotToDense(...) when preprocessors are built.
STRING_COLUMNS = ["workclass","education","marital","occupation","relationship","race","sex","native",]

def load_batch(file_path, columns=FEATURES+[Y_COLUMN]):
    """Read one header-less Adult CSV file and name its columns.

    file_path: Location of the file relative to ``input/adult/`` under the
        parent of the current working directory.
    columns: Column names to assign, in file order. Defaults to FEATURES
        followed by Y_COLUMN (evaluated once, when the function is defined).

    Returns the loaded DataFrame.
    """
    adult_dir = Path().absolute().parent / "input/adult/"
    frame = pd.read_csv(adult_dir / file_path, header=None)
    frame.columns = columns
    return frame
    
batch_1_train = load_batch("batch_1/adult1.data")
batch_1_test = load_batch("batch_1/adult1.test")

batch_2_train = load_batch("batch_2/adult2.data")
batch_2_test = load_batch("batch_2/adult2.test")

In [None]:
# Train a RuleCOSI classifier (gradient-boosting base ensemble) on batch 1 and on
# batch 2 independently. The returned results are consumed further down by
# report_metrics and rulecosi_rules_to_converted_rules.
scorer1 = tree_diff.training_utils.train_model(rulecosi.RuleCOSIClassifier(base_ensemble=GradientBoostingClassifier()),
                                    batch_1_train[FEATURES],
                                    batch_1_train[Y_COLUMN],                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "rulecosi")

# Same configuration as scorer1, trained on the second batch only.
scorer2 = tree_diff.training_utils.train_model(rulecosi.RuleCOSIClassifier(base_ensemble=GradientBoostingClassifier()),
                                    batch_2_train[FEATURES],
                                    batch_2_train[Y_COLUMN],                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "rulecosi")



In [None]:
def rulecosi_rule_to_conditions(rule):
    """Convert one RuleCOSI rule into a tree_diff ``Rule``.

    rule: Object exposing ``A`` (an iterable whose items carry the condition at
        index 1) and ``y`` (used as the rule label).

    Each condition contributes its ``att_name``, its operator (looked up by the
    operator function's ``__name__`` via ``import_operator``) and its ``value``.
    """
    conditions = []
    for entry in rule.A:
        source = entry[1]
        attribute = source.att_name
        operator = tree_diff.rule_entities.import_operator(source.op.__name__)
        conditions.append(Condition(attribute, operator, source.value))
    return Rule(label=rule.y, conditions=conditions)

def rulecosi_rules_to_converted_rules(results, key):
    """Build a ``Ruleset`` from the last estimator stored in ``results``.

    results: Mapping with an ``'estimator'`` sequence; only its last entry is used.
    key: Name used to index that estimator to reach the fitted RuleCOSI model.

    Returns a Ruleset holding the converted ``simplified_ruleset_.rules``.
    """
    last_estimator = results['estimator'][-1]
    source_rules = last_estimator[key].simplified_ruleset_.rules
    return Ruleset(rules=[rulecosi_rule_to_conditions(r) for r in source_rules])

# Convert the last estimator of each run into a Ruleset, then print the
# test metrics, per-ruleset overlap/sparsity and the similarity between the two.
RS1 = rulecosi_rules_to_converted_rules(scorer1, "rulecosi")
RS2 = rulecosi_rules_to_converted_rules(scorer2, "rulecosi")
print(report_metrics(scorer1, SCORING))
print(report_metrics(scorer2, SCORING))
print(f"RS1 overlap {rule_overlap(RS1):0.2f}, RS1 sparsity {rule_sparsity(RS1):0.2f}")
print(f"RS2 overlap {rule_overlap(RS2):0.2f}, RS2 sparsity {rule_sparsity(RS2):0.2f}")
print(f"Similarity: {rule_set_similarity(RS1, RS2):0.2f}")

In [None]:
# Train the same RuleCOSI configuration on batch 1 and batch 2 concatenated,
# then compare the resulting ruleset with the batch-1-only ruleset RS1.
scorer3 = tree_diff.training_utils.train_model(rulecosi.RuleCOSIClassifier(base_ensemble=GradientBoostingClassifier()),
                                    pd.concat([batch_1_train[FEATURES], batch_2_train[FEATURES]]),
                                    pd.concat([batch_1_train[Y_COLUMN], batch_2_train[Y_COLUMN]]),                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "rulecosi")
RS3 = rulecosi_rules_to_converted_rules(scorer3, "rulecosi")
print(report_metrics(scorer3, SCORING))
print(f"RS3 overlap {rule_overlap(RS3):0.2f}, RS3 sparsity {rule_sparsity(RS3):0.2f}")
print(f"Similarity: {rule_set_similarity(RS1, RS3):0.2f}")

In [None]:
# Plain GradientBoostingClassifier (no rule extraction) trained on each batch
# separately, with the same preprocessing; only the metrics are reported.
scap1 = tree_diff.training_utils.train_model(GradientBoostingClassifier(),
                                    batch_1_train[FEATURES],
                                    batch_1_train[Y_COLUMN],                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "scap")

scap2 = tree_diff.training_utils.train_model(GradientBoostingClassifier(),
                                    batch_2_train[FEATURES],
                                    batch_2_train[Y_COLUMN],                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "scap")
print(report_metrics(scap1, SCORING))
print(report_metrics(scap2, SCORING))

In [None]:
def train_ruleset(X, y):
    """Train RuleCOSI on (X, y), print its metrics and return the extracted rules.

    X, y: Features and labels passed straight to ``train_model``.

    Returns a tuple ``(ruleset, estimator)`` where ``ruleset`` is the converted
    Ruleset of the last estimator and ``estimator`` is that last estimator.
    """
    classifier = rulecosi.RuleCOSIClassifier(base_ensemble=GradientBoostingClassifier())
    results = tree_diff.training_utils.train_model(
        classifier,
        X,
        y,
        oneHotToDense(STRING_COLUMNS),
        config,
        "rulecosi",
    )
    print(report_metrics(results, SCORING))
    ruleset = rulecosi_rules_to_converted_rules(results, "rulecosi")
    last_estimator = results['estimator'][-1]
    return ruleset, last_estimator

# Input: Ruleset, Dataset

D_2_X = pd.concat([batch_1_train[FEATURES], batch_2_train[FEATURES]])
D_2_y = pd.concat([batch_1_train[Y_COLUMN], batch_2_train[Y_COLUMN]])


Training on the subset will ensure there is almost 0 overlap. 

In [None]:
RS_all, RS_all_model = train_ruleset(D_2_X, D_2_y)

In [None]:
# Predict on the combined data with the model trained on it.
y_pred_new = RS_all_model.predict(D_2_X)

# Keep the positions from `indexes` that the combined model still gets wrong.
# NOTE(review): `indexes` is not defined in any cell visible in this notebook,
# so this cell raises NameError on a fresh kernel — confirm where it should come from.
new_indexes = [i for i in indexes if y_pred_new[i] != D_2_y.iloc[i]]
len(indexes), len(new_indexes)

In [None]:
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline


def report_metrics(scores, scoring):
    """Format the mean of each test metric as ``"name: 0.00"``, comma separated.

    scores: Mapping that holds one entry per metric under the key ``test_<name>``;
        each entry must provide ``.mean()``.
    scoring: Mapping whose keys are the metric names to report, in order.

    NOTE(review): this redefines the report_metrics imported from
    tree_diff.training_utils earlier in the notebook.
    """
    parts = []
    for metric in scoring.keys():
        average = scores[f"test_{metric}"].mean()
        parts.append(f"{metric}: {average:0.2f}")
    return ", ".join(parts)

def var_name(var):
    """Return the name of the first module-level variable bound to ``var``.

    The lookup is by identity (``is``) rather than equality. Comparing with
    ``==`` raises for globals such as DataFrames or arrays, whose comparison
    result has no single truth value, and it would also match unrelated
    objects that merely compare equal.

    Returns None when no global refers to ``var``.
    """
    # Iterate over a snapshot so the globals dict cannot change size mid-iteration.
    for name, value in list(globals().items()):
        if value is var:
            return name
    return None

def train_model(model, X, y, preprocessor, config, model_name=None):
    """Cross-validate ``model`` behind optional preprocessing steps.

    model: Final estimator of the pipeline.
    X, y: Data passed to ``cross_validate``.
    preprocessor: Iterable of pipeline steps placed before the model, or a falsy
        value for none.
    config: Object whose ``cv`` attribute is used as the cross-validation setting.
    model_name: Step name for the model; falls back to ``str(model)``.

    Returns the ``cross_validate`` result (fitted estimators included).

    NOTE(review): this redefines the train_model imported from
    tree_diff.training_utils earlier in the notebook.
    """
    steps = list(preprocessor) if preprocessor else []
    step_name = model_name if model_name else str(model)
    steps.append((step_name, model))
    return cross_validate(
        Pipeline(steps),
        X,
        y,
        return_estimator=True,
        cv=config.cv,
        scoring=SCORING,
    )

# FIGS on batch 1 only. Labels are binarised: 1 for ' <=50K' (note the leading
# space in the raw label), 0 for anything else.
figs1 = train_model(imodels.FIGSClassifier(), 
            batch_1_train[FEATURES],
            [1 if i == ' <=50K' else 0 for i in  list(batch_1_train[Y_COLUMN])],
            oneHotToDense(STRING_COLUMNS), 
            config,
            "FIGS")

# FIGS on batch 1 + batch 2 (D_2_X / D_2_y), same label encoding.
figs2 = train_model(imodels.FIGSClassifier(), 
            D_2_X,
            [1 if i == ' <=50K' else 0 for i in  list(D_2_y)],
            oneHotToDense(STRING_COLUMNS), 
            config,
            "FIGS")
print(report_metrics(figs1, SCORING))
print(report_metrics(figs2, SCORING))

# Evidence that Decision Sets are more interpretable than Decision Lists: IDS paper

In [None]:
figs1['estimator'][-1][-1].plot()

In [None]:
figs1['estimator'][-1][-1]

In [None]:
def walk_tree(node, fetch_children, is_leaf):
    """Generator function that walks a tree and yields one path per leaf.

    node: Root node to start from.

    fetch_children: Callable that accepts a node and path_to_node.
        Returns a List[Tuple] where Tuple = (path_to_child, child_node). The
        callable is responsible for extending the path with the current node.

    is_leaf: Callable that accepts a node and returns true if a leaf node.

    Yields a list made of the path to the leaf followed by the leaf itself.
    Children are pushed on a stack, so the last child returned by
    fetch_children is visited first.

    Usage:
    >>> [p for p in walk_tree(tree, lambda x, p: [(p + [x], x.left), (p + [x], x.right)], lambda x: x.is_leaf)]
    """
    # Imported locally: no other visible cell brings deque into scope.
    from collections import deque

    stack = deque()
    stack.append(([], node))
    while stack:
        path_to_node, node = stack.pop()
        if is_leaf(node):
            yield path_to_node + [node]
        else:
            stack.extend(fetch_children(node, path_to_node))

def children(node, path):
    """Return ``(path + [node], child)`` for the left child, then the right child."""
    return [(path + [node], child) for child in (node.left, node.right)]
              
def is_leaf(node):
    """A node is a leaf when both its ``left`` and ``right`` attributes are None."""
    if node.left is not None:
        return False
    return node.right is None

def create_conditions(path_conds):
    """Turn each split node on a path into an ``attr_<feature> LE threshold`` condition.

    NOTE(review): every split is emitted with Operator.LE, including splits whose
    path continues through the right child — confirm whether the branch direction
    should be encoded.
    """
    conditions = []
    for split in path_conds:
        conditions.append(Condition(f"attr_{split.feature}", Operator.LE, split.threshold))
    return conditions

def create_rule(path):
    """Build a ``Rule`` from a root-to-leaf path.

    All nodes but the last become conditions; the label is the last node's
    ``value[0][0]`` formatted with two decimals.
    """
    conditions = create_conditions(path[0:-1])
    leaf = path[-1]
    return Rule(conditions=conditions, label=f"{leaf.value[0][0]:0.2f}")

def extract_rules(tree):
    """Return one Rule per leaf reached when walking ``tree`` with children/is_leaf.

    The parameter name shadows the ``tree`` module imported from river, but only
    inside this function.
    """
    rules = []
    for path in walk_tree(tree, children, is_leaf):
        rules.append(create_rule(path))
    return rules

# Flatten the rules extracted from every tree in `trees_` of the final step of the
# last fitted FIGS pipeline into a single Ruleset, for each of the two runs.
RS_FIG1 = Ruleset(rules=[i for t in figs1['estimator'][-1][-1].trees_ for i in extract_rules(t)])
RS_FIG2 = Ruleset(rules=[i for t in figs2['estimator'][-1][-1].trees_ for i in extract_rules(t)])

print(f"RS_FIG1 overlap {rule_overlap(RS_FIG1):0.2f}, RS_FIG1 sparsity {rule_sparsity(RS_FIG1):0.2f}")
print(f"RS_FIG2 overlap {rule_overlap(RS_FIG2):0.2f}, RS_FIG2 sparsity {rule_sparsity(RS_FIG2):0.2f}")
print(f"Similarity: {rule_set_similarity(RS_FIG1, RS_FIG2):0.2f}")

In [None]:
RS_FIG1

In [None]:
RS_FIG2

# Algorithm Notes
Algorithms contain the following steps:
* Rule candidate generation
* Rule selection 
* Rule post-processing

Decision sets should be the focus as they minimise the barriers to interpretability:
* New rules can be added or removed without affecting the rest of the model (locally modifiable)
* Changes to existing rules do not require reasoning about the rest of the model (locally verifiable)
* Decision sets are globally and locally interpretable (which rules fired, and what logic is encoded)

Other observations
* Output is a decision tree (Decision sets will be left to the extension) 
* Need a new similarity measure that supports decision sets (e.g. sort the conditions by attribute name, operator and threshold before comparing).

TODO:
* Train on Extremely Fast Decision Tree
* Demonstrate idea with pretrained model


# Simplified Problem Setting

* Assume a tree model has been pre-trained on D1, T_original
* Assume a tree ensemble has been trained on data D1 + D2 -> E = {T_1, T_2 ... T_n}

How can we extend the decision tree T_original with the fewest modifications to achieve the same accuracy as the tree ensemble, E?

In [None]:
# Decision tree trained on batch 1 only.
T_original = tree_diff.training_utils.train_model(DecisionTreeClassifier(),
                                    batch_1_train[FEATURES],
                                    batch_1_train[Y_COLUMN],                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "T_original")

# Batch 1 + batch 2 combined.
D_2_X = pd.concat([batch_1_train[FEATURES], batch_2_train[FEATURES]])
D_2_y = pd.concat([batch_1_train[Y_COLUMN], batch_2_train[Y_COLUMN]])

# NOTE(review): the surrounding markdown describes E as a tree ensemble, but this
# trains a single DecisionTreeClassifier on the combined data — confirm intent.
E = tree_diff.training_utils.train_model(DecisionTreeClassifier(),
                                    D_2_X,
                                    D_2_y,                                     
                                    oneHotToDense(STRING_COLUMNS), 
                                    config, 
                                    "E")

print(report_metrics(T_original, SCORING))
print(report_metrics(E, SCORING))

### Baseline:
Using an incremental learning decision tree as the baseline. 

In [None]:
model = tree.ExtremelyFastDecisionTreeClassifier(max_depth = 1)

def test_incrementally(model, X_df, y_df):
    """Score ``model`` row by row and return the F1 score.

    model: Object exposing ``predict_one(dict)``.
    X_df: DataFrame of features; each row is passed as a dict.
    y_df: Iterable of true labels.

    Both predictions and labels are binarised as 1 for ' <=50K' and 0 otherwise
    before ``f1_score`` is computed.
    """
    def as_binary(labels):
        return [1 if label == ' <=50K' else 0 for label in labels]

    predictions = [model.predict_one(dict(X_df.iloc[row])) for row in range(len(X_df))]
    return f1_score(as_binary(y_df), as_binary(predictions))

def train_incrementally(model, X_df, y_df):
    """Feed every row of ``X_df`` (as a dict) with its label to ``model.learn_one``.

    Rows and labels are both taken by position (``iloc``), in order.
    """
    for row in range(len(X_df)):
        model.learn_one(dict(X_df.iloc[row]), y_df.iloc[row])

train_incrementally(model, batch_1_train[FEATURES], batch_1_train[Y_COLUMN])
test_incrementally(model, batch_1_test[FEATURES], batch_1_test[Y_COLUMN])

In [None]:
# `copy` is not imported by any other visible cell, so bring it in here.
import copy

# Continue training a deep copy of the batch-1 model on batch 2, leaving
# `model` itself untouched for the comparison below.
model2 = copy.deepcopy(model)
train_incrementally(model2, batch_2_train[FEATURES], batch_2_train[Y_COLUMN])
test_incrementally(model2, batch_2_test[FEATURES], batch_2_test[Y_COLUMN])

In [None]:
test_incrementally(model, batch_2_test[FEATURES], batch_2_test[Y_COLUMN])

Need to convert incrementally trained trees to our rule set classes for similarity calculations. 

In [None]:
import river

def river_children(node, path):
    """Return ``(path_to_child, child)`` pairs for a river tree node.

    For an EFDTNominalMultiwayBranch the path entry is the tuple
    ``(node, child_index)`` so the branch taken can be recovered later; for any
    other node the path entry is the node itself.
    """
    multiway_branch = river.tree.nodes.efdtc_nodes.EFDTNominalMultiwayBranch
    if not isinstance(node, multiway_branch):
        return [(path + [node], child) for child in node.children]
    return [(path + [(node, index)], child) for index, child in enumerate(node.children)]
              
def river_is_leaf(node):
    """Treat a node as a leaf when it reports exactly one leaf (``n_leaves == 1``)."""
    leaf_count = node.n_leaves
    return leaf_count == 1

def river_return_condition(node):
    """Convert one path entry produced by river_children into a Condition.

    node: Either an EFDTNumericBinaryBranch (becomes ``attr_<feature> LE threshold``)
        or a ``(branch, child_index)`` tuple (becomes ``attr_<feature> EQ value``,
        where the value is looked up in the branch's ``_r_mapping``).

    Raises ValueError for any other entry.
    """
    if isinstance(node, river.tree.nodes.efdtc_nodes.EFDTNumericBinaryBranch):
        return Condition(f"attr_{node.feature}", Operator.LE, node.threshold)
    if not isinstance(node, tuple):
        raise ValueError(node)
    # Multinomial: the tuple carries the branch node and the index of the child taken.
    branch = node[0]
    attribute = branch.feature
    category = branch._r_mapping[node[1]]
    return Condition(f"attr_{attribute}", Operator.EQ, category)

def river_create_conditions(path_conds):
    """Map every path entry to its Condition, preserving order."""
    return list(map(river_return_condition, path_conds))

def river_create_rule(path):
    """Build a Rule from a river root-to-leaf path.

    The label is the key of the leaf's ``stats`` with the largest value above 0
    (the first one encountered wins ties). When no value is above 0 the label is
    the string "None".
    """
    leaf_stats = path[-1].stats
    best_label, best_weight = None, 0
    for candidate, weight in leaf_stats.items():
        if best_weight < weight:
            best_label, best_weight = candidate, weight
    return Rule(conditions=river_create_conditions(path[0:-1]), label=f"{best_label}")

def river_extract_rules(tree, children, is_leaf):
    """Return a list with one Rule per leaf path found by walk_tree.

    tree: Root node to walk.
    children, is_leaf: Callables forwarded to walk_tree.
    """
    rules = []
    for path in walk_tree(tree, children, is_leaf):
        rules.append(river_create_rule(path))
    return rules

rules_model1 = river_extract_rules(model._root,river_children, river_is_leaf)
rules_model2 = river_extract_rules(model2._root,river_children, river_is_leaf)

In [None]:
print(f"EFDT Similarity: {rule_set_similarity(rules_model1, rules_model2):0.2f}")

The similarity between two instances of the EFDT algorithm indicates that the two models are quite close. This is to be expected, as EFDT a) is an incremental learning algorithm designed to adapt incrementally by concentrating on when to split, and b) allows backtracking over the nodes to re-evaluate split decisions. 

**Recommendation**
* Shift back to the rule sets or provide an additional complexity constraint to model

Convert Sklearn Tree to a decision set for simplicity.

In [None]:
len(rules_model2), len(RS2), len(RS_FIG2)
# Tree, Rule List, Rule Ensemble

# Sklearn Tree to rule set

In [None]:
# `tree_` attribute of the "T_original" step of the last estimator in T_original.
sklearn_tree = T_original['estimator'][-1]['T_original'].tree_

# Placeholder for the walk_tree child-fetching callback; not implemented yet (returns None).
def sklearn_children(node, path):
    pass

# Placeholder for the walk_tree leaf predicate; not implemented yet (returns None).
def sklearn_is_leaf(node):
    pass



# for p in walk_tree(sklearn_tree, sklearn_children, sklearn_is_leaf):
#     print p

# Show the first rule extracted from the incrementally trained river model.
river_extract_rules(model._root,river_children, river_is_leaf)[0]

In [None]:
import collections
collections.Counter(batch_1_train[Y_COLUMN])


In [None]:
# Single-threshold baseline: predict True (i.e. ' <=50K') whenever capital-gain
# is not above 9090, and score it with F1 against the batch 1 training labels.
y_pred = [not i for i in list(batch_1_train['capital-gain'] > 9090)]
y_true = [i == ' <=50K' for i in list(batch_1_train[Y_COLUMN])]
f1_score(y_true, y_pred)

# Android malware dataset


In [7]:
# https://ieeexplore-ieee-org.ezproxy-b.deakin.edu.au/document/9312053
    

# Input files are resolved relative to the parent of the working directory, the
# same way load_batch does, instead of a machine-specific absolute path.
df_span = pd.read_csv(Path(Path().absolute().parent, "input/TUANDROMD/TUANDROMD.csv"))

LABEL = 'Label'
# NOTE(review): this rebinds FEATURES, which earlier cells use for the Adult
# columns; re-running those cells after this one will pick up this list instead.
FEATURES = [l for l in list(df_span.columns) if not l == LABEL] 

df_span.dropna(inplace=True)

# NOTE(review): no random_state is passed, so the split differs between runs.
android_train_X, android_test_X, android_train_y, android_test_y = sklearn.model_selection.train_test_split(df_span[FEATURES],df_span[LABEL], test_size=0.3)

# Boolean targets: True for rows labelled "malware".
android_train_y = [i == "malware" for i in android_train_y]
android_test_y = [i == "malware" for i in android_test_y]
    

df_mushrooms = pd.read_csv(Path(Path().absolute().parent, "input/MushroomDataset/secondary_data.csv"), delimiter=";")

def classification_csv_loader(file_name, prediction_column, categorical_columns=None,
                              *, test_size=0.3, random_state=None, **kwargs):
    """Load a classification CSV and return a train/test split.

    file_name: Path of the CSV file.
    prediction_column: Name of the target column.
    categorical_columns: Optional list of columns to one-hot encode with
        ``pd.get_dummies`` (a NaN indicator column is added for each).
    test_size: Fraction of rows held out for testing (default 0.3, as before).
    random_state: Optional seed forwarded to ``train_test_split`` so the split
        can be reproduced; None keeps the previous unseeded behaviour.
    **kwargs: Forwarded to ``pd.read_csv``.

    Returns ``(train_X, test_X, train_y, test_y)`` where the X parts are
    DataFrames with whitespace-stripped column names and the y parts are numpy
    arrays of integer class codes. Codes follow the sorted order of the class
    labels, so they are stable across runs; missing labels are coded -1.
    """
    # Imported locally so the function does not depend on another cell having
    # loaded the sklearn.model_selection submodule.
    from sklearn.model_selection import train_test_split

    df = pd.read_csv(file_name, **kwargs)
    if categorical_columns:
        dummies = [pd.get_dummies(df[col], prefix=col, dummy_na=True) for col in categorical_columns]
        df = pd.concat(dummies + [df[df.columns.difference(categorical_columns)]], axis=1)

    # Select with the real column names first, then strip them; indexing with
    # already-stripped names fails whenever a header carries stray whitespace.
    feature_columns = [col for col in df.columns if col != prediction_column]
    features = df[feature_columns].rename(
        columns=lambda col: col.strip() if isinstance(col, str) else col
    )

    # Deterministic label encoding (enumerating a set gives an arbitrary order),
    # built as a new Series instead of an in-place replace on a column selection.
    codes, _ = pd.factorize(df[prediction_column], sort=True)
    labels = pd.Series(codes, index=df.index, name=prediction_column)

    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    return train_X, test_X, train_y.to_numpy(), test_y.to_numpy()


# Mushroom columns that are one-hot encoded by classification_csv_loader.
categorical = ['cap-shape', 'cap-surface', 'cap-color',
       'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
       'stem-root', 'stem-surface', 'stem-color',
       'veil-type', 'veil-color', 'has-ring', 'ring-type', 'spore-print-color',
       'habitat', 'season']

# Resolved relative to the parent of the working directory (as load_batch does)
# rather than through a machine-specific absolute path.
m_train_X, m_test_X, m_train_y, m_test_y = classification_csv_loader(
    Path(Path().absolute().parent, "input/MushroomDataset/secondary_data.csv"),
    "class",
    categorical,
    delimiter=";")

In [6]:
def predict_batch(trained_model, X_df):
    """Predict for every row of ``X_df`` with either kind of model.

    Models exposing ``predict_one`` are called once per row with the row as a
    dict and the results are returned as a list. Any other model is called once
    through ``predict(X_df)`` and its result is returned unchanged.
    """
    if not hasattr(trained_model, "predict_one"):
        return trained_model.predict(X_df)
    return [trained_model.predict_one(dict(X_df.iloc[row])) for row in range(len(X_df))]

def test_incrementally(trained_model, X_df, y_df):
    """Return the balanced accuracy of ``trained_model`` on (X_df, y_df).

    Predictions come from predict_batch. Assertions check that the instances,
    the labels and the predictions all have the same length.
    """
    n_rows = len(X_df)
    assert n_rows == len(y_df), "Instances and labels should be the same length"
    predictions = predict_batch(trained_model, X_df)
    assert len(predictions) == len(y_df), "Predition length should be same as labels"
    return balanced_accuracy_score(y_df, predictions)

# SGT are provided in the river project

def train_incrementally(model_to_train, X_df, y_df):
    """Train either an incremental or a batch model on (X_df, y_df).

    model_to_train: Model exposing ``learn_one(x, y)`` (trained row by row with
        each row passed as a dict) or, failing that, ``fit(X, y)``.
    X_df: DataFrame of features.
    y_df: Labels aligned with the rows of ``X_df`` by position; may be a list,
        a numpy array or a pandas Series.
    """
    # Support incremental learning algorithms from river
    if hasattr(model_to_train, "learn_one"):
        # A pandas Series indexed with an int does a label lookup, which picks the
        # wrong row (or raises KeyError) once the index has been shuffled, e.g. by
        # train_test_split. Use positional access whenever it is available.
        labels = y_df.iloc if hasattr(y_df, "iloc") else y_df
        for i in range(len(X_df)):
            model_to_train.learn_one(dict(X_df.iloc[i]), labels[i])
    else:
        model_to_train.fit(X_df, y_df)

In [71]:
model = tree.ExtremelyFastDecisionTreeClassifier(max_depth=10)        
train_incrementally(model, android_train_X, android_train_y)
test_incrementally(model, android_test_X, android_test_y)

0.9642691415313225

In [213]:
dt = DecisionTreeClassifier(max_depth=5)
train_incrementally(dt, android_train_X, android_train_y)
test_incrementally(dt, android_test_X, android_test_y)

0.9673106779368639

In [183]:
dt = DecisionTreeClassifier(max_depth=5)
train_incrementally(dt, m_train_X, m_train_y)
test_incrementally(dt, m_test_X, m_test_y), dt.get_n_leaves(), dt.get_depth()

(0.7448184536832723, 20, 5)

In [None]:
efdt = tree.ExtremelyFastDecisionTreeClassifier(max_depth=5)
train_incrementally(efdt, m_train_X, list(m_train_y))
test_incrementally(efdt, m_test_X, list(m_test_y))

In [174]:
efdt.n_leaves,efdt.max_depth

(6, 5)

In [215]:
efdt = tree.ExtremelyFastDecisionTreeClassifier(max_depth=5)
train_incrementally(efdt, m_train_X, list(m_train_y))
test_incrementally(efdt, m_test_X, list(m_test_y))

0.6003161118245579

In [8]:
# Higgs
# Resolved relative to the parent of the working directory (as load_batch does)
# rather than through a machine-specific absolute path. The file has no header row.
df_higgs = pd.read_csv(Path(Path().absolute().parent, "input/Higgs/HIGGS.csv"), header=None)
# First column is the target; the remaining 28 are the features.
df_higgs.columns = ["prediction","lepton_pT","lepton_eta","lepton_phi","missing_energy_magnitude","missing_energy_phi","jet_1_pt","jet_1_eta","jet_1_phi","jet_1_b-tag","jet_2_pt","jet_2_eta","jet_2_phi","jet_2_b-tag","jet_3_pt","jet_3_eta","jet_3_phi","jet_3_b-tag","jet_4_pt","jet_4_eta","jet_4_phi","jet_4_b-tag","m_jj","m_jjj","m_lv","m_jlv","m_bb","m_wbb","m_wwbb"]
df_higgs.head()


Unnamed: 0,prediction,lepton_pT,lepton_eta,lepton_phi,missing_energy_magnitude,missing_energy_phi,jet_1_pt,jet_1_eta,jet_1_phi,jet_1_b-tag,...,jet_4_eta,jet_4_phi,jet_4_b-tag,m_jj,m_jjj,m_lv,m_jlv,m_bb,m_wbb,m_wwbb
0,1.0,0.869293,-0.635082,0.22569,0.32747,-0.689993,0.754202,-0.248573,-1.092064,0.0,...,-0.010455,-0.045767,3.101961,1.35376,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678
1,1.0,0.907542,0.329147,0.359412,1.49797,-0.31301,1.095531,-0.557525,-1.58823,2.173076,...,-1.13893,-0.000819,0.0,0.30222,0.833048,0.9857,0.978098,0.779732,0.992356,0.798343
2,1.0,0.798835,1.470639,-1.635975,0.453773,0.425629,1.104875,1.282322,1.381664,0.0,...,1.128848,0.900461,0.0,0.909753,1.10833,0.985692,0.951331,0.803252,0.865924,0.780118
3,0.0,1.344385,-0.876626,0.935913,1.99205,0.882454,1.786066,-1.646778,-0.942383,0.0,...,-0.678379,-1.360356,0.0,0.946652,1.028704,0.998656,0.728281,0.8692,1.026736,0.957904
4,1.0,1.105009,0.321356,1.522401,0.882808,-1.205349,0.681466,-1.070464,-0.921871,0.0,...,-0.373566,0.113041,0.0,0.755856,1.361057,0.98661,0.838085,1.133295,0.872245,0.808487


In [9]:
# Every df_higgs column except the "prediction" target.
higgs_features = ["lepton_pT","lepton_eta","lepton_phi","missing_energy_magnitude","missing_energy_phi","jet_1_pt","jet_1_eta","jet_1_phi","jet_1_b-tag","jet_2_pt","jet_2_eta","jet_2_phi","jet_2_b-tag","jet_3_pt","jet_3_eta","jet_3_phi","jet_3_b-tag","jet_4_pt","jet_4_eta","jet_4_phi","jet_4_b-tag","m_jj","m_jjj","m_lv","m_jlv","m_bb","m_wbb","m_wwbb"]
dt = DecisionTreeClassifier(max_depth=100)

# 70/30 split. NOTE(review): no random_state is passed, so the split (and the
# score below) differs between runs.
h_tr_X, h_te_X, h_tr_y, h_te_y = sklearn.model_selection.train_test_split(df_higgs[higgs_features], df_higgs["prediction"], test_size=0.3)

# Batch decision-tree baseline scored with balanced accuracy on the held-out part.
dt.fit(h_tr_X, h_tr_y)
y_pred = dt.predict(h_te_X)
balanced_accuracy_score(h_te_y, y_pred)

0.6649874736169565

In [None]:
# node_depths = [1,2,5,10,15,20,25]
# for i in node_depths:
#     efdt = tree.ExtremelyFastDecisionTreeClassifier(max_depth=i)  
#     train_incrementally(efdt, android_train_X, android_train_y)
#     efdt_score = test_incrementally(efdt, android_test_X, android_test_y)    
    
#     dt = DecisionTreeClassifier(max_depth=i)
#     train_incrementally(dt, android_train_X, android_train_y)
#     dt_score = test_incrementally(dt, android_test_X, android_test_y)    
    
#     efdt_score = test_incrementally(efdt, android_test_X, android_test_y)
#     print(f"Max nodes: {i}, DT: {dt_score:0.2f}, EFDT: {efdt_score:0.2f}")  


def walk_tree(node, fetch_children, is_leaf):
    """Generator function that walks a tree and yields one path per leaf.

    node: Root node to start from.

    fetch_children: Callable that accepts a node and path_to_node.
        Returns a List[Tuple] where Tuple = (path_to_child, child_node). The
        callable is responsible for extending the path with the current node.

    is_leaf: Callable that accepts a node and returns true if a leaf node.

    Yields a list made of the path to the leaf followed by the leaf itself.
    Children are pushed on a stack, so the last child returned by
    fetch_children is visited first.

    NOTE(review): an identical walk_tree is defined in an earlier cell of this
    notebook; this definition replaces it when the cell runs.

    Usage:
    >>> [p for p in walk_tree(tree, lambda x, p: [(p + [x], x.left), (p + [x], x.right)], lambda x: x.is_leaf)]
    """
    # Imported locally: no other visible cell brings deque into scope.
    from collections import deque

    stack = deque()
    stack.append(([], node))
    while stack:
        path_to_node, node = stack.pop()
        if is_leaf(node):
            yield path_to_node + [node]
        else:
            stack.extend(fetch_children(node, path_to_node))

def walk_decision_node(root):
    """Yield every root-to-leaf path of a tree whose nodes expose ``children``.

    A node counts as a leaf when its ``children`` equals the empty list. Each
    yielded path is a list of the nodes from the root down to and including the leaf.
    """
    def expand(node, path):
        return [(path + [node], child) for child in node.children]

    def has_no_children(node):
        return node.children == []

    for leaf_path in walk_tree(root, expand, has_no_children):
        yield leaf_path

def print_tree(root):
    """Print every root-to-leaf path of the tree rooted at ``root``, one per line.

    Each path is the list of nodes yielded by walk_decision_node. The path itself
    is printed; printing ``root`` would repeat the same object once per leaf.
    """
    for path in walk_decision_node(root):
        print(path)

def predict(root, x):
    """Return the first item from walk_decision_node whose ``predict(x)`` is truthy.

    Falls through and returns None when no item matches.

    NOTE(review): walk_decision_node yields path lists (nodes from root to leaf),
    and a list has no ``predict`` method, so this raises AttributeError as
    written. It presumably should call predict on a node of the path (e.g. the
    leaf ``i[-1]``) — confirm the intended behaviour.
    """
    for i in walk_decision_node(root):
        if i.predict(x):
            return i

# Experiment
* No missing values
* Range of samples
* Mixed attributes

## Datasets
* Census Income Data (Mixed)
* Mushroom Dataset (mostly categorical)
* Android dataset (sparse)
* HIGGS (numerical)

## Benchmarks
* Tree: EFDT
* Tree: HT
* Tree: DT
* Interpretable: FIGS
* Interpretable: RuleCosi+

## Notes
* Hard > Soft
* < Oblique