# Hello World Bonsai

Let us verify whether or not we can get MERCS to use other kinds of models.

## Imports

In [1]:
# Basic imports
import sklearn
import numpy as np
import pandas as pd
import os
import sys


from os.path import dirname
from sklearn.datasets import load_iris
from sklearn.metrics import f1_score, accuracy_score, classification_report
from sklearn.model_selection import train_test_split

In [2]:
# Custom Imports
note_dir = os.getcwd()
root_dir = dirname(note_dir)
src_dir = os.path.join(root_dir, 'src')
bonsai_dir = os.path.join(src_dir, 'bonsaidt')

sys.path.extend([src_dir, bonsai_dir])

In [3]:
import bonsai
import mercs

from bonsai.base import *
from bonsai.utils.visuals import (plot_corr,
                                  plot_summary_grid)

## Datasets

In [4]:
iris = load_iris()
X, y = iris['data'], iris['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [5]:
train = pd.DataFrame(X_train)
train = train.assign(y=y_train)
train.head()

Unnamed: 0,0,1,2,3,y
0,5.0,3.6,1.4,0.2,0
1,6.7,3.3,5.7,2.5,2
2,6.4,2.9,4.3,1.3,1
3,6.0,2.2,5.0,1.5,2
4,6.9,3.1,5.1,2.3,2


In [6]:
test = pd.DataFrame(X_test)
test = test.assign(y=y_test)
test.head()

Unnamed: 0,0,1,2,3,y
0,6.7,2.5,5.8,1.8,2
1,6.6,3.0,4.4,1.4,1
2,6.9,3.1,4.9,1.5,1
3,4.5,2.3,1.3,0.3,0
4,5.8,4.0,1.2,0.2,0


## Training

### Normal Procedure

First let us see how it behaves if we train as usal.

In [7]:
from mercs.core import MERCS

In [8]:
model_classic = MERCS()

In [9]:
ind_parameters = {'ind_type':           'DT',
                  'ind_max_depth':       4}

sel_parameters = {'sel_type':           'Base',
                  'sel_its':            10,
                  'sel_param':          1}

model_classic.fit(train, **ind_parameters, **sel_parameters)

Inspecting our model, we see that everything looks pretty much as we would expect.

In [10]:
for m in model_classic.m_list:
    msg = """
    This model has type:    {}
    With parameters:        {}
    With Max Depth:         {}
    """.format(type(m), m.get_params(), m.get_params()['max_depth'])
    print(msg)


    This model has type:    <class 'sklearn.tree.tree.DecisionTreeClassifier'>
    With parameters:        {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': False, 'random_state': None, 'splitter': 'best'}
    With Max Depth:         4
    

    This model has type:    <class 'sklearn.tree.tree.DecisionTreeClassifier'>
    With parameters:        {'class_weight': None, 'criterion': 'gini', 'max_depth': 4, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': False, 'random_state': None, 'splitter': 'best'}
    With Max Depth:         4
    

    This model has type:    <class 'sklearn.tree.tree.DecisionTreeClassifier'>
    With parameters:      

In [11]:
nb_models = len(model_classic.m_list)
nb_models

50

In [12]:
type(model_classic.m_list[0])

sklearn.tree.tree.DecisionTreeClassifier

In [13]:
model_classic.m_list[0].classes_

array([0., 1., 2.])

If we set `ind_flatten` to `True` MERCS unpacks the random forest into its components.

### Bonsai Training

Let us try to pass our new decision trees downstream into MERCS

In [14]:
model_bonsai = MERCS()

In [15]:
ind_parameters = {'ind_type':           'DT',
                  'ind_classifier':      GiniTree,
                  'ind_regressor':       RegTree,
                  'ind_max_depth':       4}

sel_parameters = {'sel_type':           'Base',
                  'sel_its':            10,
                  'sel_param':          1}

In [16]:
model_bonsai.fit(train, **ind_parameters, **sel_parameters)

In [17]:
nb_models = len(model_bonsai.m_list)
nb_models

50

In [18]:
type(model_bonsai.m_list[0])

bonsai.base.ginitree.GiniTree

In [19]:
model_bonsai.m_list[0].max_depth

4

## Testing

### Bonsai

In [25]:
# make a query
n, m = train.values.shape
q_code = np.zeros(m)
q_code[-1:] = 1

#q_code[0] = -1
#q_code[1] = -1
#q_code[2] = -1
#q_code[3] = -1
print(q_code)

test_copy = test.copy()
test_copy.iloc[:, q_code==1] = np.nan # I just cannot believe the results are so good. So I make sure MERCS CANNOT KNOW ABOUT THE TRUTH

pred_parameters = {'predict_type':'MI'}
#pred_parameters = {'predict_type':'MA', 'predict_its': 0.1, 'predict_param': 0.99}
#pred_parameters = {'predict_type':'MAFI', 'predict_its': 0.1, 'predict_param': 0.99}
#pred_parameters = {'predict_type':'RW', 'predict_its': 64, 'predict_param': 10}

y_pred = model_bonsai.predict(test_copy,
                       **pred_parameters,
                       qry_code=q_code)

y_true = test[test.columns.values[np.array(q_code)==1]].values

obs = f1_score(y_true, y_pred, average='macro')
obs

[0. 0. 0. 0. 1.]


0.2976608187134503

In [26]:
model_bonsai.s['prediction']

{'type': 'MI', 'its': 0.1, 'param': 0.99}

In [35]:
model_bonsai.q_models[0].m_list[0].m_list

[<bonsai.base.ginitree.GiniTree at 0x7f383b972d68>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977240>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977358>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977278>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b9772e8>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b9773c8>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977470>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977518>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b9775c0>,
 <bonsai.base.ginitree.GiniTree at 0x7f383b977668>]

### Classic

In [23]:
# make a query
n, m = train.values.shape
q_code = np.zeros(m)
q_code[-1:] = 1

#q_code[0] = -1
#q_code[1] = -1
#q_code[2] = -1
#q_code[3] = -1
print(q_code)

test_copy = test.copy()
test_copy.iloc[:, q_code==1] = np.nan # I just cannot believe the results are so good. So I make sure MERCS CANNOT KNOW ABOUT THE TRUTH

pred_parameters = {'predict_type':'MI', 'predict_its': 0.1, 'predict_param': 0.99}
#pred_parameters = {'predict_type':'MA', 'predict_its': 0.1, 'predict_param': 0.99}
#pred_parameters = {'predict_type':'MAFI', 'predict_its': 0.1, 'predict_param': 0.99}
#pred_parameters = {'predict_type':'RW', 'predict_its': 64, 'predict_param': 10}

y_pred = model_classic.predict(test_copy,
                       **pred_parameters,
                       qry_code=q_code)

y_true = test[test.columns.values[np.array(q_code)==1]].values

obs = f1_score(y_true, y_pred, average='macro')
obs

[0. 0. 0. 0. 1.]


0.9363636363636364

In [34]:
model_classic.q_models[0].m_list[0].m_list

[DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
             max_features=None, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, presort=False, random_state=None,
             splitter='best'),
 DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
             max_features=None, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, presort=False, random_state=None,
             splitter='best'),
 DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
             max_features=None, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, pre