# sklearn.DecisionTreeClassifier
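This notebook demonstrates a simple `sklearn.tree.DecisionTreeClassifier`: it is trained on the PECARN dataset and evaluated on a held-out test split using accuracy and F1 score.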

In [1]:
# Make the project's src directory (one level up from this notebook) importable.
import os
import sys

# Build the path with os.path.join instead of the literal '..\src': that literal
# contains the invalid escape sequence '\s' and only resolves as intended on Windows.
src_dir = os.path.abspath(os.path.join('..', 'src'))

# Guard the append so re-running this cell does not add duplicate sys.path entries.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
import pandas as pd

# Send log records (INFO and above) to stdout so they appear in the cell output
import logging
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# The pecarn module gives the notebook access to the PECARN dataset
from data import pecarn

# Load the PECARN data (fromCsv=False), then run it through pecarn.clean
pecarn_raw = pecarn.load(fromCsv=False)
pecarn_cleaned = pecarn.clean(pecarn_raw)

# Features: every cleaned column except the 'PosIntFinal' target.
# Target: the 'PosIntFinal' column passed through pecarn.preprocess.
# Both are ready to be split into training and test sets.
target_column = 'PosIntFinal'
X = pecarn_cleaned.drop(columns=target_column)
y = pecarn.preprocess(pecarn_cleaned[[target_column]])

INFO:data.pecarn.load:Loading from Pickle file c:\Jan\Capstone\notebooks\PECARN_TBI.pkl


In [3]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every score computed from it, is identical on each
# Restart & Run All. Without random_state the shuffle differs from run to run.
SPLIT_SEED = 42

# 75% / 25% split, stratified on y so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=0.75,
    test_size=0.25,
    stratify=y,
    random_state=SPLIT_SEED,
)

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline

# Seed the tree: scikit-learn randomly permutes the features at each split, so an
# unseeded DecisionTreeClassifier can produce a different tree on every run.
TREE_SEED = 42
clf = DecisionTreeClassifier(random_state=TREE_SEED)

# Two-step pipeline: the project's preprocessing pipeline, then the classifier.
pipeline = Pipeline(steps=[
    ('data.pecarn.preprocess', pecarn.make_preprocess_pipeline()),
    ('decisiontree', clf)
])

In [5]:
# Fit the preprocessing step and the decision tree on the training split.
# Left as the last expression so the fitted pipeline's repr is displayed.
pipeline.fit(X=X_train, y=y_train)

[Pipeline] .. (step 1 of 1) Processing convert_to_float, total=   0.7s


Pipeline(memory=None,
         steps=[('data.pecarn.preprocess',
                 Pipeline(memory=None,
                          steps=[('convert_to_float',
                                  FunctionTransformer(accept_sparse=False,
                                                      check_inverse=True,
                                                      func=<function _convert_to_float at 0x0000016EE2A94318>,
                                                      inv_kw_args=None,
                                                      inverse_func=None,
                                                      kw_args=None,
                                                      validate=False))],
                          verbose=True)),
                ('decisiontree',
                 DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None,
                                        criterion='gini', max_depth=None,
                                        max_features=None, max_leaf_nodes

In [6]:
# Score the fitted pipeline on the held-out test split
# (for a classifier, score() reports mean accuracy).
pipeline.score(X=X_test, y=y_test)

0.9791589819254888

In [7]:
from sklearn.metrics import f1_score

# Predict labels for the test split, then compute the F1 score of those
# predictions against the true test labels (f1_score defaults are used).
y_pred = pipeline.predict(X=X_test)
f1_score(y_true=y_test, y_pred=y_pred)

0.39893617021276595