# xgboost.XGBClassifier
This notebook trains an `xgboost.XGBClassifier` on the PECARN dataset and evaluates it on a held-out test set.

In [1]:
# Make the project's `src` directory importable from this notebook.
import os, sys

# Build the path with os.path.join rather than a hard-coded '..\src':
# the backslash form is an invalid escape sequence and only resolves on Windows.
src_dir = os.path.abspath(os.path.join(os.pardir, 'src'))

# Guard the append so re-running this cell does not add duplicate entries.
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
import logging
import pandas as pd

# Send INFO-level log records to stdout so they appear in the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# The project-local pecarn module provides loading, cleaning and preprocessing helpers.
from data import pecarn

# Load the PECARN dataset (not from CSV), then clean it.
pecarn_raw = pecarn.load(fromCsv=False)
pecarn_cleaned = pecarn.clean(pecarn_raw)

# Target: the 'PosIntFinal' column, passed through the module's preprocess step.
y = pecarn.preprocess(pecarn_cleaned[['PosIntFinal']])

# Features: every remaining column; these are split into train/test sets next.
X = pecarn_cleaned.drop(columns='PosIntFinal')

INFO:data.pecarn.load:Loading from Pickle file c:\Jan\Capstone\notebooks\PECARN_TBI.pkl


In [3]:
from sklearn.model_selection import train_test_split

# 75/25 split, stratified on the target, with a fixed seed so the
# partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    test_size=0.25,
    stratify=y,
    random_state=1234,
)

In [4]:
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

# Classifier with the library's default hyperparameters.
clf = XGBClassifier()

# Two-stage pipeline: the project's preprocessing pipeline feeds the classifier.
pipeline = Pipeline(
    steps=[
        ('data.pecarn.preprocess', pecarn.make_preprocess_pipeline()),
        ('xgboost', clf),
    ]
)

In [5]:
# Fit the preprocessing step and the classifier on the training partition.
pipeline.fit(X=X_train, y=y_train)

[Pipeline] .. (step 1 of 1) Processing convert_to_float, total=   0.5s


Pipeline(steps=[('data.pecarn.preprocess',
                 Pipeline(steps=[('convert_to_float',
                                  FunctionTransformer(func=<function _convert_to_float at 0x0000023B4CCC6B88>))],
                          verbose=True)),
                ('xgboost', XGBClassifier())])

In [6]:
# Score the fitted pipeline on the held-out test partition
# (presumably mean accuracy, the scikit-learn classifier default - confirm).
pipeline.score(X=X_test, y=y_test)

0.9872740686093693

In [7]:
from sklearn.metrics import f1_score

# Predict labels for the test partition and compute F1 against the true labels.
y_pred = pipeline.predict(X_test)
f1_score(y_true=y_test, y_pred=y_pred)

0.4888888888888889