# First Notebook: Basic Walkthrough of XGBoost
@dzhang203
2019-05-21

Resources:

* [Official XGBoost python introduction](https://xgboost.readthedocs.io/en/latest/python/python_intro.html)
* [XGBoost python demos](https://github.com/dmlc/xgboost/tree/master/demo/guide-python)

In [None]:
import os
import numpy as np
import matplotlib as plt # check on this...
# import scipy.sparse
import xgboost as xgb

In [None]:
# Show the current working directory; the relative PATH_DATA used by the
# loading cell is resolved against it.
os.getcwd()

In [None]:
# Directory containing the agaricus train/test files, relative to the working
# directory. The trailing slash matters: file names are appended to this value
# by plain string concatenation.
PATH_DATA = '../data/'

# Load data

In [None]:
# Name each input file first, then wrap it in a DMatrix (train before test).
train_file = PATH_DATA + 'agaricus.txt.train'
test_file = PATH_DATA + 'agaricus.txt.test'

dtrain = xgb.DMatrix(train_file)
dtest = xgb.DMatrix(test_file)

In [None]:
# Booster settings passed to xgb.train as a plain mapping.
# NOTE(review): newer XGBoost releases appear to replace `silent` with
# `verbosity` -- confirm against the installed version before changing it.
param = dict(
    max_depth=2,
    eta=1,
    silent=1,
    objective='binary:logistic',
)

In [None]:
# number of boosting rounds handed to xgb.train
num_round = 2

# (DMatrix, name) pairs whose performance is watched while training
watchlist = [
    (dtest, 'eval'),
    (dtrain, 'train'),
]

# Train model

In [None]:
# Fit the booster: `param` configures it, `dtrain` supplies the data,
# `num_round` sets how many boosting rounds run, and `watchlist` names the
# datasets to evaluate along the way. Arguments are spelled out by keyword.
bst = xgb.train(
    params=param,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=watchlist,
)

# Predict on the test set

In [None]:
# Score the test matrix with the trained booster; the error-rate cell
# thresholds these values at 0.5 before comparing them with the labels.
preds = bst.predict(dtest)

In [None]:
labels = dtest.get_label()
# A prediction counts as wrong when its thresholded value (> 0.5) disagrees
# with the label. The elementwise comparison replaces the per-index Python
# loop; the explicit division is kept so an empty prediction set still raises
# instead of silently printing nan.
n_wrong = np.count_nonzero((preds > 0.5) != labels)
print('error=%f' % (n_wrong / float(len(preds))))

# Understanding our results

In [None]:
# Draw the feature-importance plot for the trained booster.
xgb.plot_importance(bst)

In [None]:
# Tree indices are zero-based, so a booster trained for `num_round` rounds
# only has trees 0 .. num_round - 1. The previous hard-coded index 2 is out of
# range after 2 rounds; plot the last tree that was actually built instead.
xgb.plot_tree(bst, num_trees=num_round - 1)