### Example using xgboost Dmatrix with Iris dataset

In [2]:
import xgboost as xgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.datasets import dump_svmlight_file


iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


### IF working with numpy arrays...
if 1 == 0: # <- just preventing this from running.
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

### Using svmlight format for less memory consumption
dump_svmlight_file(X_train, y_train, 'dtrain.svm', zero_based=True)
dump_svmlight_file(X_test, y_test, 'dtest.svm', zero_based=True)
dtrain_svm = xgb.DMatrix('dtrain.svm')
dtest_svm = xgb.DMatrix('dtest.svm')

# Need to set the parameters for XGboost to work.
param = {
    'max_depth': 3,  # the maximum depth of each tree
    'eta': 0.3,  # the training step for each iteration
    'silent': 1,  # logging mode - quiet
    'objective': 'multi:softprob',  # error evaluation for multiclass training
    'num_class': 3}  # the number of classes that exist in this datset
num_round = 20  # the number of training iterations


### Start training the data

In [4]:
bst = xgb.train(param, dtrain_svm, num_round)

### To see the model in readable form
if 1 == 0:
    bst.dump_model('dump.raw.txt')
    

Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




### Predict the data

In [9]:
preds = bst.predict(dtest_svm)
import numpy as np
best_preds = np.asarray([np.argmax(line) for line in preds])

In [10]:
from sklearn.metrics import precision_score

print(precision_score(y_test, best_preds, average='macro'))

1.0
