Using XGBoost predict species of a given flower based on the size measurements of its petals.

In [3]:
from sklearn.datasets import load_iris

flowers = load_iris()
dir(flowers)

['DESCR',
 'data',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [4]:
flowers.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [5]:
flowers.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [6]:
flowers.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
flowers.data.shape

(150, 4)

In [11]:
# Split train, test
from sklearn.model_selection import train_test_split
features_train, features_test, target_train, target_test = train_test_split(
    flowers.data,
    flowers.target,
    test_size=0.2,
    random_state=0
)
features_train.shape

(120, 4)

In [18]:
import xgboost as xgb

train_dm = xgb.DMatrix(features_train, label=target_train)
test_dm = xgb.DMatrix(features_test, label=target_test)

train_dm.feature_names

['f0', 'f1', 'f2', 'f3']

In [19]:
len(flowers.target_names)

3

In [30]:
hyper_param = {
    'max_depth': 4,
    'eta': 0.3,
    'objective': 'multi:softmax',
    'eval_metric': 'mlogloss',
    'num_class': len(flowers.target_names)
}

In [31]:
hyper_param

{'max_depth': 4,
 'eta': 0.3,
 'objective': 'multi:softmax',
 'eval_metric': 'mlogloss',
 'num_class': 3}

In [32]:
# Now let's train our model
epochs = 10
model = xgb.train(hyper_param, train_dm, epochs)

In [33]:
# Time to do prediction!
predictions = model.predict(test_dm)

In [34]:
predictions

array([2., 1., 0., 2., 0., 2., 0., 1., 1., 1., 2., 1., 1., 1., 1., 0., 1.,
       1., 0., 0., 2., 1., 0., 0., 2., 0., 0., 1., 1., 0.], dtype=float32)

In [35]:
# How accurate were we?

from sklearn.metrics import accuracy_score
accuracy_score(target_test, predictions)

1.0

100% accuracy! Next let's try to try different hyperparams and epoch values to see how lower we can go

In [36]:
def xgb_predictor(params, epochs):
    model = xgb.train(hyper_param, train_dm, epochs)
    predictions = model.predict(test_dm)
    score = accuracy_score(target_test, predictions)
    print(f"Prediction Accuracy: {score}")

In [41]:
for i in range(1,10):
    print("Epochs #: ", i)
    xgb_predictor(hyper_param, i)
    
    

Epochs #:  1
Prediction Accuracy: 1.0
Epochs #:  2
Prediction Accuracy: 1.0
Epochs #:  3
Prediction Accuracy: 1.0
Epochs #:  4
Prediction Accuracy: 1.0
Epochs #:  5
Prediction Accuracy: 1.0
Epochs #:  6
Prediction Accuracy: 1.0
Epochs #:  7
Prediction Accuracy: 1.0
Epochs #:  8
Prediction Accuracy: 1.0
Epochs #:  9
Prediction Accuracy: 1.0


Even with epochs = 1, we are getting perfect accuracy. Let's next lower the hyper params.

In [42]:
for depth in range(1,4):
    p = {'max_depth': depth, 'eta': 0.3, 'objective': 'multi:softmax', 'eval_metric': 'mlogloss','num_class': 3}
    print("max_depth = ", depth)
    xgb_predictor(p, 1)


max_depth =  1
Prediction Accuracy: 1.0
max_depth =  2
Prediction Accuracy: 1.0
max_depth =  3
Prediction Accuracy: 1.0


Still perfect score. Now let's see if eta has any effect.

In [51]:
import numpy as np
for eta in np.arange(0, 1, 0.1):
    p = {'max_depth': 1, 'eta': eta, 'objective': 'multi:softmax', 'eval_metric': 'mlogloss','num_class': 3}
    print("eta = ", eta)
    xgb_predictor(p, 1)


eta =  0.0
Prediction Accuracy: 1.0
eta =  0.1
Prediction Accuracy: 1.0
eta =  0.2
Prediction Accuracy: 1.0
eta =  0.30000000000000004
Prediction Accuracy: 1.0
eta =  0.4
Prediction Accuracy: 1.0
eta =  0.5
Prediction Accuracy: 1.0
eta =  0.6000000000000001
Prediction Accuracy: 1.0
eta =  0.7000000000000001
Prediction Accuracy: 1.0
eta =  0.8
Prediction Accuracy: 1.0
eta =  0.9
Prediction Accuracy: 1.0


Still perfect score :)