In [1]:
# Import libraries and packages
from sklearn import datasets
import numpy as np

#### Load Iris data set

In [2]:
# Load the Iris data set that ships with scikit-learn.
# The returned object exposes the feature matrix as `.data` and the class
# labels as `.target` (both are read in the next cell).
iris = datasets.load_iris()

In [3]:
# Pull the feature matrix (X) and the target labels (y) out of the loaded
# data set in a single unpacking assignment.
X, y = iris.data, iris.target

In [4]:
# Print the dimensions of the feature matrix, then display the array itself
# (the bare last expression is rendered as the cell output).
print(X.shape)
X

(150, 4)


array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [5]:
# Print the dimensions of the label vector, then display the labels
# (the bare last expression is rendered as the cell output).
print(y.shape)
y

(150,)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [6]:
# Import XGBoost and print the installed version so the results in this
# notebook can be traced to a specific library release.
import xgboost as xgb
print(xgb.__version__)

0.90


#### Split the data into train and test sets for modeling

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The split is stratified on the
# labels and uses a fixed random_state so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
    stratify=y,
)

In [8]:
# Sanity check: dimensions of the training feature matrix after the split.
X_train.shape

(120, 4)

#### Train an XGBoost classifier model

In [9]:
# Configure a small gradient-boosted tree classifier.
# NOTE(review): "multi:softmax" is requested here, but the fitted estimator's
# repr in this cell's output reports objective='multi:softprob' -- the
# scikit-learn wrapper appears to substitute it for multi-class targets.
bt = xgb.XGBClassifier(
    max_depth=5,
    learning_rate=0.2,
    n_estimators=10,
    objective="multi:softmax",
)

# Fit on the training split. The call is the last expression of the cell, so
# the fitted estimator's repr is displayed as the cell output.
bt.fit(X_train, y_train, verbose=False)

XGBClassifier(learning_rate=0.2, max_depth=5, n_estimators=10,
              objective='multi:softprob')

In [10]:
# Predict class labels for the held-out test features.
# The true labels are displayed in the next cell (y_test) so the two arrays
# can be compared side by side.
bt.predict(X_test)

array([1, 1, 1, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 0, 0, 1, 0, 2, 0, 1,
       1, 0, 1, 0, 0, 1, 2, 1])

In [11]:
# True labels of the test split, shown for comparison with the predictions
# displayed by the previous cell.
y_test

array([1, 1, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 0, 0, 1, 0, 2, 0, 1,
       1, 0, 2, 0, 0, 2, 2, 1])

In [12]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The model file path used further down lives under this mount point, so this
# cell must run before the model is saved.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Save the model to a file using `joblib.dump`

In [13]:
import os

import joblib

# Destination of the serialized model on the mounted Google Drive.
model_file_name = "/content/drive/My Drive/Edureka/DEMO-local-xgboost-model"

# Alternatives that were tried and rejected because predicting with the
# reloaded model failed:
#   * bt._Booster.save_model(model_file_name)
#       -> 'numpy.ndarray' object has no attribute 'feature_names'
#   * bt.save_model(model_file_name)  (sklearn XGBClassifier API)
#       -> 'XGBClassifier' object has no attribute '_le'
#
# Pickling the whole estimator is the approach recommended in
# https://github.com/dmlc/xgboost/pull/3829; joblib is used here as the
# pickle-based serializer.
# NOTE: because the file is pickle-based, only load it back (joblib.load)
# from a location you trust.

# Make sure the target folder exists; otherwise joblib.dump raises
# FileNotFoundError when the "Edureka" folder has not been created in Drive.
os.makedirs(os.path.dirname(model_file_name), exist_ok=True)

# joblib.dump returns the list of file names it wrote, which is displayed as
# the cell output.
joblib.dump(bt, model_file_name)

['/content/drive/My Drive/Edureka/DEMO-local-xgboost-model']