<a href="https://colab.research.google.com/github/basetenlabs/demos/blob/main/Deploying%20a%20XGBoost%20Model%20with%20Baseten.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![Baseten](https://assets.website-files.com/624535121db2930bcd043f5d/62453d9bddc3de287134cb76_baseten-logo.svg)

# Installing dependencies

In [None]:
!pip install xgboost
!pip install scikit-learn
!pip install baseten


# Load the Iris dataset, split into training and test sets

In [10]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import xgboost as xgb

# Load the Iris dataset bundled with scikit-learn: feature matrix and class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# xgb.train() consumes DMatrix objects rather than raw arrays.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# NOTE: the training hyperparameters (`param`, `num_round`) are defined in the
# training cell right before xgb.train(); the duplicate copy that used to live
# here was dead code (it was reassigned before ever being read).

# Train and serialize XGBoost model

In [None]:
import joblib

# Booster hyperparameters passed to xgb.train().
param = {
    'max_depth': 3,                 # maximum depth of each tree
    'eta': 0.3,                     # learning rate (step-size shrinkage) per boosting round
    'verbosity': 0,                 # silent logging; replaces the deprecated 'silent' flag
    'objective': 'multi:softprob',  # multiclass objective that outputs per-class probabilities
    'num_class': 3,                 # number of target classes in the dataset
}

num_round = 20  # number of boosting rounds

# Train a Booster on the training DMatrix built in the data-preparation cell.
boosted_model = xgb.train(param, dtrain, num_round)

# Serialize the trained Booster. The file name must stay in sync with the
# `model_files` list passed to the deploy call and with the path the deployed
# model class loads from.
joblib.dump(boosted_model, 'boosted_model.pkl', compress=True)

# Generate files to deploy to Baseten

In [32]:
# Source code of the model class that is uploaded to Baseten.
#
# Fixes relative to the previous version of the generated code:
#   * load() now reads 'boosted_model.pkl', the file actually written by
#     joblib.dump() and uploaded via `model_files` (it used to look for a
#     non-existent 'bst_model.pkl').
#   * joblib.load() is given the path directly so no file handle is leaked.
#   * predict() wraps raw inputs in an xgb.DMatrix, because Booster.predict()
#     only accepts DMatrix objects.
# NOTE(review): the 'model/' directory prefix is kept from the original code;
# presumably that is where Baseten places uploaded files - confirm.
MODEL_CODE_AS_STR = """import joblib
import numpy as np
import xgboost as xgb


class XgBoostModel(object):
    def load(self):
        self.model = joblib.load('model/boosted_model.pkl')

    def predict(self, inputs):
        if not isinstance(inputs, xgb.DMatrix):
            inputs = xgb.DMatrix(np.asarray(inputs))
        return self.model.predict(inputs)
"""

# Python dependencies installed in the serving environment.
# NOTE(review): xgboost is pinned to 0.90 here while the install cell at the
# top of the notebook installs an unpinned xgboost; a pickled Booster may not
# load under a different xgboost version - confirm the pin matches the version
# used for training.
REQUIREMENTS_FILE = """
xgboost==0.90
joblib==1.1.0
scikit-learn==1.0.2
"""

# Write both artifacts to the working directory so the deploy call can pick
# them up by relative path.
with open('xgboost_model.py', 'w') as py_file:
    py_file.write(MODEL_CODE_AS_STR)

with open('requirements.txt', 'w') as req_file:
    req_file.write(REQUIREMENTS_FILE)

# Call Baseten API

In [None]:
import os
from getpass import getpass

import baseten

# Authenticate with Baseten without storing the secret in the notebook:
# read the key from the BASETEN_API_KEY environment variable if it is set,
# otherwise prompt for it interactively (input is not echoed or saved).
# How to create a key: https://docs.baseten.co/applications/overview/api-keys
api_key = os.environ.get("BASETEN_API_KEY") or getpass("Baseten API key: ")
baseten.login(api_key)

# Upload the generated model class, the serialized Booster and the
# requirements file produced by the previous cells.
baseten.deploy_custom(
    model_name='XgBoost Model',
    model_class='XgBoostModel',
    model_files=['xgboost_model.py', 'boosted_model.pkl'],
    requirements_file='requirements.txt'
)