In [12]:
import pandas as pd
from sklearn.datasets import make_classification

# Generate the data: a synthetic classification problem with `n_samples` rows
# and `n_features` columns, 5 of which are informative.
n_samples, n_features = 10000, 7
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=5,
    # Fixed seed so that Restart & Run All regenerates the same dataset (and
    # therefore the same CSV and the same trained model) every time.
    random_state=42,
)

# Save it as a CSV: one column per feature ("feature 0" ... "feature 6")
# followed by the label in a "target" column.
feature_names = [f"feature {i}" for i in range(n_features)]
df = pd.DataFrame(X, columns=feature_names)
df["target"] = y

# NOTE(review): this absolute path is machine-specific. The training cell reads
# the same literal path, so both must be changed together if it is relocated.
df.to_csv("/Users/andrewcosta/Desktop/API/data.csv", index=False)

In [14]:
import xgboost as xgb
from sklearn.model_selection import KFold, cross_validate, train_test_split

# Reload the dataset from disk and separate the label column from the predictors.
data = pd.read_csv("/Users/andrewcosta/Desktop/API/data.csv")
y = data[["target"]]
X = data.drop(columns="target")

# Hand XGBoost plain NumPy arrays, so the DMatrix carries no pandas column names.
dtrain = xgb.DMatrix(data=X.to_numpy(), label=y.to_numpy())

# Training configuration: tree booster with a logistic objective, AUC as the
# evaluation metric.
params = {
    "objective": "binary:logistic",
    "booster": "gbtree",
    "eval_metric": "auc",
}

# No num_boost_round is passed, so the library default number of rounds is used.
booster = xgb.train(params, dtrain)


In [16]:
import bentoml

# Register the trained booster in the BentoML model store under the name
# "xgb_initial"; the handle returned by save_model is kept in `bento_xgb` and
# echoed as the cell output.
bento_xgb = bentoml.xgboost.save_model(
    "xgb_initial",
    booster,
)
bento_xgb

Model(tag="xgb_initial:eywptibu4cdwj4dz", path="/Users/andrewcosta/bentoml/models/xgb_initial/eywptibu4cdwj4dz/")

In [17]:
from importlib_metadata import metadata


from sklearn.metrics import roc_auc_score

# Measure the AUC instead of recording a hand-typed placeholder. This is the
# score on the training DMatrix itself (there is no held-out split in this
# notebook), so treat it as an optimistic upper bound rather than a
# generalisation estimate.
train_auc = float(roc_auc_score(dtrain.get_label(), booster.predict(dtrain)))

# Save the same booster under a second name, attaching user-defined metadata
# (the measured AUC and the per-feature "gain" importances) and a label.
bentoml.xgboost.save_model(
    "xgb_custom",
    booster,
    metadata={"auc": train_auc,
              "feature_importance": booster.get_score(importance_type="gain")},
    labels={"author": "Andrew"}
)

Model(tag="xgb_custom:udleporu4cdwj4dz", path="/Users/andrewcosta/bentoml/models/xgb_custom/udleporu4cdwj4dz/")

In [20]:
# Retrieve the model saved as "xgb_custom" from the BentoML store.
# This rebinds `booster` to the loaded object, replacing the in-memory one
# produced by training; the bare name below echoes it as the cell output.
booster = bentoml.xgboost.load_model(
    "xgb_custom"
)
booster

<xgboost.core.Booster at 0x7ff9287609d0>

In [26]:
# test model on sample data
import numpy as np

# One random row of uniform [0, 1) values. The width is taken from the booster
# itself rather than a hard-coded 7, so the cell keeps working if the feature
# count changes, and the generator is seeded so the displayed prediction is
# reproducible across runs.
sample = np.random.default_rng(0).random(size=(1, booster.num_features()))

# Wrap the row in a DMatrix (what Booster.predict expects) and score it.
booster.predict(xgb.DMatrix(sample))

array([0.17240405], dtype=float32)

In [29]:
# Look up the "xgb_custom:latest" entry in the BentoML model store.
# NOTE(review): despite its name, `tag` holds whatever bentoml.models.get()
# returns (an object exposing .to_runner()), not a plain tag string.
tag = bentoml.models.get(
    "xgb_custom:latest"
)

# Build a runner from that stored model.
xgb_runner = tag.to_runner()