# BentoML Demo - IEEE-CIS Fraud Detection

Accept dataset rules on Kaggle before downloading: https://www.kaggle.com/competitions/ieee-fraud-detection/data

In [1]:
# Set Kaggle Credentials for downloading dataset
# %env KAGGLE_USERNAME=
# %env KAGGLE_KEY=

In [2]:
# !kaggle competitions download -c ieee-fraud-detection
# !rm -rf ./data/
# !unzip -d ./data/ ieee-fraud-detection.zip && rm ieee-fraud-detection.zip

In [3]:
import pandas as pd
import numpy as np

# Read the labelled training transactions from the extracted Kaggle archive.
df_transactions = pd.read_csv("./data/train_transaction.csv")

# Separate the target column from the feature columns.
y = df_transactions["isFraud"]
X = df_transactions.drop(columns=["isFraud"])

In [4]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import (
    StandardScaler,
    OneHotEncoder,
    LabelEncoder,
    OrdinalEncoder,
)
from sklearn.feature_selection import SelectPercentile, chi2

# Column groups are derived from the dtypes of the raw training frame.
numeric_features = df_transactions.select_dtypes(include="float64").columns
categorical_features = df_transactions.select_dtypes(include="object").columns

# float64 columns: fill missing values with the column median.
numeric_imputer = SimpleImputer(strategy="median")
# object columns: map categories to ordinal codes; categories not seen during
# fit are encoded as -1 at transform time.
categorical_encoder = OrdinalEncoder(
    handle_unknown="use_encoded_value",
    unknown_value=-1,
)

# Columns in neither group are passed through unchanged, and output column
# names are kept without the "num"/"cat" transformer prefix.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_imputer, numeric_features),
        ("cat", categorical_encoder, categorical_features),
    ],
    remainder="passthrough",
    verbose_feature_names_out=False,
)
# Emit pandas DataFrames from transform()/fit_transform().
preprocessor.set_output(transform="pandas")

In [5]:
# Always fit/transform from the raw frame so this cell is idempotent. The
# previous form (`X = preprocessor.fit_transform(X)`) consumed its own output,
# so re-running the cell refit the preprocessor on already-encoded columns.
# NOTE(review): the preprocessor is still fitted on all rows before the
# train/test split below, so the fitted statistics include held-out rows.
X = preprocessor.fit_transform(df_transactions.drop(columns=["isFraud"]))

In [6]:
from sklearn.model_selection import train_test_split

# Fix the seed so the split (and every metric computed from it) is reproducible
# after a kernel restart, and stratify on the label so both partitions keep the
# same class ratio as `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
)

In [7]:
import xgboost as xgb


def train(n_estimators, max_depth):
    """Fit an XGBoost binary classifier on the notebook's training split.

    Reads the notebook-level ``X_train``/``y_train`` for fitting and passes
    ``(X_test, y_test)`` as the evaluation set, scored with the ``aucpr``
    metric.

    Args:
        n_estimators: Number of gradient boosted trees.
        max_depth: Maximum depth of each tree.

    Returns:
        The fitted ``xgb.XGBClassifier``.
    """
    classifier = xgb.XGBClassifier(
        objective="binary:logistic",
        eval_metric="aucpr",
        tree_method="hist",
        enable_categorical=True,
        n_estimators=n_estimators,
        max_depth=max_depth,
    )
    validation_sets = [(X_test, y_test)]
    # fit() returns the estimator itself, so returning `classifier` afterwards
    # yields the same object as chaining `.fit(...)` on the constructor.
    classifier.fit(X_train, y_train, eval_set=validation_sets)
    return classifier

In [8]:
# Small model: 300 gradient boosted trees, each with a maximum depth of 5.
model_sm = train(n_estimators=300, max_depth=5)

[0]	validation_0-aucpr:0.35057
[1]	validation_0-aucpr:0.39705
[2]	validation_0-aucpr:0.43012
[3]	validation_0-aucpr:0.45298
[4]	validation_0-aucpr:0.46152
[5]	validation_0-aucpr:0.47433
[6]	validation_0-aucpr:0.47888
[7]	validation_0-aucpr:0.48862
[8]	validation_0-aucpr:0.50092
[9]	validation_0-aucpr:0.50552
[10]	validation_0-aucpr:0.51490
[11]	validation_0-aucpr:0.52066
[12]	validation_0-aucpr:0.52491
[13]	validation_0-aucpr:0.52819
[14]	validation_0-aucpr:0.53432
[15]	validation_0-aucpr:0.53826
[16]	validation_0-aucpr:0.54470
[17]	validation_0-aucpr:0.55693
[18]	validation_0-aucpr:0.56090
[19]	validation_0-aucpr:0.56635
[20]	validation_0-aucpr:0.57033
[21]	validation_0-aucpr:0.57188
[22]	validation_0-aucpr:0.57653
[23]	validation_0-aucpr:0.57981
[24]	validation_0-aucpr:0.58228
[25]	validation_0-aucpr:0.58583
[26]	validation_0-aucpr:0.58842
[27]	validation_0-aucpr:0.58951
[28]	validation_0-aucpr:0.59201
[29]	validation_0-aucpr:0.59523
[30]	validation_0-aucpr:0.59629
[31]	validation_0-

In [9]:
import bentoml

# Save the small model with BentoML. `predict_proba` is registered as a
# batchable signature, and the fitted preprocessor is attached as a custom
# object so it can be loaded back together with the model.
sm_signatures = {"predict_proba": {"batchable": True}}
sm_custom_objects = {"preprocessor": preprocessor}
bentoml.xgboost.save_model(
    "ieee-fraud-detection-sm",
    model_sm,
    signatures=sm_signatures,
    custom_objects=sm_custom_objects,
)

Model(tag="ieee-fraud-detection-sm:l5e27oxepwu45uqj", path="/Users/chaoyuyang/bentoml/models/ieee-fraud-detection-sm/l5e27oxepwu45uqj/")

In [10]:
# Fetch the reference for the `latest` version of the small model and display it.
sm_model_tag = "ieee-fraud-detection-sm:latest"
model_ref = bentoml.xgboost.get(sm_model_tag)
model_ref

Model(tag="ieee-fraud-detection-sm:l5e27oxepwu45uqj", path="/Users/chaoyuyang/bentoml/models/ieee-fraud-detection-sm/l5e27oxepwu45uqj")

In [11]:
import bentoml
import pandas as pd
import numpy as np

# Load the saved small model and the preprocessor stored alongside it.
model_ref = bentoml.xgboost.get("ieee-fraud-detection-sm:latest")
model_runner = model_ref.to_runner()
# BentoML warns that init_local() is for debugging and testing only.
model_runner.init_local()
model_preprocessor = model_ref.custom_objects["preprocessor"]

# Only the first 500 rows are scored, so let pandas stop parsing after 500 rows
# instead of loading the entire CSV and slicing it afterwards.
test_transactions_raw = pd.read_csv("./data/test_transaction.csv", nrows=500)
# Keep the raw rows under their own name; `test_transactions` holds the
# preprocessed features, as before.
test_transactions = model_preprocessor.transform(test_transactions_raw)
result = model_runner.predict_proba.run(test_transactions)
# Index of the highest-probability column per row, i.e. the predicted class.
np.argmax(result, axis=1)

'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

For the Inference Graph demo, let's train two additional models by tweaking the parameters:

In [12]:
# Large model: 3000 gradient boosted trees, each with a maximum depth of 15.
model_lg = train(n_estimators=3000, max_depth=15)

[0]	validation_0-aucpr:0.51754
[1]	validation_0-aucpr:0.55831
[2]	validation_0-aucpr:0.59566
[3]	validation_0-aucpr:0.62030
[4]	validation_0-aucpr:0.64198
[5]	validation_0-aucpr:0.65163
[6]	validation_0-aucpr:0.66518
[7]	validation_0-aucpr:0.68211
[8]	validation_0-aucpr:0.69342
[9]	validation_0-aucpr:0.70297
[10]	validation_0-aucpr:0.71259
[11]	validation_0-aucpr:0.72505
[12]	validation_0-aucpr:0.73570
[13]	validation_0-aucpr:0.74534
[14]	validation_0-aucpr:0.75477
[15]	validation_0-aucpr:0.76126
[16]	validation_0-aucpr:0.76926
[17]	validation_0-aucpr:0.77453
[18]	validation_0-aucpr:0.77792
[19]	validation_0-aucpr:0.78178
[20]	validation_0-aucpr:0.78541
[21]	validation_0-aucpr:0.78715
[22]	validation_0-aucpr:0.78958
[23]	validation_0-aucpr:0.79153
[24]	validation_0-aucpr:0.79416
[25]	validation_0-aucpr:0.79516
[26]	validation_0-aucpr:0.79569
[27]	validation_0-aucpr:0.79788
[28]	validation_0-aucpr:0.79897
[29]	validation_0-aucpr:0.79988
[30]	validation_0-aucpr:0.80037
[31]	validation_0-

In [13]:
import bentoml

# Save the large model with BentoML, using the same batchable `predict_proba`
# signature and attaching the fitted preprocessor as a custom object.
lg_signatures = {"predict_proba": {"batchable": True}}
lg_custom_objects = {"preprocessor": preprocessor}
bentoml.xgboost.save_model(
    "ieee-fraud-detection-lg",
    model_lg,
    signatures=lg_signatures,
    custom_objects=lg_custom_objects,
)

Model(tag="ieee-fraud-detection-lg:wwbe4mhep6u45uqj", path="/Users/chaoyuyang/bentoml/models/ieee-fraud-detection-lg/wwbe4mhep6u45uqj/")

In [14]:
# tiny model with 100 gradient boosted trees and a maximum tree depth of 3
model_tiny = train(100, 3)

[0]	validation_0-aucpr:0.26747
[1]	validation_0-aucpr:0.33222
[2]	validation_0-aucpr:0.36089
[3]	validation_0-aucpr:0.37002
[4]	validation_0-aucpr:0.38372
[5]	validation_0-aucpr:0.40563
[6]	validation_0-aucpr:0.42298
[7]	validation_0-aucpr:0.42626
[8]	validation_0-aucpr:0.43737
[9]	validation_0-aucpr:0.44028
[10]	validation_0-aucpr:0.45112
[11]	validation_0-aucpr:0.45549
[12]	validation_0-aucpr:0.45894
[13]	validation_0-aucpr:0.46257
[14]	validation_0-aucpr:0.46803
[15]	validation_0-aucpr:0.47087
[16]	validation_0-aucpr:0.47482
[17]	validation_0-aucpr:0.47754
[18]	validation_0-aucpr:0.48051
[19]	validation_0-aucpr:0.48570
[20]	validation_0-aucpr:0.48867
[21]	validation_0-aucpr:0.49322
[22]	validation_0-aucpr:0.49565
[23]	validation_0-aucpr:0.49858
[24]	validation_0-aucpr:0.50072
[25]	validation_0-aucpr:0.50380
[26]	validation_0-aucpr:0.50651
[27]	validation_0-aucpr:0.50923
[28]	validation_0-aucpr:0.51094
[29]	validation_0-aucpr:0.51211
[30]	validation_0-aucpr:0.51472
[31]	validation_0-

In [15]:
import bentoml

# Save the tiny model with BentoML, using the same batchable `predict_proba`
# signature and attaching the fitted preprocessor as a custom object.
tiny_signatures = {"predict_proba": {"batchable": True}}
tiny_custom_objects = {"preprocessor": preprocessor}
bentoml.xgboost.save_model(
    "ieee-fraud-detection-tiny",
    model_tiny,
    signatures=tiny_signatures,
    custom_objects=tiny_custom_objects,
)

Model(tag="ieee-fraud-detection-tiny:ygxwgrxep6u45uqj", path="/Users/chaoyuyang/bentoml/models/ieee-fraud-detection-tiny/ygxwgrxep6u45uqj/")