In [None]:
# model from https://www.kaggle.com/code/marcelotc/creditcard-fraud-xgboost-example
# dataset from https://www.openml.org/search?type=data&sort=runs&id=42175&status=active

In [46]:
import sys
import arff
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [47]:
# Prepare dataset: read the ARFF file and turn it into a pandas DataFrame.

with open('dataset.arff', 'r') as arff_file:
    data = arff.load(arff_file)

# The first element of every attribute entry is used as the column name.
column_names = [attribute[0] for attribute in data['attributes']]
df = pd.DataFrame(data['data'], columns=column_names)
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [54]:
# train model


def split_data(df):
    """Split ``df`` into ordered train/test feature and target frames.

    The columns at positions 1 through 29 are used as features and every
    column from position 30 onward as the target. Rows are not shuffled:
    the split follows the row order of ``df`` as passed in, with 70% of the
    rows going to the training set and 30% to the test set.

    Args:
        df: DataFrame holding the feature columns followed by the target.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The targets are
        DataFrames because they are selected with a list-like of columns.
    """
    all_columns = df.columns
    features = df[all_columns[1:30]]
    labels = df[all_columns[30:]]
    # NOTE(review): with shuffle=False the random_state argument presumably
    # has no effect; it is kept to preserve the existing call exactly.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        train_size=0.70,
        test_size=0.30,
        random_state=1,
        shuffle=False,
    )
    return X_train, X_test, y_train, y_test


def train_model(X_train, y_train):
    """Fit an ``xgb.XGBClassifier`` with default settings and return it.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted classifier instance.
    """
    classifier = xgb.XGBClassifier()
    classifier.fit(X_train, y_train)
    return classifier

In [56]:
# Order the rows by transaction amount before splitting; split_data does not
# shuffle, so the train/test boundary follows this ordering.
df_by_amount = df.sort_values("Amount")
X_train, X_test, y_train, y_test = split_data(df_by_amount)

# Fit the classifier on the training part and preview predictions on the rest.
model = train_model(X_train, y_train)
model.predict(X_test)

array([0, 0, 0, ..., 0, 0, 1])

In [50]:
import bentoml

# Save the trained model with BentoML under the name "fraud_det", registering
# its `predict` method with default signature options.
bentoml.xgboost.save_model("fraud_det", model, signatures={"predict": {}})

Model(tag="fraud_det:ovw2nhwpngtu2usu", path="/home/agent/bentoml/models/fraud_det/ovw2nhwpngtu2usu/")

In [70]:
# POST the contents of test.json as JSON to the /predict endpoint on 127.0.0.1:3000.
!curl -X POST -H "Content-Type: application/json" -d @test.json http://127.0.0.1:3000/predict

'{"V1":1.2488036317,"V2":0.0472080527,"V3":0.423387603,"V4":-0.1395152874,"V5":-0.592216995,"V6":-0.9806539078,"V7":-0.0424160604,"V8":-0.1230440069,"V9":-0.0889622192,"V10":-0.0344388456,"V11":1.7045288079,"V12":1.0742192446,"V13":0.0915403913,"V14":0.4517787353,"V15":0.3165205589,"V16":0.3421846426,"V17":-0.448283751,"V18":-0.2527195704,"V19":0.3794540934,"V20":-0.0646175828,"V21":-0.1662153226,"V22":-0.5015981644,"V23":0.1249953403,"V24":0.5940703828,"V25":0.11962603,"V26":0.8165008485,"V27":-0.0894647479,"V28":-0.0040547877,"Amount":0.0}'

In [72]:
# Predict on the test features with the in-notebook model.
model.predict(X_test)

array([0, 0, 0, ..., 0, 0, 1])

### create a service.py

In [62]:
%%writefile service.py

import bentoml
import pandas as pd

runner = bentoml.xgboost.get("fraud_det").to_runner()
# runner2 = bentoml.pytorch.get....
svc = bentoml.Service("credit_card_fraud_detection", runners=[runner])

@svc.api(input=bentoml.io.JSON(), output=bentoml.io.JSON())
def predict(input_json):
    df = pd.DataFrame.from_dict(input_json, orient='index').transpose()
    result = runner.predict.run(df)[0]
    return {"result": result}

Overwriting service.py


In [59]:
# Serialise the first test row to a JSON string.
X_test.iloc[0].to_json()

'{"V1":1.3635545503,"V2":-0.6507625155,"V3":-0.2589375869,"V4":-0.9727123819,"V5":-0.4249316841,"V6":-0.2340828234,"V7":-0.4405079618,"V8":-0.0750291638,"V9":-1.0591912293,"V10":0.8109454536,"V11":0.468825442,"V12":-0.3885153607,"V13":0.0649803189,"V14":0.1156530262,"V15":0.0605835482,"V16":1.5096996123,"V17":-0.3289840144,"V18":-0.5145656267,"V19":1.1914825353,"V20":0.1995154529,"V21":0.2216831274,"V22":0.4262267792,"V23":-0.2965709432,"V24":-0.6786250546,"V25":0.7171887473,"V26":-0.0735623156,"V27":-0.0216865147,"V28":-0.0022274374,"Amount":59.8}'

### Specify the environment with bentofile.yaml

In [67]:
%%writefile bentofile.yaml

# Entry point in the form <module>:<Service variable>, i.e. `svc` in service.py.
service: "service:svc"
python:
  packages:
    - xgboost
    # service.py imports pandas to build the model input frame.
    - pandas
    - bentoml-plugins-arize

Overwriting bentofile.yaml


In [None]:
# Serve the BentoML project located in the current directory.
!bentoml serve .

In [74]:
# Inspect the training features.
X_train

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
15816,1.248804,0.047208,0.423388,-0.139515,-0.592217,-0.980654,-0.042416,-0.123044,-0.088962,-0.034439,...,-0.064618,-0.166215,-0.501598,0.124995,0.594070,0.119626,0.816501,-0.089465,-0.004055,0.0
77470,-1.188664,-0.612034,2.422204,-0.812786,0.318493,-0.671637,-0.432053,0.068237,0.684183,-0.410662,...,-0.044822,0.002347,0.164823,-0.096075,0.393014,-0.608295,0.745069,-0.230070,-0.018679,0.0
190885,1.868263,0.273764,-0.288023,3.835852,0.268329,0.817380,-0.287993,0.203258,-0.504927,1.450221,...,-0.299850,0.115927,0.610472,0.024259,-0.454322,0.117948,0.144547,-0.002638,-0.061637,0.0
87335,-0.848470,1.426562,2.137094,2.852036,-0.366945,1.158146,-0.416142,0.812490,-0.688381,0.302842,...,0.186925,-0.210710,-0.369433,-0.271748,-0.571453,0.232587,0.273226,0.248532,0.088226,0.0
174481,-1.184195,0.804518,2.240498,2.853175,1.038068,0.171728,0.457665,0.290123,-1.100519,0.283850,...,-0.189527,0.110209,0.254591,-0.475647,-0.046345,0.888929,0.225662,-0.007746,0.057191,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21282,1.288823,-0.667240,0.171935,-0.717481,-0.997716,-0.858751,-0.458894,-0.109104,-1.037500,0.827694,...,0.125266,0.283858,0.570425,-0.159770,0.344318,0.555530,-0.136097,-0.024254,0.006984,59.8
82623,1.243285,-1.225682,-0.165949,-1.721136,-0.307066,1.304924,-0.963734,0.428492,-2.336862,1.408065,...,-0.381326,-0.391960,-0.805981,0.247735,-1.419112,-0.091887,-0.400574,0.065765,-0.001657,59.8
22942,1.288333,-0.742320,0.146965,-0.709788,-1.084582,-0.874912,-0.530117,-0.048522,-0.881143,0.857917,...,0.059641,0.271793,0.484509,-0.146831,0.328736,0.521048,-0.133200,-0.031106,0.004884,59.8
121235,-0.912355,0.987125,0.464520,-0.292547,0.353597,0.231195,0.557031,0.512343,-0.956123,-0.491173,...,-0.164577,-0.288355,-1.104393,0.231838,-0.861712,-0.770104,-0.187350,-0.123038,0.055210,59.8


### Identify monitoring data: features and predictions

In [63]:
%%writefile service.py

import bentoml
import pandas as pd

runner = bentoml.xgboost.get("fraud_det").to_runner()
svc = bentoml.Service("credit_card_fraud_detection", runners=[runner])

@svc.api(input=bentoml.io.JSON(), output=bentoml.io.JSON())
def predict(input_json):
    with bentoml.monitor("fraud_det") as mon:
        mon.log(input_json['Amount'], name="Amount", role="feature", data_type="numerical")
        for i in range(1, 29):
            col_name = f"V{i}"
            mon.log(input_json[col_name], name=col_name, role="feature", data_type="numerical")

        df = pd.DataFrame.from_dict(input_json, orient='index').transpose()
        result = runner.predict.run(df)[0]

        mon.log(input_json['Amount'], name="prediction", role="prediction", data_type="categorical")

    return {"result": result}

Overwriting service.py


In [None]:
# Serve the `svc` object from the service module.
!bentoml serve service:svc

### Deployment config to ship data to Arize

In [76]:
%%writefile bentoml_deployment.yaml

# BentoML configuration that enables monitoring through the Arize plugin class.
# NOTE(review): space_key and api_key are credentials stored in plain text —
# confirm these are placeholders and avoid committing real keys.
monitoring:
  enabled: true
  type: bentoml_plugins.arize.ArizeMonitor
  options:
    space_key: a3854d6
    api_key: be00f3850d11bb3ea12

Overwriting bentoml_deployment.yaml


In [None]:
# Serve service:svc with BENTOML_CONFIG pointing at bentoml_deployment.yaml.
!BENTOML_CONFIG=bentoml_deployment.yaml bentoml serve service:svc

In [None]:
# Serialise the first test row to a JSON string.
X_test.iloc[0].to_json()

In [None]:
# Evaluate the classifier trained in this notebook on the test split.
y_pred = model.predict(X_test)
# accuracy_score takes (y_true, y_pred); y_test is a one-column DataFrame, so
# flatten it to a 1-D array first.
accuracy_score(y_test.to_numpy().ravel(), y_pred)

In [None]:
import requests

# Send 100 test rows, one request per row, to the locally running service.
for _, row in X_test.iloc[20000:20100].iterrows():
    json_str = row.to_json()
    print(json_str)
    response = requests.post(
        "http://127.0.0.1:3000/predict",
        data=json_str,
        # Declare the body as JSON, matching the curl call used earlier.
        headers={"Content-Type": "application/json"},
        # Avoid hanging forever if the server is not responding.
        timeout=10,
    )
    # Surface HTTP errors instead of silently ignoring failed requests.
    response.raise_for_status()

In [None]:
# NOTE(review): X_test already contains only the feature columns selected in
# split_data, so this positional slice drops its first column — confirm intent.
X_test.iloc[:, 1:30]

In [None]:
# 25 - 27 mins