In [8]:
import pickle
import os
from minio import Minio
from minio.error import S3Error

import requests
from requests.auth import HTTPBasicAuth
import urllib3

from typing import Dict, Any, Optional

%load_ext lab_black

# dataset_encoder_dir path is generated in Pipeline and can be found in the Input/Output section of 'Preprocess Dataset'
dataset_encoder_dir = "artifacts/fraud-detection-bwtnn/2024/04/23/fraud-detection-bwtnn-536829612/preprocess-dataset-dataset_encoder_dir.tgz"

# Description of the pipeline artifact to fetch. Only the "s3" -> "key" entry
# is read by the download further down in this cell.
encoder_artifact = {
    "name": "preprocess-dataset-dataset_encoder_dir",
    "path": "/tmp/outputs/dataset_encoder_dir/data",
    "s3": {"key": dataset_encoder_dir},
}

# MinIO configuration
# Every setting can be overridden through an environment variable so that real
# credentials do not have to be written into the notebook. The fallbacks are
# the values this notebook used before, so it keeps working unchanged when the
# variables are not set.
minio_endpoint = os.environ.get(
    "MINIO_ENDPOINT", "172.30.131.125:9000"
)  # e.g., 'localhost:9000'
minio_access_key = os.environ.get("MINIO_ACCESS_KEY", "minio")
minio_secret_key = os.environ.get("MINIO_SECRET_KEY", "minio123")

# File details
bucket_name = "mlpipeline"
# The archive is written next to the notebook's current working directory.
local_file_path = os.path.abspath("preprocess-dataset-dataset_encoder_dir.tgz")

# Create a MinIO client with the endpoint and access keys
minio_client = Minio(
    minio_endpoint,
    access_key=minio_access_key,
    secret_key=minio_secret_key,
    secure=False,  # Set to False if the MinIO server doesn't support HTTPS
)

try:
    # Get the object and write it to the local file path
    minio_client.fget_object(
        bucket_name, encoder_artifact["s3"]["key"], local_file_path
    )
    print(f"File downloaded successfully to {local_file_path}")
except Exception as exc:
    # Best-effort download: report the problem and let the notebook continue.
    print(f"An error occurred: {exc}")

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [25]:
def get_s3_object(
    s3_artifact: Dict[str, Any],
    f_type: str = "tgz",
    bucket_name: str = "mlpipeline",
    minio_endpoint: str = "172.30.131.125:9000",
    minio_access_key: str = "minio",
    minio_secret_key: str = "minio123",
    secure: bool = False,
) -> Optional[str]:
    """Download one pipeline artifact from MinIO into the working directory.

    Args:
        s3_artifact: Artifact description. ``s3_artifact["name"]`` is used to
            build the local file name and ``s3_artifact["s3"]["key"]`` is the
            object key inside the bucket.
        f_type: File extension (without the dot) appended to the local name.
        bucket_name: Bucket that holds the object.
        minio_endpoint: ``host:port`` of the MinIO server.
        minio_access_key: Access key used to authenticate.
        minio_secret_key: Secret key used to authenticate.
        secure: Passed to the MinIO client; ``False`` talks plain HTTP.

    Returns:
        The absolute path of the downloaded file, or ``None`` when the
        download failed (the error is printed, not raised).
    """
    # NOTE(review): the default endpoint and credentials are hardcoded in the
    # signature; pass them explicitly (e.g. from os.environ) for anything that
    # is not a throw-away environment.
    local_file_path = os.path.abspath(f'{s3_artifact["name"]}.{f_type}')

    # Create a MinIO client with the endpoint and access keys
    minio_client = Minio(
        minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=secure,
    )

    try:
        # Get the object and write it to the local file path
        minio_client.fget_object(bucket_name, s3_artifact["s3"]["key"], local_file_path)
    except Exception as exc:
        # Keep the original best-effort behaviour (report, do not raise), but
        # signal the failure to the caller through the return value.
        print(f"An error occurred: {exc}")
        return None

    print(f"File downloaded successfully to {local_file_path}")
    return local_file_path

In [27]:
# Object key of the encoder archive inside the bucket.
dataset_encoder_dir = "artifacts/fraud-detection-bwtnn/2024/04/23/fraud-detection-bwtnn-536829612/preprocess-dataset-dataset_encoder_dir.tgz"

# Build the artifact description entry by entry: the name becomes the local
# file name, and the nested "s3" -> "key" entry is the object to fetch.
encoder_artifact = {"name": "preprocess-dataset-dataset_encoder_dir"}
encoder_artifact["path"] = "/tmp/outputs/dataset_encoder_dir/data"
encoder_artifact["s3"] = {"key": dataset_encoder_dir}

# Fetch the archive using the helper's default bucket and MinIO settings.
get_s3_object(encoder_artifact)

File downloaded successfully to /home/jovyan/IBMi-fraud-detection/notebooks/preprocess-dataset-dataset_encoder_dir.tgz


In [31]:
# Download the ONNX model artifact.

# Object key of the converted model archive inside the bucket.
onnx_s3_dir = "artifacts/fraud-detection-bwtnn/2024/04/23/fraud-detection-bwtnn-3218414436/convert-model-to-onnx-onnx_model_dir.tgz"

# Artifact description; the name doubles as the local archive's base name.
onnx_artifact = {"name": "convert-model-to-onnx-onnx_model_dir", "path": "tmp/"}
onnx_artifact["s3"] = {"key": f"{onnx_s3_dir}"}

# Fetch the archive using the helper's default bucket and MinIO settings.
get_s3_object(onnx_artifact)

File downloaded successfully to /home/jovyan/IBMi-fraud-detection/notebooks/convert-model-to-onnx-onnx_model_dir.tgz


In [36]:
import tarfile
import dill
import shutil

# Unpack the downloaded ONNX model archive into the local "model" directory.
with tarfile.open(f"{onnx_artifact['name']}.tgz", "r:gz") as tar:
    # The archive was fetched from remote object storage. When the running
    # Python supports extraction filters, use the "data" filter so members
    # with absolute paths or ".." components cannot be written outside
    # "model". Older interpreters fall back to the previous behaviour.
    if hasattr(tarfile, "data_filter"):
        tar.extractall("model", filter="data")
    else:
        tar.extractall("model")

In [37]:
import onnxruntime as ort

In [43]:
# Load the extracted ONNX model into an inference session restricted to the
# CPU execution provider.
onnx_model_path = "model/model.onnx"
execution_providers = ["CPUExecutionProvider"]
session = ort.InferenceSession(onnx_model_path, providers=execution_providers)

# Trailing expression: the session options object is shown as the cell output.
session.get_session_options()

<onnxruntime.capi.onnxruntime_pybind11_state.SessionOptions at 0x7fff787793b0>

In [44]:
# Location of the serialized mapper, relative to the working directory.
# NOTE(review): none of the cells visible here unpack the encoder archive into
# "encoders/" - presumably that was done by hand; confirm before a fresh run.
mapper_path = os.path.join("encoders", "data", "mapper.pkl")

# NOTE(security): dill.load, like pickle.load, can execute arbitrary code while
# deserializing. Only load mapper files that come from a trusted source.
with open(mapper_path, "rb") as mapper_file:
    t_mapper = dill.load(mapper_file)

# Trailing expression: display the loaded mapper.
t_mapper

In [46]:
from sklearn.preprocessing import (
    LabelEncoder,
    OneHotEncoder,
    FunctionTransformer,
    MinMaxScaler,
    LabelBinarizer,
)
from sklearn_pandas import DataFrameMapper
import pandas as pd

# from encoders import fraudEncoder, amtEncoder, decimalEncoder, timeEncoder


class FraudDatasetTransformer:
    """Prepare transaction rows and run them through a DataFrameMapper."""

    def __init__(self):
        """Create the transformer; it keeps no state of its own."""

    def transform(self, dataset: pd.DataFrame, mapper: DataFrameMapper):
        """Clean up ``dataset`` and return ``mapper.transform`` of the result.

        The input frame is left untouched. On a copy of it, the
        "merchant name" column is converted to strings, the "mcc", "zip" and
        "merchant state" columns are dropped, rows are ordered by
        ("user", "card") and the index is renumbered from zero.

        Args:
            dataset: Frame containing at least the columns named above.
            mapper: Object whose ``transform`` method is applied to the
                prepared frame.

        Returns:
            Whatever ``mapper.transform`` returns for the prepared frame.
        """
        merchant_as_text = dataset["merchant name"].astype(str)
        prepared = (
            dataset.assign(**{"merchant name": merchant_as_text})
            .drop(columns=["mcc", "zip", "merchant state"])
            .sort_values(by=["user", "card"])
            .reset_index(drop=True)
        )
        return mapper.transform(prepared)

In [47]:
# A single hand-written transaction record used to smoke-test the encoding.
test_input = {
    "index": 4,
    "user": 2,
    "card": 1,
    "year": 2006,
    "month": 5,
    "day": 25,
    "time": "16:17",
    "amount": "$37.77",
    "use chip": "Swipe Transaction",
    "merchant name": 3987158270252316808,
    "merchant city": "Algiers",
    "merchant state": "Algeria",
    "zip": 0,
    "mcc": 5921,
    "errors?": "",
    "is fraud?": "Yes",
}

dataset_transformer = FraudDatasetTransformer()

# One-row frame built from the record above.
# (A bare `test_data` expression used to follow this line; it was removed
# because a mid-cell expression is never displayed.)
test_data = pd.DataFrame([test_input])

# Encode the row with the previously loaded mapper and show the result.
vdf = dataset_transformer.transform(test_data, t_mapper)
vdf.head()

Unnamed: 0,is fraud?,merchant name_0,merchant name_1,merchant name_2,merchant name_3,merchant name_4,merchant name_5,merchant name_6,merchant name_7,merchant name_8,...,"errors?_Bad Expiration,Technical Glitch,","errors?_Bad PIN,","errors?_Bad PIN,Insufficient Balance,","errors?_Bad PIN,Technical Glitch,","errors?_Bad Zipcode,","errors?_Insufficient Balance,","errors?_Insufficient Balance,Technical Glitch,","errors?_Technical Glitch,",year_month_day_time,amount
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0.424372,0.41138


In [58]:
import numpy as np
import json
import requests
from tensorflow import keras

# Data preparation
# The first column of vdf is split off as the label; every remaining column is
# converted to a float32 feature matrix.
label_column = vdf.columns.values[0]
x = vdf.drop(label_column, axis=1).to_numpy().astype(np.float32)
y = np.array([vdf[label_column].iloc[0]])

# Input geometry used for the request: the single encoded row is padded or
# truncated to num_features columns and then repeated sequence_length times.
sequence_length = 4  # Model's expected sequence length
num_features = 103  # Model's expected number of features per sequence element

# Report how many feature columns the encoded row actually has.
original_features = x.shape[1]
print(original_features)

missing_features = num_features - original_features
if missing_features > 0:
    print("pad maybe?")
    # Too few columns: right-pad with zeros. Whether zero padding is
    # appropriate depends on the model and is only an assumption here.
    x_padded = np.pad(
        x,
        ((0, 0), (0, missing_features)),
        mode="constant",
        constant_values=0,
    )
else:
    print("reshape accordingly")
    # Enough (or too many) columns: keep only the first num_features.
    x_padded = x[:, :num_features]

# Repeat the row along the sequence axis, then add the leading batch axis so
# the array has shape (1, sequence_length, num_features).
target_shape = (1, sequence_length, num_features)
x_tiled = np.tile(x_padded, (sequence_length, 1))
x_reshaped = x_tiled.reshape(target_shape)

# Preparing the payload
input_tensor = {
    "name": "input_1",
    "shape": list(target_shape),
    "datatype": "FP32",
    "data": x_reshaped.tolist(),
}
payload = {"inputs": [input_tensor]}


# # Sending the request
# res = requests.post(PREDICT_ENDPOINT, headers=HEADERS, data=json.dumps(payload))
# response = json.loads(res.text)

# # Handle response
# if "error" in response:
#     print(f"Error: {response['error']}")
# else:
#     print(response["outputs"])
#     pred = response["outputs"][0]["data"][0]
#     print(f"Actual ({y[0]}) vs. Prediction ({round(pred, 3)} => {int(round(pred, 0))})")

103
reshape accordingly


In [59]:
# Read the name and expected dimensions of the model's first input directly
# from the loaded session.
model_input = session.get_inputs()[0]
input_name = model_input.name
sequence_length = model_input.shape[1]  # Model's expected sequence length
num_features = model_input.shape[2]

# Print each value on its own line.
for value in (input_name, sequence_length, num_features):
    print(value)

input_1
4
103


In [60]:
# Feed the prepared array to the model's first input; passing None as the
# first argument to session.run requests all model outputs.
input_feed = {input_name: x_reshaped}
outputs = session.run(None, input_feed)

In [61]:
# Display the list of output arrays returned by session.run.
outputs

[array([[0.99695444]], dtype=float32)]