# Financial Fraud Detection

- The objective of this notebook is to showcase the usage of the ___financial-fraud-training___ NIM (microservice) (NEED LINK) and how to deploy the trained models it produces on the Triton Inference Server.
- We use [IBM TabFormer](https://github.com/IBM/TabFormer) as an example dataset.
- The dataset is then preprocessed before being passed to the training NIM.

NOTICE:
- This notebook assumes that you have completed the prerequisite setup steps (TODO: link to the prerequisites).

NOTE: The preprocessing code is written specifically for the TabFormer dataset and will not work with other datasets.

#### Import libraries

In [None]:
import os
import subprocess
import sys
import json
import time

----
# Step 1: Get and Prepare the data

___Unfortunately, the data needs to be downloaded manually___

1. Download the dataset: https://ibm.ent.box.com/v/tabformer-data/folder/130747715605
2. Untar and uncompress the file: `tar -xvzf ./transactions.tgz`
3. Put `card_transaction.v1.csv` in the `TabFormer/raw` folder

The goal is to produce the following structure

```
.
    data
    └── TabFormer
        └── raw
            └── card_transaction.v1.csv
```

In [None]:
# Once the raw data is placed as described above, set the path to the TabFormer directory

# Change this path to point to TabFormer data
# (resolved to an absolute path relative to the current working directory)
data_root_dir = os.path.abspath('../data/TabFormer/') 

# Change this path to the directory where you want to save your model
# By default the models are written to a `trained_models` folder inside the data directory.
model_output_dir = os.path.join(data_root_dir, 'trained_models')


# Create the output directory up front; no error is raised if it already exists
os.makedirs(model_output_dir, exist_ok=True)

In [None]:
def print_tree(directory, prefix=""):
    """Print the contents of ``directory`` as an indented tree.

    Entries are listed in sorted order, one per line, each preceded by
    ``prefix`` and a branch connector. Sub-directories are expanded
    recursively right below their own entry.
    """
    names = sorted(os.listdir(directory))
    last_position = len(names) - 1

    for position, name in enumerate(names):
        # The final entry closes the branch; earlier ones keep the vertical rail.
        if position == last_position:
            branch, child_indent = "└── ", "    "
        else:
            branch, child_indent = "├── ", "│   "

        print(f"{prefix}{branch}{name}")

        full_path = os.path.join(directory, name)
        if os.path.isdir(full_path):
            # Descend, extending the prefix so nested entries line up.
            print_tree(full_path, prefix + child_indent)

In [None]:
# Check if the raw data has been placed properly
# (prints every file and folder found under `data_root_dir`)
print_tree(data_root_dir)

---
# Step 2: Preprocess the data 
- Import the Python function for preprocessing the TabFormer data
- Call the `preprocess_data` function (from the `preprocess_TabFormer` module) to prepare the data


In [None]:
# Add the "src" directory to the search path
# NOTE(review): this resolves to <parent of the current working directory>/src, so it
# assumes the notebook is run from a folder that sits next to `src` — confirm.
src_dir = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), 'src'))
sys.path.insert(0, src_dir)

# should be able to import from "src" folder now
from preprocess_TabFormer import preprocess_data

In [None]:
# Preprocess the data
# `mask_mapping` and `feature_mask` are kept because the Shapley-value cells
# at the end of the notebook use them.
mask_mapping, feature_mask = preprocess_data(data_root_dir)

# this will output status as it correlates different attributes with target column

In [None]:
# You should now see files under a "gnn" folder and under an "xgb" folder
print_tree(data_root_dir)

-----
# Step 3:  Now train the model using the financial-fraud-training NIM


### Create training configuration file
NOTE: The training configuration file must conform to the training schema defined by the financial-fraud-training NIM  (NOTE:  NEED A LINK TO THE DOCS)

__Important: Models and configuration files needed for deployment using the Triton Inference Server will be saved in `model_repository` under the folder that is mounted in /trained_models inside the NIM container__

In [None]:
# Training request for the NIM. It is written to a JSON file and posted to the
# /train endpoint further below. Both entries under "paths" are locations
# inside the container, not on the host.
training_config = {
  "paths": {
    "data_dir": "/data", # Mount dataset root directory under /data in the container
    "output_dir": "/trained_models" # Mount path to save the trained models.
                                    # NOTE: This path is inside the docker container 
  },

  "models": [
    {
      "kind": "GraphSAGE_XGBoost",
      "gpu": "single",
      # Hyperparameters are grouped per model component: "gnn" and "xgb".
      "hyperparameters": {
        "gnn":{
          "hidden_channels": 16,
          "n_hops": 1,
          "dropout_prob": 0.1,
          "batch_size": 1024,
          "fan_out": 16,
          "num_epochs": 16
        },
        "xgb": {
          "max_depth": 6,
          "learning_rate": 0.2,
          "num_parallel_tree": 3,
          "num_boost_round": 512,
          "gamma": 0.0
        }

      }
    }
  ]
}


#### Save the training configuration file as a json file

In [None]:
training_config_file_name = 'training_config.json'

# The file name has no directory part, so the file lands in the current
# working directory. A later cell posts this file to the NIM.
with open(training_config_file_name, 'w') as config_fh:
    json.dump(training_config, config_fh, indent=4)

### Pull and run the financial_fraud_training NIM

In [None]:
# Read the NGC API key from the environment; `API_KEY` is None when NGC_API_KEY is not set.
API_KEY=os.environ.get('NGC_API_KEY')
## NEED TO PULL CONTAINER ONCE IT IS IN NGC

#### Login and pull the image from the NGC registry

In [None]:
# Log in to nvcr.io. The key is fed through stdin (`--password-stdin`) so it does
# not appear on the command line or in the process list. Calling docker through
# subprocess also avoids relying on IPython's `$`/`{}` expansion in a command that
# contains the literal user name `$oauthtoken`.
if not API_KEY:
    raise RuntimeError("NGC_API_KEY is not set; export it before starting Jupyter.")

login_result = subprocess.run(
    ["docker", "login", "nvcr.io", "--username", "$oauthtoken", "--password-stdin"],
    input=API_KEY,
    text=True,
    capture_output=True,
)
# Show docker's own messages (success or error) in the cell output.
print(login_result.stdout, login_result.stderr, sep="")

In [None]:
# Pull the training NIM image. The same image tag is used again by the `docker run` cell below.
!docker pull nvcr.io/nvstaging/nim/financial-fraud-training:1.0.0-rc1

#### Create a local cache directory

In [None]:
# Host-side cache directory that is bind-mounted into the NIM container.
# It is resolved from the current user's home directory rather than assuming
# `/home/<user>`, which is wrong for root (`/root`) and on macOS (`/Users/<user>`).
# `username` is no longer needed for the path and is kept only for compatibility.
username = subprocess.run(["whoami"], capture_output=True, text=True).stdout.strip()
nim_cache_dir = os.path.join(os.path.expanduser('~'), '.cache', 'nim')

In [None]:
# Create the cache directory (and any missing parents); no error if it exists.
# Uses os.makedirs, as the model output directory does, so the path is not
# re-parsed by a shell and paths containing spaces work.
os.makedirs(nim_cache_dir, exist_ok=True)

#### Set container name and ports for running the container

In [None]:
# Ports used for the NIM's HTTP and gRPC APIs. The `docker run` cell publishes the
# same port numbers on the host and passes them to the container as
# NIM_HTTP_API_PORT / NIM_GRPC_API_PORT.
NIM_HTTP_PORT = 8002
NIM_GRPC_PORT = 50051
# Name given to the container so it can be stopped and removed by name.
CONTAINER_NAME = "financial-fraud-training"

In [None]:
# Stop any running container with the same name
# If no such container exists, docker prints an error message here; that is expected
# and can be ignored.
!docker stop {CONTAINER_NAME}
!docker rm {CONTAINER_NAME}

#### Run the container

In [None]:
!docker run -d -it --rm --name={CONTAINER_NAME} --gpus all \
    -p {NIM_HTTP_PORT}:{NIM_HTTP_PORT} -e NIM_HTTP_API_PORT={NIM_HTTP_PORT} -p {NIM_GRPC_PORT}:{NIM_GRPC_PORT} \
    -e NIM_DISABLE_MODEL_DOWNLOAD=True -e NIM_GRPC_API_PORT={NIM_GRPC_PORT} -e NIM_CACHE_PATH=/opt/nim/.cache \
    -e NIM_CACHE_PATH=/opt/nim/.cache  --mount=type=bind,src={nim_cache_dir},dst=/opt/nim/.cache -v {data_root_dir}:/data \
    -v {model_output_dir}:/trained_models nvcr.io/nvstaging/nim/financial-fraud-training:1.0.0-rc1 -e NGC_API_KEY={API_KEY}

In [None]:
# Fixed 5-second pause after starting the container, before the training request is sent.
# NOTE(review): this is a plain delay, not a readiness check — if the service needs
# longer to start, the request in the next step may fail and must be re-run.
time.sleep(5)

### Finally, initiate model training using the training configuration defined earlier

- Initiate training via the /train endpoint by sending the training configuration as a JSON payload.

In [None]:

# Equivalent curl invocation as an argument list, with the configuration passed
# inline as a JSON string. NOTE: `cmd` is only built here and never executed (the
# `subprocess.run` call is commented out); the request is sent by the `!curl`
# cell that follows.
cmd = [
    "curl",
    "-X", "POST",
    "-H", "Content-Type: application/json",
    "-d", json.dumps(training_config),
    f"http://0.0.0.0:{NIM_HTTP_PORT}/train"
]
# result = subprocess.run(cmd, capture_output=True, text=True)
# result.stdout

In [None]:
# POST the saved configuration file to the NIM's /train endpoint
# (`-d @file` makes curl send the file's contents as the request body).
!curl -X POST "http://0.0.0.0:$NIM_HTTP_PORT/train"   -H "Content-Type: application/json"   -d @{training_config_file_name}

#### Make sure that the `model_repository` has been created with the right contents
According to the configuration defined above, the `model_repository` (the folder containing the models and configuration files to be deployed on the Triton Inference Server) will be created under
`{data_root_dir}/trained_models/` and its contents will look like:

```sh
├── model
│   ├── 1
│   │   └── graph_sage_node_embedder.onnx
│   └── config.pbtxt
└── xgboost
    ├── 1
    │   └── xgboost_on_embeddings.json
    └── config.pbtxt

```


In [None]:
# List the generated model repository. `print_tree` calls os.listdir, so this
# raises FileNotFoundError if the `model_repository` folder does not exist yet.
print_tree(os.path.join(model_output_dir, 'model_repository'))

----
# Step 4:  Serve your model on Triton Inference Server

**Important:** Change `MODEL_REPO_PATH` to point to the `model_repository` folder if you used a different path in your training configuration file.

#### Install tritonclient


In [None]:
!pip install tritonclient[all]

In [None]:
import tritonclient.grpc as triton_grpc
import tritonclient.http as httpclient
from tritonclient import utils as triton_utils

In [None]:
# Set to False for remote/cloud deployment
# When True, the cells below start a Triton container on this machine and use 'localhost' as HOST.
run_locally = True 

##### Replace HOST with the actual server URL where your Triton Inference Server is hosted.


In [None]:
# Address of the Triton server the clients below connect to.
if run_locally:
    HOST = 'localhost'
else:
    HOST = '<SERVER_URL>' # Replace with your server URL or IP address

# Ports used by the clients and published on the host by the local `docker run` cell.
TRITON_HTTP_PORT = 8000
TRITON_GRPC_PORT = 8001

### If you are testing a local deployment
- Pull Triton inference server docker image
- Deploy server with  models and configuration files (produced by the training NIM)
- Double check that your model repository folder has the following structure:
```sh
├── model
│   ├── 1
│   │   └── graph_sage_node_embedder.onnx
│   └── config.pbtxt
└── xgboost
    ├── 1
    │   └── xgboost_on_embeddings.json
    └── config.pbtxt
```

In [None]:
# Local deployment only: (re)start a Triton container that serves the trained models.
if run_locally:
    
    # Triton server image
    TRITON_IMAGE = 'nvcr.io/nvidia/tritonserver:25.01-py3'
    # Host path of the model repository; mounted into the container at /models
    MODEL_REPO_PATH = os.path.join(model_output_dir, 'model_repository')

    # Pull the image, then stop and remove any previous container named `tritonserver`
    !docker pull {TRITON_IMAGE}
    !docker stop tritonserver
    !docker rm tritonserver

    # Start Triton detached, publishing the HTTP and gRPC ports on the host
    !docker run --gpus all -d -p {TRITON_HTTP_PORT}:{TRITON_HTTP_PORT} -p {TRITON_GRPC_PORT}:{TRITON_GRPC_PORT} -v {MODEL_REPO_PATH}:/models --name tritonserver {TRITON_IMAGE} tritonserver --model-repository=/models



### URLs for GRPC and HTTP request to the inference server

In [None]:
# One client per protocol: the gRPC client is used for the readiness check below,
# the HTTP client for the inference requests.
client_grpc = triton_grpc.InferenceServerClient(url=f'{HOST}:{TRITON_GRPC_PORT}')
client_http = httpclient.InferenceServerClient(url=f'{HOST}:{TRITON_HTTP_PORT}')

### Wait for the triton inference server to come online
NOTE: If the following cell runs for much longer than expected, interrupt the execution and run it again.

In [None]:

# Poll the server once per second until it reports ready, giving up after
# TIMEOUT seconds.
TIMEOUT = 60
client_grpc = triton_grpc.InferenceServerClient(url=f'{HOST}:{TRITON_GRPC_PORT}')
server_start = time.time()
while True:
    try:
        if client_grpc.is_server_ready():
            break
    except triton_utils.InferenceServerException:
        # The server is not accepting requests yet; keep polling.
        pass
    # The deadline is checked outside the `try` so that it also applies while
    # the readiness call itself keeps failing. Otherwise an unreachable server
    # would make this loop run forever.
    if time.time() - server_start > TIMEOUT:
        raise TimeoutError(
            f"Triton server at {HOST}:{TRITON_GRPC_PORT} was not ready after {TIMEOUT} seconds"
        )
    time.sleep(1)


### For local deployment, check if the triton inference server is running properly

In [None]:
# Show the container's log output so startup problems are visible in the notebook.
if run_locally:
    !docker logs tritonserver

### Read preprocessed input transactions to make query to the triton inference server

In [None]:
import pandas as pd
import numpy as np

test_path = os.path.join(data_root_dir, "xgb/test.csv") # already preprocessed data
test_df = pd.read_csv(test_path)
# Every column except the last is used as a float32 feature; the last column is
# kept as `y` and later compared against the predictions as the true label.
X = test_df.iloc[:, :-1].values.astype(np.float32)
y = test_df.iloc[:, -1].values
# Shape (2, 0): no edges are supplied with the request.
edge_index = np.array([[], []]).astype(np.int64) # empty edge_index

### Setup the HTTP request's inputs and output to retrieve embeddings for the input transactions

In [None]:
# Request tensors for the embedding model.
# NOTE(review): the tensor names "x", "edge_index" and "output" presumably have to
# match the names declared in the model's config.pbtxt — confirm.
input_features = httpclient.InferInput("x", X.shape, datatype="FP32")
input_features.set_data_from_numpy(X)

input_edge_indices = httpclient.InferInput("edge_index", edge_index.shape, datatype="INT64")
input_edge_indices.set_data_from_numpy(edge_index)

# Ask the server to return the tensor named "output"
outputs = httpclient.InferRequestedOutput("output")

### Send a query to retrieve embeddings

In [None]:
# Querying the server
# Run the model named "model" over HTTP and read its "output" tensor as a NumPy array.
results = client_http.infer(model_name="model", inputs=[input_features, input_edge_indices], outputs=[outputs])
node_embeddings = results.as_numpy('output')


### Use the retrieved embeddings as inputs to predict the transactions' fraud scores

In [None]:
# The embeddings returned by the previous request become the input of the "xgboost" model.
# NOTE(review): "input__0" / "output__0" presumably have to match the tensor names
# in the xgboost model's config.pbtxt — confirm.
xgboost_input = httpclient.InferInput("input__0", node_embeddings.shape, datatype="FP32")
xgboost_input.set_data_from_numpy(node_embeddings)

xgboost_outputs = httpclient.InferRequestedOutput("output__0")

### Send a query to retrieve the fraud scores

In [None]:
# Run the "xgboost" model and read its "output__0" tensor; these values are
# compared against the decision threshold in the evaluation cells.
results = client_http.infer(model_name="xgboost", inputs=[xgboost_input], outputs=[xgboost_outputs])
predictions = results.as_numpy('output__0')

### Evaluate performance

In [None]:
# Decision threshold to flag a transaction as fraud
# (predictions strictly greater than this value are labeled as fraud)
#Change to trade-off precision and recall
decision_threshold = 0.5

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Scores strictly above the threshold are flagged as fraud (1); the rest are 0.
y_pred = (predictions > decision_threshold).astype(int)

# Compare the thresholded predictions with the true labels. `zero_division=0`
# makes a metric evaluate to 0 when its denominator is zero.
accuracy = accuracy_score(y, y_pred)
precision, recall, f1 = (
    metric(y, y_pred, zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print("----Summary---")
for label, value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{label}: {value:.4f}")


### Compute confusion matrix 

In [None]:
import pandas as pd
# Create a DataFrame with labeled rows and columns
# Rows are labeled with the actual class and columns with the predicted class.
classes = ['Non-Fraud', 'Fraud']
columns = pd.MultiIndex.from_product([["Predicted"], classes])
index = pd.MultiIndex.from_product([["Actual"], classes])

# NOTE(review): the 2x2 labeling assumes both classes occur in `y`/`y_pred`;
# otherwise the matrix shape will not match the two labels — confirm for your data.
conf_mat = confusion_matrix(y, y_pred)
cm_df = pd.DataFrame(conf_mat, index=index, columns=columns)
print(cm_df)

### Plot confusion matrix

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Plot the confusion matrix directly from predictions
# (`classes` supplies the tick labels and comes from the previous cell)
disp = ConfusionMatrixDisplay.from_predictions(
    y, y_pred, display_labels=classes)
disp.ax_.set_title('Confusion Matrix')
plt.show()

----
# Step 5:  Serve your python backend model on Triton Inference Server

**Important:** Change `MODEL_REPO_PATH` to point to the `python_backend_model_repository` folder if you used a different path in your training configuration file.

## Deploy python backend model

In [None]:
# Address of the Triton server that hosts the python backend model.
if run_locally:
    HOST = 'localhost'
else:
    HOST = '<SERVER_URL>' # Replace with your server URL or IP address

# Ports passed to `tritonserver` for this deployment (these differ from the
# TRITON_HTTP_PORT / TRITON_GRPC_PORT values used in the previous step).
HTTP_PORT = 8005
GRPC_PORT = 8006
METRICS_PORT = 8007

In [None]:
# Local deployment only: replace the running `tritonserver` container with one
# that serves the python backend model repository.
if run_locally:
    
    # Triton server image
    TRITON_IMAGE = 'nvcr.io/nvidia/tritonserver:25.01-py3'
    # Host path of the python backend model repository; mounted at /models
    MODEL_REPO_PATH = os.path.join(model_output_dir, 'python_backend_model_repository')

    # Pull the image, then stop and remove the previous container with the same name
    !docker pull {TRITON_IMAGE}
    !docker stop tritonserver
    !docker rm tritonserver

    # Start Triton on the custom HTTP/gRPC/metrics ports. Only the HTTP and gRPC
    # ports are published on the host; the metrics port is not.
    !docker run --gpus all -d -p {HTTP_PORT}:{HTTP_PORT} -p {GRPC_PORT}:{GRPC_PORT} \
        -v {MODEL_REPO_PATH}:/models --name tritonserver {TRITON_IMAGE} tritonserver \
        --model-repository=/models   --http-port={HTTP_PORT} --grpc-port={GRPC_PORT} \
        --metrics-port={METRICS_PORT}

In [None]:
# Rebind `client_grpc` to the python backend server's gRPC port.
client_grpc = triton_grpc.InferenceServerClient(url=f'{HOST}:{GRPC_PORT}')

In [None]:
import subprocess

container_name = "tritonserver"
client_grpc = triton_grpc.InferenceServerClient(url=f'{HOST}:{GRPC_PORT}')

# Give up after this many seconds instead of polling forever when the server
# never becomes ready (for example because the container exited). Increase the
# value if your server legitimately needs longer to start.
READY_TIMEOUT = 600
POLL_INTERVAL = 10
wait_start = time.time()

while True:
    try:
        if client_grpc.is_server_ready():
            break
    except triton_utils.InferenceServerException:
        # Not reachable yet; show progress below and retry.
        pass

    if time.time() - wait_start > READY_TIMEOUT:
        raise TimeoutError(
            f"Triton server at {HOST}:{GRPC_PORT} was not ready after {READY_TIMEOUT} seconds"
        )

    # The container only exists on this machine for a local deployment.
    if run_locally:
        try:
            # Run the docker logs command with the --tail option.
            # stderr is merged into stdout so that log lines the container
            # wrote to stderr are captured and printed as well.
            output = subprocess.check_output(
                ["docker", "logs", "--tail", "10", container_name],
                stderr=subprocess.STDOUT,
            )
            print(output.decode("utf-8"))
        except subprocess.CalledProcessError as e:
            print("Error retrieving logs:", e)
    time.sleep(POLL_INTERVAL)

## Prediction without computing Shapley values

In [None]:
import os
import pandas as pd
import numpy as np

# Name of the model queried in the python backend deployment
model_name = "prediction_and_shapley"
test_path = os.path.join(data_root_dir, "xgb/test.csv") # already preprocessed data
test_df = pd.read_csv(test_path)
# Every column except the last is used as a float32 feature; the last column is
# kept as `y` and later compared against the predictions as the true label.
X = test_df.iloc[:, :-1].values.astype(np.float32)
y = test_df.iloc[:, -1].values
# Shape (2, 0): no edges are supplied with the request.
edge_index = np.array([[], []]).astype(np.int64) # empty edge_index
compute_shap = np.array([False], dtype=bool) # Skip shap value computation

In [None]:
# Cast to int32 to match the INT32 datatype declared for the FEATURE_MASK input below.
# `feature_mask` comes from the preprocessing step; re-running this cell is harmless.
feature_mask = feature_mask.astype(np.int32)

In [None]:
# Send one request to the python backend model with Shapley computation disabled.
# The client connects to `HOST` (not a hard-coded 'localhost') so that the
# remote-deployment setting chosen above is honored.
# NOTE(review): the tensor names presumably have to match the model's
# config.pbtxt — confirm.
with httpclient.InferenceServerClient(f"{HOST}:{HTTP_PORT}") as client:
    input_features = httpclient.InferInput("NODE_FEATURES", X.shape, datatype="FP32")
    input_features.set_data_from_numpy(X)

    input_edge_indices = httpclient.InferInput("EDGE_INDEX", edge_index.shape, datatype="INT64")
    input_edge_indices.set_data_from_numpy(edge_index)

    input_feature_mask = httpclient.InferInput("FEATURE_MASK", feature_mask.shape, datatype="INT32")
    input_feature_mask.set_data_from_numpy(feature_mask)

    compute_shap_flag = httpclient.InferInput("COMPUTE_SHAP", compute_shap.shape, datatype="BOOL")
    compute_shap_flag.set_data_from_numpy(compute_shap)

    # Both outputs are requested; only PREDICTION is read in the next cell.
    outputs = [
        httpclient.InferRequestedOutput("PREDICTION"),
        httpclient.InferRequestedOutput("SHAP_VALUES")
    ]
    response = client.infer(model_name, inputs=[input_features, input_edge_indices, compute_shap_flag, input_feature_mask ], request_id=str(1), outputs=outputs)


In [None]:
# Read the PREDICTION output of the response as a NumPy array.
predictions = response.as_numpy('PREDICTION')

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Use the notebook-wide `decision_threshold` (defined in the first evaluation
# section) instead of a hard-coded 0.5, so both evaluations share one cut-off.
y_pred = (predictions > decision_threshold).astype(int)


# Compute evaluation metrics
# `zero_division=0` makes a metric evaluate to 0 when its denominator is zero.
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred, zero_division=0)
recall = recall_score(y, y_pred, zero_division=0)
f1 = f1_score(y, y_pred, zero_division=0)

print("----Summary---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


In [None]:
import pandas as pd
# Create a DataFrame with labeled rows and columns
# Rows are labeled with the actual class and columns with the predicted class.
classes = ['Non-Fraud', 'Fraud']
columns = pd.MultiIndex.from_product([["Predicted"], classes])
index = pd.MultiIndex.from_product([["Actual"], classes])

# NOTE(review): the 2x2 labeling assumes both classes occur in `y`/`y_pred`;
# otherwise the matrix shape will not match the two labels — confirm for your data.
conf_mat = confusion_matrix(y, y_pred)
cm_df = pd.DataFrame(conf_mat, index=index, columns=columns)
print(cm_df)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

# Plot the confusion matrix directly from predictions
# (`classes` supplies the tick labels and comes from the previous cell)
disp = ConfusionMatrixDisplay.from_predictions(
    y, y_pred, display_labels=classes)
disp.ax_.set_title('Confusion Matrix')
plt.show()

## Compute Shapley value for different features for a transaction

In [None]:

# Set COMPUTE_SHAP flag to True
compute_shap = np.array([True], dtype=bool)

# Only the first transaction of the test set is sent.
# NOTE: this overwrites the full-test-set `X` and `y` used by the cells above.
X = test_df.iloc[:1, :-1].values.astype(np.float32)
y = test_df.iloc[:1, -1].values

# The client connects to `HOST` (not a hard-coded 'localhost') so that the
# remote-deployment setting chosen above is honored.
with httpclient.InferenceServerClient(f"{HOST}:{HTTP_PORT}") as client:
    input_features = httpclient.InferInput("NODE_FEATURES", X.shape, datatype="FP32")
    input_features.set_data_from_numpy(X)

    input_edge_indices = httpclient.InferInput("EDGE_INDEX", edge_index.shape, datatype="INT64")
    input_edge_indices.set_data_from_numpy(edge_index)

    input_feature_mask = httpclient.InferInput("FEATURE_MASK", feature_mask.shape, datatype="INT32")
    input_feature_mask.set_data_from_numpy(feature_mask)

    compute_shap_flag = httpclient.InferInput("COMPUTE_SHAP", compute_shap.shape, datatype="BOOL")
    compute_shap_flag.set_data_from_numpy(compute_shap)

    outputs = [
        httpclient.InferRequestedOutput("PREDICTION"),
        httpclient.InferRequestedOutput("SHAP_VALUES")
    ]
    response = client.infer(model_name, inputs=[input_features, input_edge_indices, compute_shap_flag, input_feature_mask ], request_id=str(1), outputs=outputs)


# Read both outputs of the response as NumPy arrays.
predictions= response.as_numpy('PREDICTION')
shap_values = response.as_numpy('SHAP_VALUES')

In [None]:
# Pair each entry of `feature_mask` with the Shapley value at the same position
# in the first row of `shap_values`.
# NOTE(review): if `feature_mask` contains repeated values, later pairs overwrite
# earlier ones in the dict — confirm that this is intended.
feature_to_attribution_map = dict(zip(feature_mask, shap_values[0]))
# Inverse of `mask_mapping`. Despite its name, this dict is indexed by the
# `feature_mask` values in the next cell, i.e. it maps id -> feature name.
feature_name_to_id_map = {v:k  for k,v in mask_mapping.items()}

#### Shapley values for different features

In [None]:
# Display the attribution of each feature, keyed by the name looked up for its
# mask id and formatted with three decimals.
{feature_name_to_id_map[k]: f"{v:.3f}" for k, v in feature_to_attribution_map.items()}