# Advanced KServe Deployment with Transformer and Explainer

In this notebook, we'll deploy a fraud detection model using KServe with:
- **Custom Transformer**: Preprocesses input data
- **Custom Predictor**: Loads and runs the trained model
- **Custom Explainer**: Provides SHAP-based explanations

This notebook uses **KServe V2 Inference Protocol**.


In [None]:
import sys
import os

# Add the project root directory to Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
    print(f"Added {project_root} to Python path")


In [None]:
# Import required libraries
import requests
import json
import numpy as np
import time
from typing import Dict, List
import subprocess


## Step 1: Build Docker Images

First, we need to build Docker images for our custom components and load them into the Kind cluster.


In [None]:
# Build all Docker images
print("Building Docker images...")
build_script = os.path.join(project_root, "docker", "build-images.sh")
result = subprocess.run(["bash", build_script], capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
    print("Error:", result.stderr)


## Step 2: Deploy InferenceService

Deploy the KServe InferenceService with transformer, predictor, and explainer components.


In [None]:
# Deploy the InferenceService
yaml_path = os.path.join(project_root, "kserve", "advanced", "01-advanced-deployment.yaml")
print("Deploying InferenceService...")
result = subprocess.run(
    ["kubectl", "apply", "-f", yaml_path],
    capture_output=True,
    text=True
)
print(result.stdout)
if result.returncode != 0:
    print("Error:", result.stderr)


## Step 3: Helper Functions (V2 Protocol)

Define helper functions for making requests using KServe V2 Inference Protocol.


In [90]:
# Helper functions - Using KServe V2 Protocol
def make_prediction(instances: List) -> Dict:
    """Make prediction request to KServe InferenceService using V2 protocol"""
    url = "http://localhost:8080/v2/models/fraud-detection-advanced/infer"
    
    # Convert instances to V2 format
    if isinstance(instances[0], list):
        batch_size = len(instances)
        num_features = len(instances[0])
    else:
        batch_size = 1
        num_features = len(instances)
       
    
    payload = {
        "inputs": [{
            "name": "input-0",
            "shape": [batch_size, num_features],
            "datatype": "FP32",
            "data": instances
        }]
    }
    
    headers = {
        "Host": "fraud-detection-advanced.kubeflow-user-example-com.example.com",
        "Content-Type": "application/json",
        "Cookie": "oauth2_proxy_kubeflow=D0mG02qUmZvjCpXZaWF7xE5_pSmAQOQDrqtOY9MIxXET0nyvSDDx_LZXkTdR_Nbz7EMisZpkX5rmGgxK70MIfsRJg4d73gnhahf_vf0HekUhU1VmmDrioRkRJN7CBGbcDJxMvscAjMezhGWMUS4m2PjqvnsxF5NBi4gRxpe986ReqE8yZMNIcpQiQBDWXPPeu75BEDYO0hgCqkisSt-A_wnNXCw2ZxNUHglGXegp1CrjbwyTHeVbgAcuflOKJt5GOCs04YXaCSsOHJhLmJUc_ZulYrdcOvNy5J01g9rMquLzNPuaC90vJlnL9PAZZQTV7DhXQltx7fBVwX6yRIrg7X6XkF5AGtGXg4fzKDoITNmfI9ozsY5iHbji5xSAzK0vrolpm3DjwJmdHRY39gylVGAe3OHmytvvkHBwkShjnABZ6vD6AGwNtDhJXkQYWiCNLLwIgFOifjxjy9FM_UiOj3KNwRETdX_k3KYJxB2n_AmovLddNtiERp3JSg7CL2Wb2ROOB8anGeaSYtkuXwYIs961kY8zQgq4g1YBOmGnzCb72O2pCYumY4mdMJHCaLqithby3UkCyLjNf3WcRwrjg91ohEA_tFcZz3qdjpJc60l7sLU8V9dMeZrs60CppdX7WnPaT2NRu7dKyXxFoZOdzBSov3hAQlyJYn6qJSM6qiuUKhtlz_137P6OFHq9g08FLoPKDxUSHo-nlvlX6bDynYMMLbkw6qQNomY0NIcGQnRVSG1TpaQ4TcaMI8nj3q9G4rXd3KX8qx2Pfc_rVM21alAiIRwpDxggH7r9eHNhmdjpFVP7RqxwmLtf6NB5WS9cgyNUgOnrrWU82UNy6pX6AntQnWnny97PRL-EOmzm5cBZekYKNgkfumQUEqQ3Az3rVnngGcYwEgBXdMnmo4C7FV9qqqLBB0R-4Ng2NogNzorzsU6dUHS3sGEV2CYCeqUCKsBxrCJ6redZ7jseSqPWITOtGMarAfQ1SgTte3LehaJhM386TTN8P8Twg3bB2SgLz4WuwCUsL3onBQw9bmoNQkwas6RMJ02fNGJTOyCG6PJKJpxN9estQp2KaOJ3kZRpIffeqvLAxjftDUnzIuuCztFvUteEN3O0Rks41VR2kMe139edvgSYyDKtdaXiMsmHhytYpHVWVHb-32onyQwM5nkniSPbQ5GBTF0kcsist2mOaRtngSvtUJxyVWD5Hi8gZHchl-PwGg4b6GRGYMKbW0Jwq42-qLws2_8h1Z9pniaeOQJRhHnAwoZ3CFD4fUSnaLq2EckWOGhae77JzUZfvYPfRjOUGXksaEmDa0GHWuedAqlN8DG2Ze0CCiG9veG2Mq06q0mTHqeEOUUCf-56PObPAUf_mz-gid5_DfPvk_-r-AVdVslS4ecXkooLObX8D01YphYm6gBaadZR7ZmBdkYYiMaiPapC12z7s96loEb3tGStFlUEFM53glPA5siP0hR5B0EuYLclPkDv-rN7215eD6yNkB3HhgbHioab1F1rxHR2iiI7G4dF9pfr9rlIlqyfum5c6SWJ8Fd_xs-7Xr4B9wM0g-6PyPv6O0sySPF1lJ4eAFAliZBZBOhCXK0eRJTCGyKn7c_Owupzxrqc1Qiq6cWkuETkNQUKd-YvHktPOMiXg-FPzZ_ciX0kE5rDx4kPd9QaDlSYF88jv7hhDvdhG4vd-0Da09vXzhbI8stgWCea6_geYo3iY8YfkjWZeL_8k_DCgXCh__s1rTkbyx11sWSm8zHPIpYJO12-hwBwjx3OLCJfU7KLIWrBltg91AyP2sdQxYXNXvVf36M6DGRYGHJYxiXhWnxf5f7ZT_HNZvE0X1JEkYVGDEFA5wngwBvJ2n1SA_4QFzhuZB8hM-Aj269jBKEcYiAZOAH3qG5I_vbgY1mbtVLm3Tc=|1761431949|f_5KgK2tvwRFXCJ7mrj-N4_TAh8NucJ1Lw1uZfdGDG0="
    }
    
    response = requests.post(url, json=payload, headers=headers)
    response.raise_for_status()
    return response.json()

def get_explanation(instances: List) -> Dict:
    """Get model explanation using SHAP - V2 protocol"""
    url = "http://localhost:8080/v1/models/fraud-detection-advanced:explain"
    
    payload = {
        "instances": instances
    }

    print(payload)
    
    headers = {
        "Host": "fraud-detection-advanced.kubeflow-user-example-com.example.com",
        "Content-Type": "application/json",
        "Cookie": "oauth2_proxy_kubeflow=D0mG02qUmZvjCpXZaWF7xE5_pSmAQOQDrqtOY9MIxXET0nyvSDDx_LZXkTdR_Nbz7EMisZpkX5rmGgxK70MIfsRJg4d73gnhahf_vf0HekUhU1VmmDrioRkRJN7CBGbcDJxMvscAjMezhGWMUS4m2PjqvnsxF5NBi4gRxpe986ReqE8yZMNIcpQiQBDWXPPeu75BEDYO0hgCqkisSt-A_wnNXCw2ZxNUHglGXegp1CrjbwyTHeVbgAcuflOKJt5GOCs04YXaCSsOHJhLmJUc_ZulYrdcOvNy5J01g9rMquLzNPuaC90vJlnL9PAZZQTV7DhXQltx7fBVwX6yRIrg7X6XkF5AGtGXg4fzKDoITNmfI9ozsY5iHbji5xSAzK0vrolpm3DjwJmdHRY39gylVGAe3OHmytvvkHBwkShjnABZ6vD6AGwNtDhJXkQYWiCNLLwIgFOifjxjy9FM_UiOj3KNwRETdX_k3KYJxB2n_AmovLddNtiERp3JSg7CL2Wb2ROOB8anGeaSYtkuXwYIs961kY8zQgq4g1YBOmGnzCb72O2pCYumY4mdMJHCaLqithby3UkCyLjNf3WcRwrjg91ohEA_tFcZz3qdjpJc60l7sLU8V9dMeZrs60CppdX7WnPaT2NRu7dKyXxFoZOdzBSov3hAQlyJYn6qJSM6qiuUKhtlz_137P6OFHq9g08FLoPKDxUSHo-nlvlX6bDynYMMLbkw6qQNomY0NIcGQnRVSG1TpaQ4TcaMI8nj3q9G4rXd3KX8qx2Pfc_rVM21alAiIRwpDxggH7r9eHNhmdjpFVP7RqxwmLtf6NB5WS9cgyNUgOnrrWU82UNy6pX6AntQnWnny97PRL-EOmzm5cBZekYKNgkfumQUEqQ3Az3rVnngGcYwEgBXdMnmo4C7FV9qqqLBB0R-4Ng2NogNzorzsU6dUHS3sGEV2CYCeqUCKsBxrCJ6redZ7jseSqPWITOtGMarAfQ1SgTte3LehaJhM386TTN8P8Twg3bB2SgLz4WuwCUsL3onBQw9bmoNQkwas6RMJ02fNGJTOyCG6PJKJpxN9estQp2KaOJ3kZRpIffeqvLAxjftDUnzIuuCztFvUteEN3O0Rks41VR2kMe139edvgSYyDKtdaXiMsmHhytYpHVWVHb-32onyQwM5nkniSPbQ5GBTF0kcsist2mOaRtngSvtUJxyVWD5Hi8gZHchl-PwGg4b6GRGYMKbW0Jwq42-qLws2_8h1Z9pniaeOQJRhHnAwoZ3CFD4fUSnaLq2EckWOGhae77JzUZfvYPfRjOUGXksaEmDa0GHWuedAqlN8DG2Ze0CCiG9veG2Mq06q0mTHqeEOUUCf-56PObPAUf_mz-gid5_DfPvk_-r-AVdVslS4ecXkooLObX8D01YphYm6gBaadZR7ZmBdkYYiMaiPapC12z7s96loEb3tGStFlUEFM53glPA5siP0hR5B0EuYLclPkDv-rN7215eD6yNkB3HhgbHioab1F1rxHR2iiI7G4dF9pfr9rlIlqyfum5c6SWJ8Fd_xs-7Xr4B9wM0g-6PyPv6O0sySPF1lJ4eAFAliZBZBOhCXK0eRJTCGyKn7c_Owupzxrqc1Qiq6cWkuETkNQUKd-YvHktPOMiXg-FPzZ_ciX0kE5rDx4kPd9QaDlSYF88jv7hhDvdhG4vd-0Da09vXzhbI8stgWCea6_geYo3iY8YfkjWZeL_8k_DCgXCh__s1rTkbyx11sWSm8zHPIpYJO12-hwBwjx3OLCJfU7KLIWrBltg91AyP2sdQxYXNXvVf36M6DGRYGHJYxiXhWnxf5f7ZT_HNZvE0X1JEkYVGDEFA5wngwBvJ2n1SA_4QFzhuZB8hM-Aj269jBKEcYiAZOAH3qG5I_vbgY1mbtVLm3Tc=|1761431949|f_5KgK2tvwRFXCJ7mrj-N4_TAh8NucJ1Lw1uZfdGDG0="
    }
    
    response = requests.post(url, json=payload, headers=headers)
    response.raise_for_status()
    return response.json()

print("‚úÖ Helper functions defined !")


‚úÖ Helper functions defined !


## Step 4: Test Predictions

Test the deployed model with sample fraud detection data.


In [None]:
# Generate test data (30 features for fraud detection)
test_instances = [
    # Normal transaction
    [3.2, 2.8, 4.1, 2.5, 3.0, 2.9, 3.5, 2.7, 3.8, 4.2,
     2.6, 3.1, 2.4, 3.7, 2.8, 3.3, 2.9, 4.0, 3.6, 2.7,
     3.4, 2.5, 3.9, 3.2, 2.8, 3.0, 3.7, 2.6, 3.5, 2.9],
    # Suspicious transaction
    [8.5, 9.2, 7.8, 8.9, 9.1, 8.7, 9.4, 8.3, 9.0, 8.6,
     9.3, 8.8, 9.5, 8.4, 9.2, 8.9, 9.1, 8.7, 9.3, 8.5,
     9.0, 8.6, 9.4, 8.2, 9.1, 8.8, 9.2, 8.7, 9.0, 8.9]
]

print(f"Test instances shape: {len(test_instances)} x {len(test_instances[0])}")


In [91]:
# Make predictions
print("Making predictions...")
try:
    response = make_prediction(test_instances)
    print("\nüìä Prediction Response:")
    print(json.dumps(response, indent=2))
except Exception as e:
    print(e)
    print("\nüí° Tip: Make sure port-forward is running:")
    print("   kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80")


Making predictions...

üìä Prediction Response:
{
  "model_name": "fraud-detection-advanced",
  "model_version": null,
  "id": "d5be787a-47a1-400b-8ccf-5b90143af7d8",
  "parameters": null,
  "outputs": [
    {
      "name": "output-0",
      "shape": [
        2
      ],
      "datatype": "INT64",
      "parameters": null,
      "data": [
        0,
        0
      ]
    }
  ]
}


## Step 6: Test Explainer

Get SHAP-based explanations for the predictions.


In [92]:
# Get explanations
print("Getting explanations...")
try:
    explanation = get_explanation([test_instances[0]])  # Explain first instance
    print("\nüîç Explanation Response:")
    print(json.dumps(explanation, indent=2))
except Exception as e:
    print(f"‚ùå Error: {e}")


Getting explanations...
{'instances': [[3.2, 2.8, 4.1, 2.5, 3.0, 2.9, 3.5, 2.7, 3.8, 4.2, 2.6, 3.1, 2.4, 3.7, 2.8, 3.3, 2.9, 4.0, 3.6, 2.7, 3.4, 2.5, 3.9, 3.2, 2.8, 3.0, 3.7, 2.6, 3.5, 2.9]]}

üîç Explanation Response:
{
  "explanations": [
    {
      "prediction": {
        "class": 0,
        "label": "legitimate",
        "probabilities": {
          "legitimate": 0.86,
          "fraud": 0.14
        },
        "confidence": 0.86
      },
      "explanation": {
        "base_value": 0.42407401344437007,
        "prediction_value": 0.13999999999999957,
        "text": "This transaction is predicted as LEGITIMATE with 86.0% confidence. The most influential factors are:\n1. Amount (value: 3.200) decreases fraud risk by 0.077\n2. V6 (value: 2.700) decreases fraud risk by 0.053\n3. V24 (value: 3.000) decreases fraud risk by 0.046\n",
        "top_features": [
          {
            "feature": "Amount",
            "value": 3.2,
            "shap_value": -0.0772663920251738,
         

## Step 7: Analyze Feature Importance

Extract and visualize the most important features from SHAP explanations.


In [93]:
# Analyze feature importance
try:
    if 'explanations' in explanation:
        exp = explanation['explanations'][0]
        feature_importance = exp.get('feature_importance', {})
        
        # Sort by absolute importance
        sorted_features = sorted(
            feature_importance.items(),
            key=lambda x: abs(x[1]),
            reverse=True
        )
        
        print("\nüéØ Top 10 Most Important Features:")
        print("-" * 50)
        for feature, importance in sorted_features[:10]:
            direction = "‚Üë" if importance > 0 else "‚Üì"
            print(f"{direction} {feature}: {importance:.4f}")
            
except Exception as e:
    print(f"Error analyzing explanations: {e}")



üéØ Top 10 Most Important Features:
--------------------------------------------------


## Step 8: Batch Predictions

Test batch prediction performance with multiple instances.


In [94]:
# Generate batch test data
batch_size = 10
batch_instances = []
for i in range(batch_size):
    # Generate random features
    instance = np.random.uniform(2.0, 5.0, 30).tolist()
    batch_instances.append(instance)

print(f"Generated {batch_size} test instances")

# Make batch predictions
try:
    start_time = time.time()
    batch_response = make_prediction(batch_instances)
    end_time = time.time()
    
    print(f"\n‚ö° Batch Prediction Performance:")
    print(f"  - Batch size: {batch_size}")
    print(f"  - Time taken: {(end_time - start_time):.3f} seconds")
    print(f"  - Throughput: {batch_size / (end_time - start_time):.2f} predictions/sec")
    
except Exception as e:
    print(f"‚ùå Error: {e}")


Generated 10 test instances

‚ö° Batch Prediction Performance:
  - Batch size: 10
  - Time taken: 0.023 seconds
  - Throughput: 431.72 predictions/sec
