# Test Iris Classifier InferenceService

This notebook demonstrates how to call the deployed KServe InferenceService from JupyterHub.

## 1. Setup

In [None]:
# Install requests if not already installed
!pip install requests -q

In [None]:
import requests
import json
import os

## 2. Configure Endpoint

The InferenceService is accessible via the cluster-internal service URL.

In [None]:
# Name of the deployed InferenceService and the namespace it runs in.
INFERENCE_SERVICE_NAME = "iris-classifier"
NAMESPACE = "kserve"

# Cluster-internal v2 inference URL, built as:
#   http://<service-name>-predictor.<namespace>.svc.cluster.local/v2/models/<model-name>/infer
# The service name is reused as the model name in the path.
ENDPOINT = (
    f"http://{INFERENCE_SERVICE_NAME}-predictor.{NAMESPACE}.svc.cluster.local"
    f"/v2/models/{INFERENCE_SERVICE_NAME}/infer"
)

print(f"Inference Endpoint: {ENDPOINT}")

## 3. Define Test Samples

The Iris dataset has four features, which are sent to the model in this order:
1. Sepal length (cm)
2. Sepal width (cm)
3. Petal length (cm)
4. Petal width (cm)

Classes:
- 0: Iris Setosa
- 1: Iris Versicolor
- 2: Iris Virginica

In [None]:
# Human-readable label for each class index returned by the model.
CLASS_NAMES = ["Iris Setosa", "Iris Versicolor", "Iris Virginica"]

# Each test case pairs one feature vector
# [sepal_length, sepal_width, petal_length, petal_width]
# with the class index it is expected to be assigned.
test_cases = [
    dict(
        name="Typical Setosa",
        features=[5.1, 3.5, 1.4, 0.2],
        expected_class=0,
        description="Short petals, typical of Setosa",
    ),
    dict(
        name="Typical Virginica",
        features=[6.7, 3.0, 5.2, 2.3],
        expected_class=2,
        description="Long petals and sepals, typical of Virginica",
    ),
    dict(
        name="Typical Versicolor",
        features=[5.9, 3.0, 4.2, 1.5],
        expected_class=1,
        description="Medium-sized features, typical of Versicolor",
    ),
]

print(f"Prepared {len(test_cases)} test cases")

## 4. Test Single Prediction

Send a single prediction request to the InferenceService.

In [None]:
def predict(features):
    """
    Send a single-sample prediction request to the KServe InferenceService
    using the v2 protocol.

    Failures are printed rather than raised, so callers only need to check
    the return value for None.

    Args:
        features: List of feature values [sepal_length, sepal_width, petal_length, petal_width]

    Returns:
        Predicted class (0, 1, or 2), taken from the first element of the
        first output tensor, or None if the request failed or the response
        did not have the expected structure.
    """
    payload = {
        "inputs": [
            {
                "name": "input-0",
                "shape": [1, 4],
                "datatype": "FP64",
                "data": [features]
            }
        ]
    }

    try:
        response = requests.post(ENDPOINT, json=payload, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        # The exception carries no response when none was received
        # (e.g. connection refused or timeout).
        error_response = getattr(e, 'response', None)
        if error_response is not None:
            print(f"Response: {error_response.text}")
        return None

    # A 2xx reply can still be unusable: a body that is not JSON (ValueError),
    # or JSON without outputs[0].data[0] (KeyError / IndexError / TypeError).
    # Report it and return None instead of letting the exception escape.
    try:
        result = response.json()
        return result['outputs'][0]['data'][0]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f"Error: unexpected response format: {e!r}")
        print(f"Response: {response.text}")
        return None

# Smoke-test the helper on the first test case before running the whole suite.
sample = test_cases[0]
print(
    f"Testing: {sample['name']}",
    f"Features: {sample['features']}",
    f"Description: {sample['description']}",
    sep="\n",
)
print()

prediction = predict(sample['features'])
if prediction is not None:
    # Compare the returned class index with the label stored on the test case.
    if prediction == sample['expected_class']:
        outcome = '✓ PASS'
    else:
        outcome = '✗ FAIL'

    print(f"Predicted Class: {prediction} ({CLASS_NAMES[prediction]})")
    print(f"Expected Class: {sample['expected_class']} ({CLASS_NAMES[sample['expected_class']]})")
    print(f"Result: {outcome}")

## 5. Test All Cases

Run predictions for all test cases and display results.

In [None]:
banner = "=" * 80

print(banner)
print("Testing Iris Classifier InferenceService")
print(banner)
print()

# One record per test case: name, expected class, predicted class, pass/fail.
results = []

for i, test_case in enumerate(test_cases, 1):
    expected = test_case['expected_class']

    print(f"Test Case {i}: {test_case['name']}")
    print(f"  Features: {test_case['features']}")
    print(f"  Description: {test_case['description']}")
    print(f"  Expected: {CLASS_NAMES[expected]}")

    prediction = predict(test_case['features'])

    if prediction is None:
        # The request failed; record the case as incorrect with no prediction.
        is_correct = False
        print("  Status: ✗ ERROR")
    else:
        is_correct = prediction == expected
        print(f"  Predicted: {CLASS_NAMES[prediction]}")
        print(f"  Status: {'✓ PASS' if is_correct else '✗ FAIL'}")

    results.append({
        'name': test_case['name'],
        'expected': expected,
        'predicted': prediction,
        'correct': is_correct
    })

    print()

# Summary: how many cases matched their expected class.
print(banner)
passed = len([r for r in results if r['correct']])
total = len(results)
print(f"Test Summary: {passed}/{total} passed")
print(banner)

## 6. Batch Prediction

Send multiple samples in a single request for batch prediction.

In [None]:
def predict_batch(features_list):
    """
    Send a batch prediction request to the KServe InferenceService using
    the v2 protocol.

    All samples are sent in one request as a single input tensor of shape
    [len(features_list), 4]. Failures are printed rather than raised.

    Args:
        features_list: List of feature arrays, one per sample, each of the
            form [sepal_length, sepal_width, petal_length, petal_width]

    Returns:
        List of predicted classes (the data of the first output tensor),
        or None if the request failed or the response did not have the
        expected structure.
    """
    payload = {
        "inputs": [
            {
                "name": "input-0",
                "shape": [len(features_list), 4],
                "datatype": "FP64",
                "data": features_list
            }
        ]
    }

    try:
        response = requests.post(ENDPOINT, json=payload, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        # The exception carries no response when none was received
        # (e.g. connection refused or timeout).
        error_response = getattr(e, 'response', None)
        if error_response is not None:
            print(f"Response: {error_response.text}")
        return None

    # A 2xx reply can still be unusable: a body that is not JSON (ValueError),
    # or JSON without outputs[0].data (KeyError / IndexError / TypeError).
    # Report it and return None instead of letting the exception escape.
    try:
        result = response.json()
        return result['outputs'][0]['data']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f"Error: unexpected response format: {e!r}")
        print(f"Response: {response.text}")
        return None

# Gather the feature vector of every test case into one batch.
batch_features = [case['features'] for case in test_cases]
print(f"Sending batch request with {len(batch_features)} samples...")
print()

# One request for the whole batch.
predictions = predict_batch(batch_features)

# A failed request (None) and an empty result both skip the report.
if predictions:
    print("Batch Prediction Results:")
    print("-" * 60)
    # Predictions are matched to test cases by position.
    for i, (test_case, prediction) in enumerate(zip(test_cases, predictions), 1):
        expected = test_case['expected_class']
        if prediction == expected:
            status = "✓"
        else:
            status = "✗"

        print(f"{i}. {test_case['name']}")
        print(f"   Predicted: {CLASS_NAMES[prediction]}")
        print(f"   Expected: {CLASS_NAMES[expected]}")
        print(f"   {status}")
        print()

## 7. Custom Prediction

Try your own input values!

In [None]:
# Edit these four measurements (in cm) to classify a flower of your own.
# Order: [sepal_length, sepal_width, petal_length, petal_width]
custom_features = [6.0, 3.0, 4.0, 1.5]

# Echo the input, one measurement per line, before sending it.
print(
    f"Custom Input: {custom_features}",
    f"  Sepal Length: {custom_features[0]} cm",
    f"  Sepal Width: {custom_features[1]} cm",
    f"  Petal Length: {custom_features[2]} cm",
    f"  Petal Width: {custom_features[3]} cm",
    sep="\n",
)
print()

prediction = predict(custom_features)
if prediction is not None:
    print(f"Prediction: {CLASS_NAMES[prediction]} (class {prediction})")

## 8. Check InferenceService Status

Verify the InferenceService is running properly.

In [None]:
# Query the InferenceService resource with kubectl (best effort).
# This asks the cluster for the resource's status; it does not call the
# inference endpoint itself.
import subprocess

print("Checking InferenceService status...")
print()

try:
    result = subprocess.run(
        ["kubectl", "get", "inferenceservice", INFERENCE_SERVICE_NAME, "-n", NAMESPACE],
        capture_output=True,
        text=True,
        timeout=10
    )
except FileNotFoundError as e:
    # The kubectl executable could not be found.
    print(f"Could not check status: {e}")
    print("This is normal if kubectl is not available in the notebook environment.")
except subprocess.TimeoutExpired as e:
    # kubectl started but did not finish within the 10-second limit.
    print(f"Could not check status: {e}")
    print("kubectl did not finish in time; the cluster API may be slow or unreachable.")
except (OSError, subprocess.SubprocessError) as e:
    # Any other failure to launch or run kubectl (e.g. permission denied).
    print(f"Could not check status: {e}")
else:
    print(result.stdout)
    # A non-zero exit code means kubectl itself reported a problem on stderr.
    if result.returncode != 0:
        print(result.stderr)

## Summary

If every cell above ran without errors, you have:
1. ✅ Connected to the KServe InferenceService
2. ✅ Sent single prediction requests
3. ✅ Sent batch prediction requests
4. ✅ Verified predictions against expected results

## Next Steps

- Try running the Kubernetes Job-based tests (see `04-test-inference-job.yaml`)
- Deploy a new version of the model and compare predictions
- Implement A/B testing with multiple model versions
- Add monitoring and logging