# Step 4: API Inference Scoring

## Objective
Score `inference_dataset.csv` with your deployed H2O MLOps model endpoint and save the predictions to `scores.csv`.

## Prerequisites
- Model trained in H2O Driverless AI (Step 2)
- Model deployed to H2O MLOps with active endpoint (Step 3)
- Your deployment endpoint URL (e.g., `https://api.example.com/v1/models/<model-id>/score`)
- `inference_dataset.csv` file in your data directory

## Deliverables
- `scores.csv` with predictions for all samples
- `inference_metrics.json` with API performance metrics (latency, throughput)

---
## 1. Setup and Imports

In [None]:
# Install required packages (run once)
# !pip install pandas numpy requests

In [None]:
import pandas as pd
import numpy as np
import requests
import json
import time
from datetime import datetime
import os

# Record the local wall-clock time at which this notebook run began.
print(f"Notebook started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

---
## 2. Configuration

**IMPORTANT**: Replace the placeholder value of `ENDPOINT_URL` with the full scoring URL of your actual H2O MLOps deployment before running the scoring cells.

In [None]:
# ============================================
# CONFIGURATION - UPDATE THESE VALUES
# ============================================

# Your H2O MLOps model endpoint URL.
# The value below is only a placeholder; replace it with the full scoring URL
# of your deployment before sending any request.
ENDPOINT_URL = "<your endpoint>"

# API Key (if required by your deployment).
# When set, it is sent as an "Authorization: Bearer <key>" header.
API_KEY = None  # Set to your API key string if needed, e.g., "your-api-key-here"

# File paths (relative paths are resolved against the current working directory)
INFERENCE_DATA_PATH = "../data/inference_dataset.csv"   # input rows to score
SCORES_OUTPUT_PATH = "../data/scores.csv"               # per-row predictions
METRICS_OUTPUT_PATH = "../reports/inference_metrics.json"  # latency/throughput summary

# Batch size for API requests
BATCH_SIZE = 100  # Number of samples per API call

# Echo the active settings so they are visible in the notebook output.
print("Configuration loaded.")
print(f"Endpoint: {ENDPOINT_URL}")
print(f"Batch size: {BATCH_SIZE}")

---
## 3. Load Inference Dataset

In [None]:
# Read the inference CSV into a DataFrame
inference_df = pd.read_csv(INFERENCE_DATA_PATH)

# Report the size and column layout of what was loaded
n_rows, n_cols = inference_df.shape
print("Loaded inference dataset:")
print(f"  - Samples: {n_rows:,}")
print(f"  - Features: {n_cols}")
print(f"  - Columns: {list(inference_df.columns)}")

# Preview the first rows (rendered as the cell output)
inference_df.head()

---
## 4. Helper Functions for API Scoring

These functions handle sending requests to your H2O MLOps endpoint and parsing responses.

In [None]:
def create_request_headers(api_key=None):
    """
    Build the HTTP headers sent with a scoring request.

    Args:
        api_key: Optional API key; when truthy it is added as a Bearer token
            in the Authorization header.

    Returns:
        Dictionary of headers
    """
    # Only include the Authorization entry when a key was actually supplied.
    auth_header = {'Authorization': f'Bearer {api_key}'} if api_key else {}
    return {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        **auth_header,
    }


def create_scoring_payload(df_batch):
    """
    Convert a batch of rows into the JSON body sent to the scoring endpoint.

    Args:
        df_batch: DataFrame with rows to score

    Returns:
        Dictionary with the column names under "fields" and the row values
        (one list per row, in column order) under "rows"
    """
    column_names = list(df_batch.columns)
    row_values = df_batch.values.tolist()
    return dict(fields=column_names, rows=row_values)


def send_scoring_request(endpoint_url, payload, headers, timeout=60):
    """
    Send scoring request to H2O MLOps endpoint.

    Errors are reported with print() and signalled through the returned
    success flag; this function does not raise for HTTP or network failures.

    Args:
        endpoint_url: Full URL to scoring endpoint
        payload: JSON payload with data to score
        headers: HTTP headers
        timeout: Request timeout in seconds

    Returns:
        Tuple of (response_json, latency_ms, success). response_json is None
        whenever success is False. latency_ms is the elapsed time up to the
        point the response arrived or the failure was detected.
    """
    # perf_counter() is monotonic, so the measured latency cannot be distorted
    # (or go negative) when the system wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    def elapsed_ms():
        return (time.perf_counter() - start_time) * 1000

    try:
        response = requests.post(
            endpoint_url,
            headers=headers,
            json=payload,
            timeout=timeout
        )
        latency_ms = elapsed_ms()

        if response.status_code == 200:
            # A 200 with a non-JSON body raises here and is reported by the
            # generic handler below.
            return response.json(), latency_ms, True

        print(f"Error: HTTP {response.status_code} - {response.text}")
        return None, latency_ms, False

    except requests.exceptions.Timeout:
        print(f"Error: Request timeout after {timeout} seconds")
        return None, elapsed_ms(), False

    except Exception as e:
        # Deliberately broad: one bad batch must not abort the whole run.
        print(f"Error: {str(e)}")
        return None, elapsed_ms(), False


def _locate_class_columns(fields):
    """Return (class-1 index, class-0 index) of the probability columns in `fields`."""
    positive_idx = negative_idx = None
    for idx, field in enumerate(fields):
        name = str(field).strip()
        # Match on the class suffix only. A substring test on the target name
        # (e.g. 'DEFAULT') matches every field and would select the first one.
        if positive_idx is None and name.endswith('.1'):
            positive_idx = idx
        elif negative_idx is None and name.endswith('.0'):
            negative_idx = idx

    if positive_idx is None and negative_idx is None:
        return 1, 0  # Fall back to the usual layout: [P(class=0), P(class=1)]
    if positive_idx is None:
        positive_idx = 1 if negative_idx == 0 else 0
    elif negative_idx is None:
        negative_idx = 1 if positive_idx == 0 else 0
    return positive_idx, negative_idx


def parse_scoring_response(response_json, batch_df):
    """
    Parse H2O MLOps scoring response.

    Expected response format:
    {
        "fields": ["DEFAULT_PAYMENT_NEXT_MONTH.0", "DEFAULT_PAYMENT_NEXT_MONTH.1"],
        "score": [[0.8, 0.2], [0.3, 0.7], ...]
    }

    The scores are read from the "score" key, falling back to "scores".
    The class-1 (default) column is the field whose name ends in ".1"; when
    no field carries a class suffix the second column is assumed. A row with
    a single value is treated as P(default). Score rows are matched to
    batch_df rows by position; surplus score rows are ignored.

    Args:
        response_json: JSON response from API
        batch_df: Original batch DataFrame for ID mapping (needs an 'ID' column)

    Returns:
        List of dictionaries with keys 'ID', 'prediction' (1 when
        P(default) >= 0.5), 'probability_default' and
        'probability_no_default'. On a parsing error the error is printed and
        the rows parsed so far are returned.
    """
    results = []

    try:
        fields = response_json.get('fields', [])
        scores = response_json.get('score', response_json.get('scores', []))

        prob_default_idx, prob_no_default_idx = _locate_class_columns(fields)

        # Read the ID column directly: going through a whole row (iloc[i])
        # can upcast integer IDs to float when other columns are floats.
        row_ids = batch_df['ID'].tolist()

        # zip() stops at the shorter sequence, so extra score rows are skipped.
        for row_id, score_row in zip(row_ids, scores):
            if len(score_row) >= 2:
                prob_no_default = float(score_row[prob_no_default_idx])
                prob_default = float(score_row[prob_default_idx])
            else:
                prob_default = float(score_row[0])
                prob_no_default = 1 - prob_default

            results.append({
                'ID': int(row_id),
                'prediction': 1 if prob_default >= 0.5 else 0,
                'probability_default': round(prob_default, 6),
                'probability_no_default': round(prob_no_default, 6)
            })

    except Exception as e:
        # Best effort: report the problem and return what was parsed so far.
        print(f"Error parsing response: {e}")
        print(f"Response: {response_json}")

    return results


# Cell-completion marker: shows in the notebook output that the helper
# definitions above were executed.
print("Helper functions loaded successfully!")

---
## 5. Main Scoring Function

This function scores the entire inference dataset in batches and collects performance metrics.

In [None]:
def _null_prediction(row_id):
    """Return a placeholder result row for a sample that could not be scored."""
    return {
        'ID': int(row_id),
        'prediction': None,
        'probability_default': None,
        'probability_no_default': None
    }


def _summarize_latencies(latencies):
    """Aggregate per-request latencies (ms) into plain-float summary statistics."""
    stat_names = ('avg_ms', 'min_ms', 'max_ms', 'p50_ms', 'p95_ms', 'p99_ms', 'total_ms')
    if not latencies:
        # No request was sent (empty dataset); np.min/np.max raise on empty input.
        return dict.fromkeys(stat_names, 0.0)

    values = np.asarray(latencies, dtype=float)
    p50, p95, p99 = np.percentile(values, [50, 95, 99])
    # float() keeps the metrics plain Python numbers for JSON serialisation.
    return {
        'avg_ms': float(values.mean()),
        'min_ms': float(values.min()),
        'max_ms': float(values.max()),
        'p50_ms': float(p50),
        'p95_ms': float(p95),
        'p99_ms': float(p99),
        'total_ms': float(values.sum())
    }


def score_inference_dataset(df, endpoint_url, api_key=None, batch_size=100):
    """
    Score the entire inference dataset using H2O MLOps API.

    Rows are sent in consecutive batches. Every input row produces exactly one
    output row: rows of a failed request, and rows for which the response
    could not be parsed, get null prediction fields, and that batch is
    counted in 'failed_batches'.

    Args:
        df: DataFrame with inference data (must contain an 'ID' column)
        endpoint_url: H2O MLOps scoring endpoint URL
        api_key: Optional API key
        batch_size: Number of samples per API request (must be >= 1)

    Returns:
        Tuple of (scores_df, metrics_dict). scores_df has the columns
        'ID', 'prediction', 'probability_default', 'probability_no_default'.

    Raises:
        ValueError: If batch_size is smaller than 1.
        KeyError: If df has no 'ID' column.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    if 'ID' not in df.columns:
        raise KeyError("Inference data must contain an 'ID' column")

    headers = create_request_headers(api_key)
    result_columns = ['ID', 'prediction', 'probability_default', 'probability_no_default']

    all_results = []
    latencies = []
    failed_batches = 0

    n_samples = len(df)
    n_batches = (n_samples + batch_size - 1) // batch_size  # ceiling division

    print(f"\nStarting inference scoring...")
    print(f"  Total samples: {n_samples:,}")
    print(f"  Batch size: {batch_size}")
    print(f"  Total batches: {n_batches}")
    print("-" * 60)

    # Monotonic clock: unaffected by system clock adjustments during the run.
    start_time = time.perf_counter()

    for batch_idx in range(n_batches):
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, n_samples)
        batch_df = df.iloc[start_idx:end_idx]
        batch_ids = batch_df['ID'].tolist()

        # Create payload
        payload = create_scoring_payload(batch_df)

        # Send request
        response_json, latency_ms, success = send_scoring_request(
            endpoint_url, payload, headers
        )

        latencies.append(latency_ms)

        if success and response_json:
            batch_results = parse_scoring_response(response_json, batch_df)
        else:
            batch_results = []

        if len(batch_results) < len(batch_ids):
            # The request failed, the response could not be parsed, or it held
            # fewer scores than rows. The parser emits results in row order,
            # so the unscored rows are the tail of the batch; pad them with
            # nulls so no input row disappears from the output.
            failed_batches += 1
            batch_results = list(batch_results) + [
                _null_prediction(row_id)
                for row_id in batch_ids[len(batch_results):]
            ]

        all_results.extend(batch_results)

        # Progress update
        print(f"\rBatch {batch_idx + 1}/{n_batches} completed | "
              f"Latency: {latency_ms:.2f}ms | "
              f"Progress: {end_idx}/{n_samples} samples", end="", flush=True)

    total_time = time.perf_counter() - start_time

    print(f"\n\nScoring completed in {total_time:.2f} seconds")

    # Create results DataFrame; explicit columns keep the schema stable even
    # when there are no rows at all.
    scores_df = pd.DataFrame(all_results, columns=result_columns)

    # Calculate metrics
    metrics = {
        'timestamp': datetime.now().isoformat(),
        'endpoint': endpoint_url,
        'total_samples': n_samples,
        'batch_size': batch_size,
        'total_batches': n_batches,
        'failed_batches': failed_batches,
        'success_rate': ((n_batches - failed_batches) / n_batches * 100) if n_batches > 0 else 0,
        'total_time_seconds': total_time,
        'throughput_samples_per_second': n_samples / total_time if total_time > 0 else 0,
        'latency_stats': _summarize_latencies(latencies)
    }

    return scores_df, metrics


# Cell-completion marker: shows in the notebook output that the scoring
# function above was defined.
print("Main scoring function loaded!")

---
## 6. Execute Scoring

**IMPORTANT**: Make sure you have updated `ENDPOINT_URL` with your actual deployment endpoint before running this cell.

In [None]:
# Verify endpoint is configured.
# A usable endpoint is an http(s) URL with no "<...>" template placeholder
# left in it. Checking for "<" (rather than one specific placeholder string)
# also catches the default value set in the configuration cell.
endpoint_not_configured = (
    "<" in ENDPOINT_URL
    or not ENDPOINT_URL.lower().startswith(("http://", "https://"))
)

if endpoint_not_configured:
    print("ERROR: Please update ENDPOINT_URL with your actual deployment ID!")
    print(f"Current value: {ENDPOINT_URL}")
else:
    print("Endpoint configured. Ready to score.")
    print(f"Endpoint: {ENDPOINT_URL}")

In [None]:
# Score the loaded dataset against the configured endpoint; returns the
# per-row predictions and the API performance metrics.
scores_df, metrics = score_inference_dataset(
    inference_df, ENDPOINT_URL, api_key=API_KEY, batch_size=BATCH_SIZE
)

---
## 7. Review Scoring Results

In [None]:
# Display scores summary
all_predictions = scores_df['prediction']
# Rows from failed batches carry a null prediction. Exclude them from the
# rate so failures are not silently counted as "non-default".
scored_predictions = all_predictions.dropna()

print("Scoring Results Summary")
print("=" * 60)
print(f"Total predictions: {len(scores_df):,}")
print(f"Predicted defaults: {(all_predictions == 1).sum():,}")
print(f"Predicted non-defaults: {(all_predictions == 0).sum():,}")
print(f"Null predictions (failed): {all_predictions.isnull().sum():,}")

if len(scored_predictions) > 0:
    print(f"\nPredicted default rate: {(scored_predictions == 1).mean():.2%}")
    print(f"Average probability: {scores_df['probability_default'].mean():.4f}")
else:
    # Nothing was scored successfully, so no rate or average can be computed.
    print("\nPredicted default rate: n/a (no successful predictions)")
    print("Average probability: n/a")

# Display first few predictions
print("\nFirst 10 predictions:")
scores_df.head(10)

In [None]:
# Print the run-level counters followed by the latency distribution
latency = metrics['latency_stats']

print("\nAPI Performance Metrics")
print("=" * 60)
print(f"Total samples scored: {metrics['total_samples']:,}")
print(f"Total batches: {metrics['total_batches']}")
print(f"Failed batches: {metrics['failed_batches']}")
print(f"Success rate: {metrics['success_rate']:.2f}%")
print(f"\nTotal time: {metrics['total_time_seconds']:.2f} seconds")
print(f"Throughput: {metrics['throughput_samples_per_second']:.2f} samples/second")

print("\nLatency Statistics:")
# (display label, key in metrics['latency_stats']) in output order
latency_rows = (
    ("Average", 'avg_ms'),
    ("Minimum", 'min_ms'),
    ("Maximum", 'max_ms'),
    ("P50 (Median)", 'p50_ms'),
    ("P95", 'p95_ms'),
    ("P99", 'p99_ms'),
)
for stat_label, stat_key in latency_rows:
    print(f"  {stat_label}: {latency[stat_key]:.2f} ms")

---
## 8. Save Results

Save the scoring results and metrics for model monitoring and reporting.

In [None]:
# Make sure the target directory of BOTH outputs exists. A path without a
# directory part yields '' from dirname(), which os.makedirs() rejects, so
# that case is skipped (the file then goes to the working directory).
for output_path in (SCORES_OUTPUT_PATH, METRICS_OUTPUT_PATH):
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

# Save scores to CSV
scores_df.to_csv(SCORES_OUTPUT_PATH, index=False)
print(f"Scores saved to: {SCORES_OUTPUT_PATH}")

# Save metrics to JSON
with open(METRICS_OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)
print(f"Metrics saved to: {METRICS_OUTPUT_PATH}")

In [None]:
# Confirm both output files exist on disk and report their sizes
print("\nVerification:")
for file_label, file_path in (
    ("scores.csv", SCORES_OUTPUT_PATH),
    ("metrics.json", METRICS_OUTPUT_PATH),
):
    print(f"{file_label} size: {os.path.getsize(file_path):,} bytes")

# Read the scores back to confirm the CSV is loadable and has the expected layout
saved_scores = pd.read_csv(SCORES_OUTPUT_PATH)
print(f"\nSaved scores shape: {saved_scores.shape}")
print(f"Columns: {list(saved_scores.columns)}")

---
## 9. Next Steps

You have successfully:
1. Loaded the inference dataset
2. Scored all samples using your H2O MLOps endpoint
3. Saved predictions to `scores.csv`
4. Recorded API performance metrics

**Next**: Proceed to **Step 5: Model Monitoring** to evaluate model performance by comparing `scores.csv` with `ground_truth.csv`.

---
### Files Generated:
- `data/scores.csv` - Model predictions for inference dataset
- `reports/inference_metrics.json` - API latency and performance metrics