# Titanic Survival Classifier - Inference Example

This notebook demonstrates how to load the trained artifacts and run inference with the Titanic survival prediction model.

## Overview
- Load preprocessor and CatBoost model from artifacts
- Create sample passengers and predict survival probability
- Show feature engineering and preprocessing pipeline
- Demonstrate batch predictions

**Note**: This is the raw notebook that would be used with `ml_server ainit` to automatically generate MLServer configuration files.

In [None]:
import pickle
import json
import pandas as pd
import numpy as np
from pathlib import Path

## Load Trained Artifacts

Load the preprocessor, model, and feature order from the artifacts created during training.

In [None]:
# Load artifacts from training
artifacts_path = Path("../example_titanic_manual_setup/artifacts")

# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# point this notebook at artifacts that come from a trusted source.

# Load preprocessor
with (artifacts_path / "preprocessor.pkl").open("rb") as f:
    preprocessor = pickle.load(f)

# Load CatBoost model
with (artifacts_path / "catboost_model.pkl").open("rb") as f:
    model = pickle.load(f)

# Load feature order. The encoding is given explicitly so the file is decoded
# the same way on every platform instead of using the locale default.
with (artifacts_path / "feature_order.json").open("r", encoding="utf-8") as f:
    feature_order = json.load(f)

print("Artifacts loaded successfully!")
print(f"Feature order: {feature_order}")

## Feature Engineering Function

Define the same feature engineering logic used during training.

In [None]:
def engineer_features(df):
    """Add the derived ``alone``, ``adult_male`` and ``who`` columns.

    The input frame is copied first, so the caller's DataFrame is not
    modified. Missing ``sibsp``/``parch`` values count as 0 and a missing
    ``age`` is treated as 99, i.e. never as a child.

    Returns the columns listed in the module-level ``feature_order``, in
    that order.
    """
    engineered = df.copy()

    # Family size decides "alone"; NaN relatives are counted as none.
    relatives = engineered["sibsp"].fillna(0) + engineered["parch"].fillna(0)
    engineered["alone"] = relatives == 0

    # Case-insensitive sex check combined with the age-16 threshold.
    is_male = engineered["sex"].astype(str).str.lower().eq("male")
    is_adult = engineered["age"].fillna(99) >= 16
    engineered["adult_male"] = is_male & is_adult

    # Children first, then split the remaining passengers by sex.
    is_child = engineered["age"].fillna(99) < 16
    adult_label = np.where(
        engineered["sex"].astype(str).str.lower().eq("male"), "man", "woman"
    )
    engineered["who"] = np.where(is_child, "child", adult_label)

    # Select/reorder columns to match the loaded feature order
    return engineered[feature_order]

def predict_survival(passenger_data):
    """Predict survival probability for passenger data.

    Args:
        passenger_data: A single passenger as a dict, several passengers as
            a list of dicts, or a DataFrame with one row per passenger.

    Returns:
        Column 1 of ``model.predict_proba`` for the processed rows, one
        value per passenger (taken here to be the "survived" class).
    """
    # Normalise plain-Python input to a DataFrame. Lists of dicts are
    # accepted as well, matching what TitanicPredictor.predict accepts.
    if isinstance(passenger_data, dict):
        passenger_data = pd.DataFrame([passenger_data])
    elif isinstance(passenger_data, list):
        passenger_data = pd.DataFrame(passenger_data)

    # Apply feature engineering
    features = engineer_features(passenger_data)

    # Preprocess features
    X_processed = preprocessor.transform(features)

    # Keep only the second probability column
    return model.predict_proba(X_processed)[:, 1]

## Example Predictions

Create some example passengers and predict their survival probability.

In [None]:
# Example passengers, written as one tuple per passenger in the column
# order given by passenger_columns and expanded into dicts below.
passenger_columns = ("pclass", "sex", "age", "sibsp", "parch", "fare", "embarked")
passenger_rows = [
    (1, "female", 25, 0, 0, 80.0, "S"),
    (3, "male", 30, 1, 2, 15.0, "Q"),
    (2, "female", 5, 1, 1, 25.0, "C"),
]
passengers = [dict(zip(passenger_columns, row)) for row in passenger_rows]

# Build the input frame and show it
passenger_df = pd.DataFrame(passengers)
print("Example passengers:")
print(passenger_df)

# Score all passengers in one call
survival_probs = predict_survival(passenger_df)

# Attach the probability and the thresholded (> 0.5) label to a copy of the
# input frame so the original stays unchanged.
results = passenger_df.assign(
    survival_probability=survival_probs,
    predicted_survival=(survival_probs > 0.5).astype(int),
)

print("\nPrediction results:")
print(results)

## Single Passenger Prediction

Example of predicting for a single passenger.

In [None]:
# One passenger described as a plain dict
single_passenger = {
    "pclass": 1, "sex": "female", "age": 28,
    "sibsp": 1, "parch": 0,
    "fare": 120.0, "embarked": "S",
}

# predict_survival returns one value per row; take the only one
survival_prob = predict_survival(single_passenger)[0]

# Same 0.5 cut-off as in the batch example
if survival_prob > 0.5:
    outcome = "Survived"
else:
    outcome = "Did not survive"

print(f"Passenger: {single_passenger}")
print(f"Survival probability: {survival_prob:.3f}")
print(f"Predicted outcome: {outcome}")

## Model Information

Display information about the loaded model and preprocessing pipeline.

In [None]:
print("Model Information:")
print(f"Model type: {type(model).__name__}")
# NOTE(review): confirm the loaded model actually exposes `feature_count_`;
# this could not be verified from the notebook alone.
print(f"Feature count: {model.feature_count_}")
print(f"Tree count: {model.tree_count_}")

print("\nPreprocessor Information:")
print(f"Preprocessor type: {type(preprocessor).__name__}")
print(f"Feature names in: {feature_order}")
# Everywhere else the preprocessor receives the output of engineer_features
# (derived columns added, columns ordered by feature_order), so the sample
# row goes through the same step here instead of being passed in raw.
sample_features = engineer_features(pd.DataFrame([single_passenger]))
print(f"Transformed feature count: {preprocessor.transform(sample_features).shape[1]}")

## Class for MLServer Integration

This is an example of how this model could be wrapped into a predictor class for MLServer.

**Note**: This cell shows the pattern that `ml_server ainit` would extract to generate the predictor class automatically.

In [None]:
class TitanicPredictor:
    """Example predictor class for Titanic survival model.

    Loads a pickled model, a pickled preprocessor and a JSON feature-order
    list, then runs the feature engineering, preprocessing and
    ``predict_proba`` steps for dict, list-of-dict or DataFrame input.
    """

    def __init__(self, model_path: str, preprocessor_path: str, feature_order_path: str):
        """Load the three artifacts from the given file paths.

        Args:
            model_path: Path to the pickled model.
            preprocessor_path: Path to the pickled preprocessor.
            feature_order_path: Path to the JSON file holding the feature
                column order.
        """
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only load artifacts that come from a trusted source.
        with open(model_path, "rb") as f:
            self.model = pickle.load(f)
        with open(preprocessor_path, "rb") as f:
            self.preprocessor = pickle.load(f)
        # Explicit encoding keeps decoding independent of the locale default.
        with open(feature_order_path, "r", encoding="utf-8") as f:
            self.feature_order = json.load(f)

    @staticmethod
    def _to_frame(X):
        """Wrap a dict or a list in a DataFrame; return anything else as is."""
        if isinstance(X, dict):
            return pd.DataFrame([X])
        if isinstance(X, list):
            return pd.DataFrame(X)
        return X

    def _engineer_features(self, df):
        """Apply feature engineering.

        Works on a copy of ``df`` and adds ``alone``, ``adult_male`` and
        ``who``. Missing ``sibsp``/``parch`` count as 0 and a missing ``age``
        is treated as 99. Returns the columns in ``self.feature_order``.
        """
        df = df.copy()
        is_male = df["sex"].astype(str).str.lower().eq("male")
        age = df["age"].fillna(99)

        df["alone"] = (df["sibsp"].fillna(0) + df["parch"].fillna(0) == 0)
        df["adult_male"] = is_male & (age >= 16)
        df["who"] = np.where(age < 16, "child", np.where(is_male, "man", "woman"))
        return df[self.feature_order]

    def _probability_matrix(self, X):
        """Run the full pipeline and return the raw ``predict_proba`` output."""
        features = self._engineer_features(self._to_frame(X))
        X_processed = self.preprocessor.transform(features)
        return self.model.predict_proba(X_processed)

    def predict(self, X) -> list:
        """Predict survival probability.

        Args:
            X: A dict (one passenger), a list of dicts, or a DataFrame.

        Returns:
            A list with column 1 of the probability matrix, one value per
            input row.
        """
        return self._probability_matrix(X)[:, 1].tolist()

    def predict_proba(self, X) -> list:
        """Return full probability matrix.

        Args:
            X: A dict (one passenger), a list of dicts, or a DataFrame.

        Returns:
            The model's ``predict_proba`` output converted to nested lists.
        """
        return self._probability_matrix(X).tolist()

# Demo the predictor class: collect the constructor arguments by keyword,
# build the predictor, and score the single passenger defined earlier.
artifact_locations = {
    "model_path": "../example_titanic_manual_setup/artifacts/catboost_model.pkl",
    "preprocessor_path": "../example_titanic_manual_setup/artifacts/preprocessor.pkl",
    "feature_order_path": "../example_titanic_manual_setup/artifacts/feature_order.json",
}
predictor = TitanicPredictor(**artifact_locations)

# predict returns a list with one probability per input row
test_result = predictor.predict(single_passenger)
print(f"Predictor class result: {test_result[0]:.3f}")