# SageMaker V3 Local Container Mode Example

This notebook demonstrates how to use the SageMaker V3 `ModelBuilder` in Local Container mode to test a model inside a Docker container on your own machine. It requires a running local Docker daemon; no AWS resources are created.

In [None]:
# Import required libraries
import json
import uuid
import tempfile
import os
import shutil
import torch
import torch.nn as nn

from sagemaker.serve.model_builder import ModelBuilder
from sagemaker.serve.spec.inference_spec import InferenceSpec
from sagemaker.serve.builder.schema_builder import SchemaBuilder
from sagemaker.serve.utils.types import ModelServer
from sagemaker.serve.mode.function_pointers import Mode

## Step 1: Create a PyTorch Model

Create and save a simple PyTorch model for local container testing.

In [None]:
class SimpleModel(nn.Module):
    """Minimal PyTorch classifier used to exercise the local container workflow.

    A single linear layer maps 4 input features to 2 scores, which are then
    normalised with softmax so the values along the last dimension sum to 1.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 2)

    def forward(self, x):
        """Return class probabilities for ``x``.

        Args:
            x: Float tensor whose last dimension has size 4, either a batch
                of shape ``(N, 4)`` or a single unbatched sample of shape ``(4,)``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 2.
        """
        # Normalise over the last dimension rather than a hard-coded dim=1:
        # identical for (N, 4) batches, and it also works for a 1-D sample,
        # where dim=1 would be out of range.
        return torch.softmax(self.linear(x), dim=-1)

def save_pytorch_model(model_path: str) -> str:
    """Trace a fresh ``SimpleModel`` and save it as TorchScript.

    Args:
        model_path: Directory that will receive ``model.pt``. It is created
            if it does not exist yet.

    Returns:
        The full path of the saved ``model.pt`` file.
    """
    # Create the target directory up front so torch.jit.save cannot fail on a
    # missing parent directory.
    os.makedirs(model_path, exist_ok=True)

    model = SimpleModel()
    # Trace in eval mode so the recorded graph reflects inference behaviour.
    model.eval()

    # One example row with 4 features drives the trace.
    sample_input = torch.tensor([[1.0, 2.0, 3.0, 4.0]], dtype=torch.float32)
    traced_model = torch.jit.trace(model, sample_input)

    model_file = os.path.join(model_path, "model.pt")
    torch.jit.save(traced_model, model_file)
    return model_file

# Create temporary model directory and save model
# tempfile.mkdtemp() creates a new directory and returns its path; nothing
# deletes it automatically, so the clean-up cell at the end of the notebook
# removes it with shutil.rmtree.
temp_model_path = tempfile.mkdtemp()
# save_pytorch_model returns the path of the model.pt file it wrote.
model_file = save_pytorch_model(temp_model_path)
print(f"Model saved to: {model_file}")

## Step 2: Define PyTorch InferenceSpec

Create an InferenceSpec that can load and run our PyTorch model.

In [None]:
class PyTorchInferenceSpec(InferenceSpec):
    """InferenceSpec that stages, loads and runs the TorchScript ``model.pt`` file.

    Args:
        model_path: Optional directory containing ``model.pt``. When set,
            ``prepare`` copies that file into the directory it is given.
    """

    # File name shared by prepare() and load().
    _MODEL_FILE = "model.pt"

    def __init__(self, model_path=None):
        self.model_path = model_path

    def prepare(self, model_dir: str):
        """Copy ``model.pt`` from ``self.model_path`` into ``model_dir``.

        Does nothing when no ``model_path`` was configured, when the source
        file does not exist, or when source and destination are the same file.
        """
        if not self.model_path:
            return

        src_model = os.path.join(self.model_path, self._MODEL_FILE)
        dst_model = os.path.join(model_dir, self._MODEL_FILE)
        if not os.path.exists(src_model):
            return

        # Compare resolved paths, not raw strings: a relative path or a
        # symlinked directory can name the same file with a different string,
        # and shutil.copy2 raises SameFileError in that case.
        if os.path.realpath(src_model) == os.path.realpath(dst_model):
            return

        os.makedirs(model_dir, exist_ok=True)
        shutil.copy2(src_model, dst_model)

    def load(self, model_dir: str):
        """Load the model found in ``model_dir`` and return it in eval mode.

        ``model.pt`` is loaded with ``torch.jit.load`` onto the CPU. If the
        file is missing, a freshly initialised ``SimpleModel`` is returned
        instead and a ``RuntimeWarning`` is emitted, because that fallback
        has untrained weights.
        """
        model_file = os.path.join(model_dir, self._MODEL_FILE)

        if os.path.exists(model_file):
            model = torch.jit.load(model_file, map_location='cpu')
        else:
            # Local import: only the fallback path needs it.
            import warnings

            warnings.warn(
                f"{model_file} not found; falling back to an untrained SimpleModel",
                RuntimeWarning,
                stacklevel=2,
            )
            model = SimpleModel()

        model.eval()
        return model

    def invoke(self, input_object, model):
        """Run ``model`` on ``input_object`` and return the result as nested lists.

        Args:
            input_object: Either the input rows themselves, or a dict whose
                ``"data"`` key holds them. Rows may be a list or any object
                exposing ``tolist()``. A single flat sample is treated as a
                batch of one.
            model: Callable model as returned by ``load``.

        Returns:
            The model output converted with ``tolist()``.
        """
        if isinstance(input_object, dict) and "data" in input_object:
            payload = input_object["data"]
        else:
            payload = input_object

        # Array-like objects are converted to plain lists first; lists have no
        # tolist() and pass through unchanged.
        if hasattr(payload, 'tolist'):
            payload = payload.tolist()
        input_tensor = torch.tensor(payload, dtype=torch.float32)

        # Promote a single unbatched sample to a batch of one, since the model
        # normalises over dimension 1 and would reject a 1-D tensor.
        if input_tensor.dim() == 1:
            input_tensor = input_tensor.unsqueeze(0)

        with torch.no_grad():
            return model(input_tensor).tolist()

print("PyTorch InferenceSpec defined successfully!")

## Step 3: Create Schema Builder

Define the input/output schema for our PyTorch model.

In [None]:
# Create PyTorch schema builder
# Example request: a batch holding one row of 4 features, matching the 4
# inputs of the model's linear layer.
sample_input = [[1.0, 2.0, 3.0, 4.0]]
# Example response: one row of 2 values, matching the model's 2 outputs.
# The numbers are hard-coded placeholders, not a real prediction.
sample_output = [[0.6, 0.4]]
# NOTE(review): SchemaBuilder presumably derives request/response
# (de)serialization from these two samples — confirm against the SageMaker
# SDK documentation.
schema_builder = SchemaBuilder(sample_input, sample_output)

print("Schema builder created successfully!")

## Step 4: Configure ModelBuilder for Local Container Mode

Set up ModelBuilder to run in LOCAL_CONTAINER mode with Docker.

In [None]:
# Name prefixes for the model and the endpoint created by this notebook
MODEL_NAME_PREFIX = "pytorch-local"
ENDPOINT_NAME_PREFIX = "pytorch-local"

# An 8-character random suffix keeps names from colliding across runs
unique_id = uuid.uuid4().hex[:8]
model_name = "-".join((MODEL_NAME_PREFIX, unique_id))
endpoint_name = "-".join((ENDPOINT_NAME_PREFIX, unique_id))

# The inference spec points at the directory that holds the saved model.pt
inference_spec = PyTorchInferenceSpec(model_path=temp_model_path)

# Assemble the builder; Mode.LOCAL_CONTAINER selects Docker container mode
model_builder = ModelBuilder(
    mode=Mode.LOCAL_CONTAINER,
    model_server=ModelServer.TORCHSERVE,
    schema_builder=schema_builder,
    inference_spec=inference_spec,
)

# Report the generated names in a single call, one message per line
print(
    f"ModelBuilder configured for local container model: {model_name}",
    f"Target endpoint: {endpoint_name}",
    "Note: This will use Docker containers locally!",
    sep="\n",
)

## Step 5: Build the Model

Build the model artifacts for containerized deployment.

In [None]:
# Build the model
# build() is called with the generated model_name; the returned object's
# model_name attribute is printed to confirm that the build finished.
local_model = model_builder.build(model_name=model_name)
print(f"Model Successfully Created: {local_model.model_name}")

## Step 6: Deploy in Local Container

Deploy the model in a local Docker container. This may take a few minutes to pull the container image.

In [None]:
# Announce the deployment; the first run may need to pull the Docker image
print(
    "Starting local container deployment...",
    "Note: This may take a few minutes to pull the Docker image on first run.",
    sep="\n",
)

# 20 minutes, expressed in seconds
CONTAINER_TIMEOUT_SECONDS = 20 * 60

# Start the endpoint in a local container under the generated endpoint name
local_endpoint = model_builder.deploy_local(
    container_timeout_in_seconds=CONTAINER_TIMEOUT_SECONDS,
    wait=True,
    endpoint_name=endpoint_name,
)

print(
    f"Local Container Endpoint Successfully Created: {endpoint_name}",
    "Container is now running and ready for inference!",
    sep="\n",
)

## Step 7: Test the Containerized Model

Send test requests to the model running in the local container.

In [None]:
# Test 1: one sample, sent as a batch of size one
test_input_1 = [[1.0, 2.0, 3.0, 4.0]]

# Send the sample to the local endpoint as a JSON document
response_1 = local_endpoint.invoke(
    content_type="application/json",
    body=json.dumps(test_input_1),
)

# Read the response body, decode the bytes as UTF-8, then parse the JSON
raw_body_1 = response_1.body.read()
response_data_1 = raw_body_1.decode("utf-8")
parsed_response_1 = json.loads(response_data_1)
print(f"Test 1 - Single prediction: {parsed_response_1}")

In [None]:
# Test 2: three samples sent together in one request
test_input_2 = [
    [1.0, 2.0, 3.0, 4.0],
    [0.5, 1.5, 2.5, 3.5],
    [2.0, 3.0, 4.0, 5.0],
]

# Serialise the whole batch once, then invoke the endpoint with it
batch_payload = json.dumps(test_input_2)
response_2 = local_endpoint.invoke(
    content_type="application/json",
    body=batch_payload,
)

# Read and decode the response body before parsing it as JSON
response_data_2 = response_2.body.read().decode("utf-8")
parsed_response_2 = json.loads(response_data_2)
print(f"Test 2 - Batch prediction: {parsed_response_2}")

In [None]:
# Test 3: a single sample whose features are much smaller in magnitude
test_input_3 = [[0.1, 0.2, 0.3, 0.4]]

# Same request shape as the earlier tests, only the values differ
response_3 = local_endpoint.invoke(
    content_type="application/json",
    body=json.dumps(test_input_3),
)

# Pull the bytes out of the response, decode them, and parse the JSON text
body_bytes_3 = response_3.body.read()
response_data_3 = body_bytes_3.decode("utf-8")
parsed_response_3 = json.loads(response_data_3)
print(f"Test 3 - Edge case: {parsed_response_3}")

## Step 8: Container Information

Print a summary of the endpoint configuration and show how to inspect the running container with `docker ps`.

In [None]:
# Collect the summary lines first, then print them in one call.
# Only the endpoint name and model path are interpolated; the model server
# and container mode lines are fixed text, so they need no f-string prefix.
container_summary = [
    "Container Information:",
    f"- Endpoint Name: {local_endpoint.endpoint_name}",
    "- Model Server: TorchServe",
    "- Container Mode: LOCAL_CONTAINER",
    f"- Model Path: {temp_model_path}",
]
print("\n".join(container_summary))

# Point the reader at the Docker CLI for inspecting the running container
print("\nTo see the running container, you can run:", "docker ps", sep="\n")

## Step 9: Clean Up

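Remove the temporary model files. The cell below does not stop the container explicitly; run `docker ps` afterwards to check whether it is still running.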

In [None]:
# Clean up temporary model files.
# Guarded with isdir so that re-running this cell after the directory is gone
# does not raise FileNotFoundError; genuine removal errors still surface.
if os.path.isdir(temp_model_path):
    shutil.rmtree(temp_model_path)
print("Temporary model files cleaned up!")

# NOTE(review): nothing in this cell stops the container itself. Confirm the
# container lifecycle against the SageMaker SDK documentation, or check with
# `docker ps` after the session ends.
print("Local container will be automatically stopped when this notebook session ends.")
print("No AWS resources were created, so no cloud cleanup needed.")

## Summary

This notebook demonstrated:
1. Creating and saving a PyTorch model
2. Defining a PyTorch InferenceSpec with prepare(), load(), and invoke() methods
3. Configuring ModelBuilder for LOCAL_CONTAINER mode
4. Building and deploying models in local Docker containers
5. Testing containerized models with various inputs
6. Proper cleanup of local resources

## Benefits of Local Container Mode:
- **Container parity**: Same environment as SageMaker endpoints
- **No AWS costs**: Runs entirely locally
- **Realistic testing**: Uses actual model serving containers
- **Debugging friendly**: Can inspect container logs and behavior
- **Dependency isolation**: Container handles all dependencies

## When to Use Local Container Mode:
- Testing models before deploying to SageMaker
- Debugging inference issues
- Validating custom inference code
- Development with realistic serving environment
- CI/CD pipeline testing

Local container mode provides the perfect balance between local development speed and production environment fidelity!