
<center>
<h1> AWS SageMaker </h1>
    <h2>MLOps using AWS SageMaker </h2>
    <h3>March 23, 2023</h3>
<hr>
<h1>Real-time inference on an XGBoost Endpoint</h1>
<hr>
 </center>

In [None]:
%%capture
from typing import List

import boto3
import io
import numpy as np
import pandas as pd
import sagemaker

def convert_df_to_csv_object(df: pd.DataFrame) -> str:
    """Serialize `df` into the CSV text that is sent to the model endpoint.

    The rows are written comma-separated, without the header row and without
    the index column.

    Args:
        df: Dataframe holding the rows to serialize.

    Returns:
        The CSV content of `df` as a single string.
    """

    assert isinstance(df, pd.DataFrame)

    # Write into an in-memory text buffer; comma is the separator SageMaker expects by default.
    with io.StringIO() as buffer:
        df.to_csv(buffer, header=False, index=False, sep=",")
        return buffer.getvalue()

def batch_prediction(payload: str, endpoint: "sagemaker.predictor.Predictor") -> np.ndarray:
    """Send a CSV payload to the model endpoint and return its predictions.

    Args:
        payload: CSV text with one feature vector per row (no header, no index).
        endpoint: SageMaker predictor bound to the model endpoint.

    Returns:
        One-dimensional float array with the predictions parsed from the response.
        The array is empty when the response contains no values.

    Raises:
        ValueError: If a value in the response cannot be parsed as a float.
    """

    # invoke the endpoint, declaring the request body as CSV
    raw_response = endpoint.predict(payload, initial_args={"ContentType": "text/csv"})

    # the response normally arrives as bytes; tolerate a predictor that already returns text
    if isinstance(raw_response, (bytes, bytearray)):
        text = raw_response.decode("utf-8")
    else:
        text = str(raw_response)

    # Predictions may be separated by commas or by newlines (one per row), so treat
    # commas as whitespace and split on any whitespace; this also drops empty tokens
    # produced by a trailing separator.
    values = [float(token) for token in text.replace(",", " ").split()]

    return np.asarray(values, dtype=float)

Load the (preprocessed) data from S3.

In [None]:
# Read the preprocessed test data (CSV with a header row) from S3.
df = pd.read_csv('s3://sagemaker-project-p-1bbjaifjvgou/heiko-demo-p-1bbjaifjvgou-SKLearn-Prepr-2023-03-14-14-34-04-876/output/test/test_w_header.csv')
# Keep only the rows labelled 35 through 38; `.loc` slicing includes both end labels.
df = df.loc[35:38, :]
# Discard the old row labels so the sample is numbered from 0 again.
df = df.reset_index(drop=True)

In [None]:
# Show the selected sample rows (a notebook renders the value of a cell's last expression).
df

Remove the target - `subtraction` - from the dataset.

In [None]:
# Keep every row but skip the column at position 0, leaving the remaining columns as the payload.
payload = df.iloc[:, 1:]
# Show the resulting payload dataframe.
payload

Call the endpoint. You can also try this with your own endpoint.

In [None]:
# Name of the SageMaker endpoint to call; replace it with the name of your own endpoint.
endpoint_name = 'heiko-demo-staging'

In [None]:
region = 'eu-west-3'

# `Predictor` takes no `region` argument, so the region has to come from the session:
# build the SageMaker session on top of a boto3 session pinned to `region`.
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker.Session(
        boto_session=boto3.Session(region_name=region),
    ),
)

# Serialize the payload rows to CSV, send them to the endpoint and show the predictions.
batch_prediction(payload=convert_df_to_csv_object(payload), endpoint=predictor)