
<center>
<h1> AWS SageMaker </h1>
    <h2>MLOps using AWS SageMaker </h2>
    <h3>March 23, 2023</h3>
<hr>
<h1>Real-time inference on an XGBoost Endpoint</h1>
<hr>
 </center>

In [3]:
from typing import List

import boto3
import io
import numpy as np
import pandas as pd
import sagemaker

def convert_df_to_csv_object(df: pd.DataFrame) -> str:
    """Serialize `df` into the CSV text that is sent to the model endpoint.

    The rows are written comma separated, without the header row and
    without the index column.

    Args:
        df: Dataframe holding the rows to serialize.

    Returns:
        The content of `df` as a single CSV-formatted string.
    """
    assert isinstance(df, pd.DataFrame)

    # Render into an in-memory text buffer rather than a file on disk; the
    # buffer is closed as soon as its content has been read back.
    with io.StringIO() as buffer:
        # comma is used as separator (the original author notes that this is
        # what SageMaker expects by default)
        df.to_csv(buffer, sep=",", header=False, index=False)
        return buffer.getvalue()

def batch_prediction(payload: str, endpoint: "sagemaker.predictor.Predictor") -> np.ndarray:
    """Send a CSV payload to the model endpoint and return its predictions.

    Args:
        payload: CSV-formatted feature rows (no header row, no index column).
        endpoint: Predictor bound to the SageMaker model endpoint.

    Returns:
        1-D float array with the predictions parsed from the response. The
        array is empty when the response contains no values.

    Raises:
        ValueError: If a value in the response cannot be parsed as a float.
    """
    # invoke the endpoint; the payload is explicitly declared as CSV
    raw_response = endpoint.predict(payload, initial_args={"ContentType": "text/csv"})

    # the response is decoded when it arrives as bytes; a response that is
    # already text is used as-is
    if isinstance(raw_response, bytes):
        text = raw_response.decode("utf-8")
    else:
        text = raw_response

    # Accept commas as well as newlines/whitespace between the predictions, so
    # one-value-per-line responses and trailing newlines parse too.
    tokens = text.replace(",", " ").split()

    # dtype is fixed so that an empty response still yields a float array
    return np.array([float(token) for token in tokens], dtype=float)

Load the (preprocessed) data from S3.

In [34]:
# Read the test split (the file includes a header row) from S3.
df = pd.read_csv('s3://sagemaker-project-p-1bbjaifjvgou/heiko-demo-p-1bbjaifjvgou-SKLearn-Prepr-2023-03-14-14-34-04-876/output/test/test_w_header.csv')
# Keep only the rows labelled 35 through 38 (label slicing includes both ends).
df = df.loc[35:38, :]
# Renumber the remaining rows starting from 0 and discard the old labels.
df = df.reset_index(drop=True)

In [35]:
df

Unnamed: 0,subtraction,wind_speed,power,nacelle_direction,wind_direction,rotor_speed,generator_speed,temp_environment,temp_hydraulic_oil,temp_gear_bearing,cosphi,blade_angle_avg,hydraulic_pressure
0,0.0,0.996684,0.996058,222.05683,223.309394,0.995549,0.831045,14.154587,27.06112,55.321271,0.997626,3.97374,194.388442
1,0.0,0.960518,0.989188,226.941148,220.911624,0.98797,0.841187,14.156931,29.124152,55.913211,0.997267,3.072455,196.388316
2,1.0,1.051278,0.98723,226.372211,227.952655,1.004519,0.827129,13.743097,31.04527,57.777315,0.985665,6.121129,195.917418
3,1.0,1.126461,0.987758,222.477222,220.842641,0.988797,0.84528,14.272125,32.690493,59.139193,0.993457,7.432964,196.031172


Remove the target - `subtraction` - from the dataset.

In [69]:
# Keep all rows and every column except the first one, selected by position.
payload = df.iloc[:, slice(1, None)]
payload

Unnamed: 0,wind_speed,power,nacelle_direction,wind_direction,rotor_speed,generator_speed,temp_environment,temp_hydraulic_oil,temp_gear_bearing,cosphi,blade_angle_avg,hydraulic_pressure
0,0.996684,0.996058,222.05683,223.309394,0.995549,0.831045,14.154587,27.06112,55.321271,0.997626,3.97374,194.388442
1,0.960518,0.989188,226.941148,220.911624,0.98797,0.841187,14.156931,29.124152,55.913211,0.997267,3.072455,196.388316
2,1.051278,0.98723,226.372211,227.952655,1.004519,0.827129,13.743097,31.04527,57.777315,0.985665,6.121129,195.917418
3,1.126461,0.987758,222.477222,220.842641,0.988797,0.84528,14.272125,32.690493,59.139193,0.993457,7.432964,196.031172


Call the endpoint. To try this with your own endpoint, change `endpoint_name` and `region` below.

In [72]:
endpoint_name = 'heiko-demo-staging'
region = 'eu-west-3'

# The region is applied through the boto3 session that backs the SageMaker
# session, so the endpoint is looked up in `region` rather than in whatever
# region the environment defaults to.
predictor = sagemaker.predictor.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker.Session(
        boto_session=boto3.Session(region_name=region),
    ),
)

# Serialize the feature rows to CSV text; the endpoint is called with a CSV
# payload, not with the dataframe itself.
df_converted = convert_df_to_csv_object(payload)

batch_prediction(payload=df_converted, endpoint=predictor)

array([0.01085647, 0.00238047, 0.8168515 , 0.79828894])