### Example for retrieving feature store records from SageMaker Endpoint
Assumption: a Feature Group must already be provisioned and must contain a record with the identifier that is queried.

Custom inference file

In [1]:
%%writefile inference.py

import json
import os
import pickle as pkl
import time
import sys
import subprocess
import numpy as np

# Install the SageMaker Python SDK with the current interpreter's pip each time
# this module is loaded, so that the `import sagemaker` below can succeed.
# NOTE(review): the version is not pinned and the install needs network access
# on every load of this file — consider pinning; confirm the SDK is needed here.
subprocess.check_call([sys.executable, "-m", "pip", "install", "sagemaker"])

import boto3
import sagemaker

import sagemaker_xgboost_container.encoder as xgb_encoders

# Module-level boto3 session and Feature Store runtime client: created once
# when this module is loaded and reused by predict_fn for every get_record call.
boto_session = boto3.Session()
boto_fs_client = boto_session.client(service_name='sagemaker-featurestore-runtime')

def model_fn(model_dir):
    """Model-loading hook that logs the call and loads nothing.

    Args:
        model_dir: Ignored.

    Returns:
        None: predict_fn in this file never reads its ``model`` argument.
    """
    print('processing - in model_fn')
    loaded_model = None
    return loaded_model


def input_fn(request_body, request_content_type):
    """Pass the raw request body through unchanged.

    Args:
        request_body: Incoming payload; returned as-is.
        request_content_type: Ignored.

    Returns:
        The same ``request_body`` object that was passed in.
    """
    print('processing - in input_fn')
    payload = request_body
    return payload


def predict_fn(input_data, model):
    """Time one Feature Store ``get_record`` call and return its latency.

    Args:
        input_data: Payload of the form ``"<feature group name>,<record id>"``
            as ``str`` or UTF-8 encoded ``bytes``. Whitespace around either
            field is ignored, as are any fields after the second. The record
            id must parse as an integer.
        model: Unused.

    Returns:
        float: Seconds spent inside the ``get_record`` call only; record
        inspection and logging are excluded from the measurement.

    Raises:
        ValueError: If the payload has fewer than two fields, the feature
            group name is empty, or the record id is not an integer.
    """
    print ('processing - in predict_fn')

    # Accept a bytes payload as well as str.
    if isinstance(input_data, (bytes, bytearray)):
        input_data = input_data.decode('utf-8')

    params = [field.strip() for field in input_data.split(',')]
    if len(params) < 2 or not params[0]:
        raise ValueError(
            f"expected '<feature group name>,<record id>' but got {input_data!r}"
        )
    fg_name = params[0]
    input_feat_id = int(params[1])  # ValueError here means a non-integer record id

    # Monotonic, high-resolution clock; stop it immediately after the call so
    # the reported duration is the lookup itself, not the printing below.
    start = time.perf_counter()
    rec = boto_fs_client.get_record(FeatureGroupName=fg_name, RecordIdentifierValueAsString=str(input_feat_id))
    duration = time.perf_counter() - start

    feat = rec.get('Record', None)
    if not feat:
        print (f'processing - unable to read feature record with id {input_feat_id}, for fg {fg_name}')
    else:
        # NOTE(review): assumes the first feature in the record is the record
        # identifier — confirm against the feature group definition.
        resp_feat_id = feat[0]['ValueAsString']
        print (f'processing - successfull get of feature record with id {resp_feat_id}, in fg {fg_name}')

    print (f'processing - duration = {duration}')

    return duration


def output_fn(prediction, content_type):
    """Serialise the prediction as plain text.

    Args:
        prediction: Value produced by predict_fn.
        content_type: Ignored.

    Returns:
        str: ``str(prediction)``.
    """
    print('processing - output_fn', prediction)
    response_body = str(prediction)
    return response_body


Writing inference.py


Create and deploy a SageMaker endpoint using an XGBoost model

In [2]:
import sagemaker
from sagemaker.xgboost.model import XGBoostModel
from sagemaker.serializers import CSVSerializer

# Execution role reported by the SageMaker SDK; passed to the model below.
role = sagemaker.get_execution_role()

# Settings for the hosted model, gathered in one place.
MODEL_DATA_S3_URI = "s3://fs-get-record/sagemaker/xgboost_credit_risk/output/xgboost-2021-06-02-14-55-49-198/output/model.tar.gz"
ENDPOINT_INSTANCE_TYPE = 'ml.c4.xlarge'
ENDPOINT_INSTANCE_COUNT = 1

# The model uses the custom inference.py written by the first cell as its entry point.
model_settings = dict(
    model_data=MODEL_DATA_S3_URI,
    role=role,
    entry_point="inference.py",
    framework_version="1.2-2",
)
xgboost_inference_model = XGBoostModel(**model_settings)

# Requests are sent as CSV text.
predictor = xgboost_inference_model.deploy(
    instance_type=ENDPOINT_INSTANCE_TYPE,
    initial_instance_count=ENDPOINT_INSTANCE_COUNT,
    serializer=CSVSerializer(),
)

---------------!

Single run: the predictor returns the duration of the Feature Store lookup in seconds. The feature group must already contain a record with the given id.

In [6]:
# Feature group and record identifier to look up.
fg_name = 'ingest-fg-05-25-2021-17-24-43'
feat_id = 20000
# Payload format parsed by predict_fn in inference.py:
# "<feature group name>, <record id>".
req = f'{fg_name}, {feat_id}'
resp = predictor.predict(req)
# First field of the first response row is the measured duration. It is kept
# as the raw string here; the timing loops further down convert it with float().
duration = resp[0][0]
duration

'0.08763742446899414'

Helper function to compute latency statistics (average, p50, p95, p99)

In [7]:
def get_stats(times):
    """Summarise a sequence of timings.

    Args:
        times: Sequence of numeric durations.

    Returns:
        dict: The mean under ``'avg'`` and the 50th, 95th and 99th
        percentiles under ``'p_50'``, ``'p_95'`` and ``'p_99'``.
    """
    import numpy as np

    samples = np.array(times)
    stats = {'avg': np.average(samples)}
    for pct in (50, 95, 99):
        stats[f'p_{pct}'] = np.percentile(samples, pct)
    return stats

In [8]:
fg_name = 'ingest-fg-05-25-2021-17-24-43'
feat_id = 20000
# Number of back-to-back lookups that feed the latency statistics.
NUM_REQUESTS = 1000

# The payload is identical for every call, so build it once outside the loop.
req = f'{fg_name}, {feat_id}'

# Each response's first field of the first row is the duration as text;
# convert it to float so the statistics can be computed.
times = [float(predictor.predict(req)[0][0]) for _ in range(NUM_REQUESTS)]

get_stats(times)

{'avg': 0.011606650114059449,
 'p_50': 0.009004950523376465,
 'p_95': 0.021177470684051514,
 'p_99': 0.07053253650665284}

Create feature groups, ingest sample data, and retrieve the records to simulate cold starts

In [44]:
import sagemaker
import pandas as pd
# SageMaker session handed to every FeatureGroup object created below, and the
# execution role passed as role_arn when the feature groups are created.
sm_session = sagemaker.Session()
role = sagemaker.get_execution_role()

Generate Sample Data

In [45]:
# Number of feature groups to create; each one receives a single record.
num_fgs = 25
# Record ids run from 1 to num_fgs inclusive.
data_feat_ids = list(range(1, num_fgs + 1))
# Every sample record shares the same event time.
data_feat_eventtimes = ['2020-12-21T01:00:00Z'] * num_fgs
# Quantity is ten times the record id.
data_feat_qtys = [10 * feat_id for feat_id in data_feat_ids]
# One feature group name per record id.
data_fg_names = [f'test-fg-{feat_id}' for feat_id in data_feat_ids]

Helpers to provision and ingest sample data into feature groups

In [46]:
from sagemaker.feature_store.feature_group import FeatureGroup
import time
def cast_object_to_string(df):
    """Convert every object-dtype column of ``df`` to the pandas ``string`` dtype.

    The frame is modified in place and nothing is returned. Columns of any
    other dtype are left untouched.
    """
    for column_name in df.columns:
        if df.dtypes[column_name] != 'object':
            continue
        # Go through plain str first, then to the pandas "string" dtype.
        as_text = df[column_name].astype('str')
        df[column_name] = as_text.astype('string')
            
def wait_for_feature_group_creation_complete(feature_group):
    """Block until ``feature_group`` is no longer in the "Creating" status.

    Polls ``feature_group.describe()`` and sleeps 5 seconds between polls
    for as long as the reported ``FeatureGroupStatus`` is "Creating".

    Raises:
        RuntimeError: If the status after polling is anything other than
            "Created".
    """
    while True:
        status = feature_group.describe().get("FeatureGroupStatus")
        if status != "Creating":
            break
        time.sleep(5)

    if status == "Created":
        return
    raise RuntimeError(f"Failed to create feature group {feature_group.name}")

def ingest_data(seed_data):
    """Create one feature group per seed row and ingest that row into it.

    Args:
        seed_data: Iterable of ``(feat_id, feat_time, feat_qty, fg_name)``
            tuples. Each tuple becomes a one-row DataFrame that is ingested
            into a newly created feature group named ``fg_name``.

    Relies on the notebook-level ``sm_session`` and ``role`` and on the
    helpers ``cast_object_to_string`` and
    ``wait_for_feature_group_creation_complete``.
    """
    for record_id, event_time, quantity, group_name in seed_data:
        record_df = pd.DataFrame(
            data={
                'feat_id': [record_id],
                'feat_time': [event_time],
                'feat_Qty': [quantity],
            }
        )
        # Cast object dtype to string: the SageMaker FeatureStore Python SDK
        # then maps the string dtype to the String feature type.
        cast_object_to_string(record_df)

        feature_group = FeatureGroup(name=group_name, sagemaker_session=sm_session)
        feature_group.load_feature_definitions(data_frame=record_df)

        # NOTE(review): s3_uri=False presumably skips the offline store so only
        # the online store is provisioned — confirm against the SDK docs.
        feature_group.create(
            s3_uri=False,
            record_identifier_name='feat_id',
            event_time_feature_name='feat_time',
            role_arn=role,
            enable_online_store=True,
        )
        wait_for_feature_group_creation_complete(feature_group)

        feature_group.ingest(data_frame=record_df, wait=True)

In [47]:
# zip() lazily yields the (feat_id, feat_time, feat_qty, fg_name) tuples that
# ingest_data unpacks. The iterator can be consumed only once, so it is built
# in the same cell that passes it to ingest_data.
sample_data = zip(data_feat_ids, data_feat_eventtimes, data_feat_qtys, data_fg_names)
ingest_data(sample_data)    

Call get_record on the newly created feature groups to measure cold-start latency

In [48]:
# Query each newly created feature group once, pairing every feature group
# name with the id of the single record ingested into it.
times = []
for fg_name, feat_id in zip(data_fg_names, data_feat_ids):
    req = f'{fg_name}, {feat_id}'
    resp = predictor.predict(req)
    # First field of the first response row is the duration as text.
    times.append(float(resp[0][0]))

get_stats(times)

{'avg': 0.03612194061279297,
 'p_50': 0.026401758193969727,
 'p_95': 0.0862349510192871,
 'p_99': 0.10780010223388667}

In [None]:
# Clean up: delete every sample feature group named in data_fg_names.
for fg_name in data_fg_names:
    fg = FeatureGroup(name=fg_name, sagemaker_session=sm_session)
    fg.delete()

In [None]:
# Clean up: delete the endpoint behind `predictor` (created by the deploy() call).
predictor.delete_endpoint()