# Set Up Feature Store

## Import libraries

In [2]:
import boto3
import os
import pandas as pd
import sagemaker
import time
import warnings

from IPython.core.display import HTML
from pyathena import connect
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session

# silence every FutureWarning
warnings.simplefilter("ignore", FutureWarning)

# hide the pandas warning about DBAPI2 connection objects
# (the message argument is matched against the start of the warning text)
warnings.filterwarnings(
    action="ignore",
    message="pandas only supports SQLAlchemy connectable",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [3]:
# check stored variables: with no arguments, %store lists the variables it currently persists
%store

Stored variables and their in-db values:
bucket_name                           -> 'wizard-of-tasks-dataset-5432'
setup_dependencies_passed             -> True
setup_s3_bucket_passed                -> True


## Pull Data from Athena

In [4]:
# collect the AWS account id, region, execution role and SageMaker session
sts_client = boto3.client("sts")
account_id = sts_client.get_caller_identity().get("Account")
region = boto3.Session().region_name
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()
s3 = boto3.client("s3", region_name=sagemaker_session.boto_region_name)

In [5]:
# get bucket_name
%store -r bucket_name
print(bucket_name)

# set S3 staging directory
s3_staging_dir = "s3://{0}/athena/staging".format(bucket_name)

# create connection
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

wizard-of-tasks-dataset-5432


In [6]:
# database and table to read from
database_name = "wizard_of_tasks"
table_name = "data"

# load the whole table into a dataframe and preview the first rows
statement = f"SELECT * FROM {database_name}.{table_name}"
df = pd.read_sql(statement, conn)
df.head(5)

Unnamed: 0,question,intent_question,history,conversation_id,document_url_question,domain_question,text_answer,intent_answer,domain_answer,question_id,title,description,ingredients,steps
0,How do we prepare the tree?,ask_question_recipe_steps,student: I'm ready for the first step now plea...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Have you selected a pot? This is a very import...,answer_question_recipe_steps,diy,diy-1-1,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
1,"Ok, I have a nice dark green pot, perfect. Wha...",ask_question_ingredients_tools,student: I've got an idea of where I want it t...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Next we will prepare the tree for potting. The...,answer_question_recipe_steps,diy,diy-1-2,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
2,Does that mean basil grows best in the spring ...,ask_question_recipe_steps,"student: Gotcha! Once I have all those tools,...",Wizard-of-Task-diy-2,https://www.wikihow.com/Grow-Basil,diy,"Yes, like most plants, basil likes a temperate...",answer_question_recipe_steps,diy,diy-2-3,How to Grow Basil,"\nBasil is easy to grow, and transforms ordina...",,"['Choose the kind of basil you wish to grow.',..."
3,I don't really have access to those right now ...,ask_question_recipe_steps,"student: Okay, now what should I do after that...",Wizard-of-Task-diy-3,https://www.wikihow.com/Remove-Salt-Build-up-o...,diy,You can just rub it on the main zipper piece,answer_question_recipe_steps,diy,diy-3-1,How to Remove Salt Build up on a Zipper,\nWhether it’s from roads and sidewalks in the...,,"['Open the zipper as much as possible.', 'Use ..."
4,If I could only choose one thing to decoupage ...,ask_question_recipe_steps,student: What is the easiest type of material ...,Wizard-of-Task-diy-5,https://www.wikihow.com/Decoupage,diy,I would highly recommend either decoupaging wo...,answer_question_recipe_steps,diy,diy-5-5,How to Decoupage,\nIf you'd like to give new life to a piece of...,,"""['Cover your workspace with paper to protect ..."


## Prepare Data for Feature Store

In [7]:
# add an event time
# Take an explicit UTC clock reading: pd.Timestamp.now() returns naive *local* time, and
# converting a naive timestamp to epoch seconds treats it as UTC, so a kernel running in
# a non-UTC timezone would record shifted event times. The timezone is dropped again so
# the column stays timezone-naive, exactly as before.
df['event_time'] = pd.Timestamp.now(tz="UTC").tz_localize(None)
df.head()

Unnamed: 0,question,intent_question,history,conversation_id,document_url_question,domain_question,text_answer,intent_answer,domain_answer,question_id,title,description,ingredients,steps,event_time
0,How do we prepare the tree?,ask_question_recipe_steps,student: I'm ready for the first step now plea...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Have you selected a pot? This is a very import...,answer_question_recipe_steps,diy,diy-1-1,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...,2024-06-10 22:05:20.702680
1,"Ok, I have a nice dark green pot, perfect. Wha...",ask_question_ingredients_tools,student: I've got an idea of where I want it t...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Next we will prepare the tree for potting. The...,answer_question_recipe_steps,diy,diy-1-2,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...,2024-06-10 22:05:20.702680
2,Does that mean basil grows best in the spring ...,ask_question_recipe_steps,"student: Gotcha! Once I have all those tools,...",Wizard-of-Task-diy-2,https://www.wikihow.com/Grow-Basil,diy,"Yes, like most plants, basil likes a temperate...",answer_question_recipe_steps,diy,diy-2-3,How to Grow Basil,"\nBasil is easy to grow, and transforms ordina...",,"['Choose the kind of basil you wish to grow.',...",2024-06-10 22:05:20.702680
3,I don't really have access to those right now ...,ask_question_recipe_steps,"student: Okay, now what should I do after that...",Wizard-of-Task-diy-3,https://www.wikihow.com/Remove-Salt-Build-up-o...,diy,You can just rub it on the main zipper piece,answer_question_recipe_steps,diy,diy-3-1,How to Remove Salt Build up on a Zipper,\nWhether it’s from roads and sidewalks in the...,,"['Open the zipper as much as possible.', 'Use ...",2024-06-10 22:05:20.702680
4,If I could only choose one thing to decoupage ...,ask_question_recipe_steps,student: What is the easiest type of material ...,Wizard-of-Task-diy-5,https://www.wikihow.com/Decoupage,diy,I would highly recommend either decoupaging wo...,answer_question_recipe_steps,diy,diy-5-5,How to Decoupage,\nIf you'd like to give new life to a piece of...,,"""['Cover your workspace with paper to protect ...",2024-06-10 22:05:20.702680


In [8]:
# check column types: display the dtype of every column of df
df.dtypes

question                         object
intent_question                  object
history                          object
conversation_id                  object
document_url_question            object
domain_question                  object
text_answer                      object
intent_answer                    object
domain_answer                    object
question_id                      object
title                            object
description                      object
ingredients                      object
steps                            object
event_time               datetime64[us]
dtype: object

In [9]:
# function to convert object or category columns to string
def cast_object_to_string(data_frame):
    """Convert every object or category column of ``data_frame`` to pandas string dtype.

    The frame is modified in place and nothing is returned. Columns of any
    other dtype are left untouched.

    Missing entries (``None``/``NaN``) stay missing (``<NA>``) instead of being
    turned into literal text such as ``"None"`` or ``"nan"``, which a plain
    ``astype("str")`` can produce.

    Args:
        data_frame: pandas DataFrame whose columns are converted in place.
    """
    for label in data_frame.columns:
        column = data_frame[label]
        if pd.api.types.is_object_dtype(column.dtype) or isinstance(
            column.dtype, pd.CategoricalDtype
        ):
            # Stringify first (this also handles non-string objects), then mask
            # the positions that were missing in the original column.
            data_frame[label] = (
                column.astype("str").astype("string").where(column.notna())
            )

# convert the object/category columns of df to string dtype (df is modified in place)
cast_object_to_string(df)

In [10]:
# Convert datetime to Unix timestamp (seconds since epoch).
# Subtracting the epoch and dividing by a one-second Timedelta does not depend on the
# column's resolution. Casting to int and dividing by 10**9 is only correct for
# nanosecond resolution, and this column is datetime64[us], so that form yields values
# 1000x too small.
# The dtype guard keeps the cell safe to re-run once event_time is already numeric.
if pd.api.types.is_datetime64_any_dtype(df['event_time']):
    df['event_time'] = (df['event_time'] - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)

In [11]:
# check column types again after the string cast and the event_time conversion
df.dtypes

question                 string[python]
intent_question          string[python]
history                  string[python]
conversation_id          string[python]
document_url_question    string[python]
domain_question          string[python]
text_answer              string[python]
intent_answer            string[python]
domain_answer            string[python]
question_id              string[python]
title                    string[python]
description              string[python]
ingredients              string[python]
steps                    string[python]
event_time                      float64
dtype: object

## Create the Feature Group

In [12]:
# build the boto3 clients and the SageMaker session used to talk to Feature Store
boto_session = boto3.Session(region_name=region)
sagemaker_client = boto_session.client("sagemaker", region_name=region)
featurestore_runtime = boto_session.client(
    "sagemaker-featurestore-runtime",
    region_name=region,
)
feature_store_session = Session(
    sagemaker_featurestore_runtime_client=featurestore_runtime,
    sagemaker_client=sagemaker_client,
    boto_session=boto_session,
)

In [13]:
# name of the feature group
feature_group_name = "conversations"

# feature group object bound to the Feature Store session
feature_group = FeatureGroup(
    sagemaker_session=feature_store_session,
    name=feature_group_name,
)

# load the feature definitions from the dataframe into the feature group
feature_group.load_feature_definitions(data_frame=df)

[FeatureDefinition(feature_name='question', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='intent_question', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='history', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='conversation_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='document_url_question', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='domain_question', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='text_answer', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='intent_answer', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name

In [14]:
# create feature groups in feature store
def wait_for_feature_group_creation_complete(feature_group, poll_interval=5, timeout=None):
    """Block until ``feature_group`` has left the "Creating" status.

    Args:
        feature_group: object exposing ``describe()`` (returning a dict with a
            "FeatureGroupStatus" entry) and a ``name`` attribute.
        poll_interval: seconds to sleep between two ``describe()`` calls.
        timeout: optional maximum number of seconds to keep polling; ``None``
            (the default) polls for as long as the status is "Creating".

    Raises:
        RuntimeError: if the final status is not "Created", or if ``timeout``
            elapses while the group is still "Creating".
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    description = feature_group.describe()
    status = description.get("FeatureGroupStatus")
    while status == "Creating":
        if deadline is not None and time.monotonic() >= deadline:
            raise RuntimeError(
                f"Timed out after {timeout} seconds waiting for feature group "
                f"{feature_group.name} to be created"
            )
        print("Waiting for Feature Group Creation")
        time.sleep(poll_interval)
        description = feature_group.describe()
        status = description.get("FeatureGroupStatus")
    if status != "Created":
        # Surface the final status and, when the description carries one, the
        # failure reason, so the error is actionable.
        reason = description.get("FailureReason")
        detail = f"status: {status}"
        if reason:
            detail += f"; reason: {reason}"
        raise RuntimeError(
            f"Failed to create feature group {feature_group.name} ({detail})"
        )
    print(f"FeatureGroup {feature_group.name} successfully created.")


# record identifier and event time feature names
record_identifier_feature_name = "question_id"
event_time_feature_name = "event_time"
prefix = "feature_store"

# Creating a feature group whose name is already taken raises ResourceInUse, which would
# make this cell fail whenever the notebook is re-run. In that case keep going with the
# existing group instead of aborting.
# NOTE(review): an existing group is reused as-is; its schema is not compared with df.
try:
    feature_group.create(
        s3_uri=f"s3://{bucket_name}/{prefix}",
        record_identifier_name=record_identifier_feature_name,
        event_time_feature_name=event_time_feature_name,
        role_arn=role,
        enable_online_store=True,
    )
except sagemaker_client.exceptions.ResourceInUse:
    print(f"FeatureGroup {feature_group.name} already exists; reusing it.")

wait_for_feature_group_creation_complete(feature_group=feature_group)

Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
Waiting for Feature Group Creation
FeatureGroup conversations successfully created.


In [15]:
# check that the feature group exists by displaying its description
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:711667138246:feature-group/conversations',
 'FeatureGroupName': 'conversations',
 'RecordIdentifierFeatureName': 'question_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'question', 'FeatureType': 'String'},
  {'FeatureName': 'intent_question', 'FeatureType': 'String'},
  {'FeatureName': 'history', 'FeatureType': 'String'},
  {'FeatureName': 'conversation_id', 'FeatureType': 'String'},
  {'FeatureName': 'document_url_question', 'FeatureType': 'String'},
  {'FeatureName': 'domain_question', 'FeatureType': 'String'},
  {'FeatureName': 'text_answer', 'FeatureType': 'String'},
  {'FeatureName': 'intent_answer', 'FeatureType': 'String'},
  {'FeatureName': 'domain_answer', 'FeatureType': 'String'},
  {'FeatureName': 'question_id', 'FeatureType': 'String'},
  {'FeatureName': 'title', 'FeatureType': 'String'},
  {'FeatureName': 'description', 'FeatureType': 'String'},
  {'FeatureName': 'ingredients', 'Feature

## Ingest Data into Feature Store

In [16]:
# ingest data: write the rows of df into the feature group with 3 workers
# (wait=True presumably blocks until ingestion has finished — confirm in the SDK docs)
feature_group.ingest(data_frame=df, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='conversations', feature_definitions={'question': {'FeatureName': 'question', 'FeatureType': 'String'}, 'intent_question': {'FeatureName': 'intent_question', 'FeatureType': 'String'}, 'history': {'FeatureName': 'history', 'FeatureType': 'String'}, 'conversation_id': {'FeatureName': 'conversation_id', 'FeatureType': 'String'}, 'document_url_question': {'FeatureName': 'document_url_question', 'FeatureType': 'String'}, 'domain_question': {'FeatureName': 'domain_question', 'FeatureType': 'String'}, 'text_answer': {'FeatureName': 'text_answer', 'FeatureType': 'String'}, 'intent_answer': {'FeatureName': 'intent_answer', 'FeatureType': 'String'}, 'domain_answer': {'FeatureName': 'domain_answer', 'FeatureType': 'String'}, 'question_id': {'FeatureName': 'question_id', 'FeatureType': 'String'}, 'title': {'FeatureName': 'title', 'FeatureType': 'String'}, 'description': {'FeatureName': 'description', 'FeatureType': 'String'}, 'ingredients': {'FeatureName':

In [17]:
# test it works: read one record back through the Feature Store runtime client,
# looked up by its record identifier (question_id "diy-1-1")
featurestore_runtime.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString="diy-1-1",
)

{'ResponseMetadata': {'RequestId': 'ca649703-c586-4657-80c8-c2db839dc260',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ca649703-c586-4657-80c8-c2db839dc260',
   'content-type': 'application/json',
   'content-length': '3019',
   'date': 'Mon, 10 Jun 2024 22:06:11 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'question',
   'ValueAsString': 'How do we prepare the tree? '},
  {'FeatureName': 'intent_question',
   'ValueAsString': 'ask_question_recipe_steps'},
  {'FeatureName': 'history',
   'ValueAsString': "student: I'm ready for the first step now please.  | teacher: That is great to hear, first you need to select a suitable species of tree for your climate | student: I've got an idea of where I want it to grow, what's the next step that I need to take now?  | teacher: Now we prepare the tree."},
  {'FeatureName': 'conversation_id', 'ValueAsString': 'Wizard-of-Task-diy-1'},
  {'FeatureName': 'document_url_question',
   'ValueAsString': 'https://www.wikih

## Shut down notebook resources

In [18]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
<script>
// Click the hidden command button above, which is wired to the kernel shutdown command.
try {
    // Declared with const so the lookup result does not leak as an implicit global.
    const els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp: best effort, failures (e.g. no matching button found) are ignored on purpose
}
</script>

In [19]:
%%javascript

// Save a checkpoint, then delete the notebook session through the `Jupyter` global.
try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp: failures are deliberately ignored (presumably the `Jupyter` global is
    // missing outside the classic Notebook UI — verify)
}

<IPython.core.display.Javascript object>