In [178]:
import time

import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.session import Session

# Execution role from the SageMaker SDK; passed as role_arn when the feature group is created.
role = get_execution_role()
# Session shared by the cells below (default bucket lookup, feature group handle).
sess = Session()

In [179]:
# Key prefix appended to the bucket when the feature group's s3_uri is built.
prefix = 'sagemaker-feature-store'

# Bucket reported by the session as its default; echoed so the target is visible.
default_bucket = sess.default_bucket()
print(f'Default S3 bucket = {default_bucket}')

Default S3 bucket = sagemaker-us-east-1-707684582322


In [180]:
# Relative path to the user-profile CSV, read into a DataFrame with pandas defaults.
csv_data_location = './user-profiles/user_profiles.csv'
data_frame = pd.read_csv(filepath_or_buffer=csv_data_location)

In [181]:
# Display the loaded frame to check the columns read from the CSV.
data_frame

Unnamed: 0,User_ID,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,1.9,3.2,0.5,0.8,3.5,0.9,3.8,0.8,1.0,4.8,1.3,4.9,1.7,3.1,4.0,3.4,2.8,0.3
1,2,4.8,0.4,4.5,1.4,0.8,2.7,0.1,0.8,1.6,3.7,1.2,0.6,0.4,0.3,4.5,4.0,2.5,0.5
2,3,3.7,0.8,2.5,0.9,2.9,4.4,0.1,0.2,1.1,1.8,4.5,2.1,2.0,3.7,4.7,1.3,1.0,5.0
3,4,3.0,4.5,4.1,0.4,3.0,3.7,1.6,3.7,1.8,1.5,1.2,0.2,1.6,1.6,4.8,3.1,4.3,1.6
4,5,0.8,3.0,1.6,0.6,2.1,4.0,2.4,3.3,0.3,1.7,1.4,3.7,3.4,0.0,2.6,2.9,3.4,3.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,1.7,4.7,2.4,4.4,0.5,4.4,4.1,2.9,0.4,3.3,2.2,1.7,3.9,3.0,4.3,2.5,5.0,3.2
196,197,3.6,4.3,3.3,4.7,4.6,4.9,2.2,1.4,3.6,4.8,2.3,0.8,1.4,0.4,0.8,4.7,4.1,1.1
197,198,4.5,2.1,0.9,2.8,0.7,4.8,1.2,0.4,0.4,0.3,4.3,0.1,2.5,3.1,1.5,4.4,4.2,2.8
198,199,4.4,3.8,1.0,3.5,4.8,3.7,3.1,0.4,0.4,0.3,2.7,3.2,1.4,4.9,1.5,0.5,1.2,1.9


In [182]:
from datetime import datetime, timezone

# One ingestion timestamp shared by every row. The literal 'Z' suffix in the
# format string declares UTC, so the clock must be read in UTC as well; a naive
# datetime.now() returns local wall-clock time and would be mislabelled as UTC
# on any host whose timezone is not UTC.
event_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
data_frame['EventTime'] = event_time

# Show the new column to confirm the timestamp format.
data_frame['EventTime']

0      2023-08-18T01:25:39Z
1      2023-08-18T01:25:39Z
2      2023-08-18T01:25:39Z
3      2023-08-18T01:25:39Z
4      2023-08-18T01:25:39Z
               ...         
195    2023-08-18T01:25:39Z
196    2023-08-18T01:25:39Z
197    2023-08-18T01:25:39Z
198    2023-08-18T01:25:39Z
199    2023-08-18T01:25:39Z
Name: EventTime, Length: 200, dtype: object

In [183]:
from sagemaker.feature_store.feature_definition import FeatureDefinition
from sagemaker.feature_store.feature_definition import FeatureTypeEnum

# The identifier and event-time columns are registered as strings; every other
# column of the frame is registered as a fractional feature.
string_columns = ('User_ID', 'EventTime')
feature_definitions = [
    FeatureDefinition(
        feature_name=column,
        feature_type=(
            FeatureTypeEnum.STRING if column in string_columns else FeatureTypeEnum.FRACTIONAL
        ),
    )
    for column in data_frame.columns
]

In [184]:
# Display the generated definitions to confirm the type chosen for each column.
feature_definitions

[FeatureDefinition(feature_name='User_ID', feature_type=<FeatureTypeEnum.STRING: 'String'>),
 FeatureDefinition(feature_name='Action', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Adventure', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Animation', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Childrens', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Comedy', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Crime', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Documentary', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Drama', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>),
 FeatureDefinition(feature_name='Fantasy', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractiona

In [185]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Bind the feature definitions to a named feature group object on the shared session.
feature_group_name = 'user-profile-feature-group'
feature_group = FeatureGroup(
    name=feature_group_name,
    feature_definitions=feature_definitions,
    sagemaker_session=sess,
)

In [186]:
# S3 location handed to the feature group: default bucket plus the notebook's prefix.
offline_store_s3_uri = f's3://{default_bucket}/{prefix}'

# Request creation, keyed by User_ID and timestamped by EventTime, with the
# online store enabled. The call's response is left as the cell output.
feature_group.create(
    s3_uri=offline_store_s3_uri,
    record_identifier_name='User_ID',
    event_time_feature_name='EventTime',
    role_arn=role,
    enable_online_store=True,
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:707684582322:feature-group/user-profile-feature-group',
 'ResponseMetadata': {'RequestId': 'd75b0e7c-f4ff-4c07-b2d8-e7b4208e052b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd75b0e7c-f4ff-4c07-b2d8-e7b4208e052b',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Fri, 18 Aug 2023 01:25:44 GMT'},
  'RetryAttempts': 0}}

In [187]:
def wait_for_feature_group_creation_complete(feature_group, poll_interval_seconds=5):
    """Block until ``feature_group`` is no longer in the 'Creating' state.

    The group's status is read from ``feature_group.describe()`` under the
    'FeatureGroupStatus' key and re-read every ``poll_interval_seconds`` while
    it equals 'Creating'. Progress is printed on each poll.

    Args:
        feature_group: Object exposing ``describe()`` (returning a mapping)
            and a ``name`` attribute.
        poll_interval_seconds: Seconds to sleep between status checks.
            Defaults to 5, the interval this helper has always used.

    Raises:
        SystemExit: If the final status is anything other than 'Created'.
            Kept as SystemExit so existing callers see the same exception.
    """
    status = feature_group.describe().get('FeatureGroupStatus')
    print(f'Initial status: {status}')
    while status == 'Creating':
        print(f'Waiting for feature group: {feature_group.name} to be created ...')
        # Relies on the notebook-level `import time`.
        time.sleep(poll_interval_seconds)
        status = feature_group.describe().get('FeatureGroupStatus')
    if status != 'Created':
        raise SystemExit(f'Failed to create feature group {feature_group.name}: {status}')
    print(f'FeatureGroup {feature_group.name} was successfully created.')

In [188]:
# Block until the status is no longer 'Creating'; raises SystemExit unless it ends as 'Created'.
wait_for_feature_group_creation_complete(feature_group)

Initial status: Creating
Waiting for feature group: user-profile-feature-group to be created ...
Waiting for feature group: user-profile-feature-group to be created ...
FeatureGroup user-profile-feature-group was successfully created.


In [189]:
# Send every row of the frame to the feature group using six workers, and
# wait for the ingestion to finish before the cell returns.
feature_group.ingest(
    data_frame=data_frame,
    max_workers=6,
    wait=True,
)

IngestionManagerPandas(feature_group_name='user-profile-feature-group', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f9091fb1190>, sagemaker_session=<sagemaker.session.Session object at 0x7f908d5a3b10>, max_workers=6, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f90915cd710>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [192]:
import boto3
# Record to look up and the feature group that holds it.
feature_group_name = 'user-profile-feature-group'
customer_id = "42"

# Feature Store runtime client used for the record lookup in the next cell.
boto_session = boto3.Session()
featurestore_runtime_client = boto_session.client('sagemaker-featurestore-runtime')


In [193]:
# Fetch the stored record for the chosen identifier and print the raw response.
feature_record = featurestore_runtime_client.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString=customer_id,
)
print(feature_record)

{'ResponseMetadata': {'RequestId': '7abbe0b9-3919-4dcf-afe0-24fd9d5f0520', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '7abbe0b9-3919-4dcf-afe0-24fd9d5f0520', 'content-type': 'application/json', 'content-length': '1008', 'date': 'Fri, 18 Aug 2023 01:26:42 GMT'}, 'RetryAttempts': 0}, 'Record': [{'FeatureName': 'User_ID', 'ValueAsString': '42'}, {'FeatureName': 'Action', 'ValueAsString': '2.5'}, {'FeatureName': 'Adventure', 'ValueAsString': '4.8'}, {'FeatureName': 'Animation', 'ValueAsString': '4.9'}, {'FeatureName': 'Childrens', 'ValueAsString': '1.6'}, {'FeatureName': 'Comedy', 'ValueAsString': '3.3'}, {'FeatureName': 'Crime', 'ValueAsString': '4.0'}, {'FeatureName': 'Documentary', 'ValueAsString': '3.9'}, {'FeatureName': 'Drama', 'ValueAsString': '0.5'}, {'FeatureName': 'Fantasy', 'ValueAsString': '4.2'}, {'FeatureName': 'Film-Noir', 'ValueAsString': '2.2'}, {'FeatureName': 'Horror', 'ValueAsString': '2.8'}, {'FeatureName': 'Musical', 'ValueAsString': '0.2'}, {'FeatureN