In [1]:
import sagemaker
import boto3
import sys
import pandas as pd
import numpy as np 
import io 
from sagemaker.session import Session
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


### Setup AWS SageMaker Session and Define Resources

In [44]:
# Define a unique prefix for SageMaker resources to ensure organized naming in AWS.
prefix = "aws-mlops-sagemaker"

# Retrieve the IAM role for SageMaker execution. This role defines permissions for accessing resources.
role = get_execution_role()

# Initialize a SageMaker session, which handles interactions with SageMaker resources in AWS.
sagemaker_session = Session() 

# Get the AWS region of the current session to align resources to the appropriate region.
region = sagemaker_session.boto_region_name

# Define the S3 bucket for storage, using SageMaker's default bucket in this region.
s3_bucket_name = sagemaker_session.default_bucket()

# Print the bucket name, region, and role to verify configurations.
print("S3 Bucket Name:", s3_bucket_name)
print("AWS Region:", region)
print("IAM Role:", role)


S3 Bucket Name: sagemaker-us-east-1-607119565685
AWS Region: us-east-1
IAM Role: arn:aws:iam::607119565685:role/LearnMlOpsSageMakerExecutionRole


### Loading Customer and Order Data

In [45]:
# Load customer data from CSV file
customer_data = pd.read_csv("customers.csv") 

# Load order data from CSV file
orders_data = pd.read_csv("orders.csv")


### Displaying the First Few Rows of Customer Data

In [46]:
# Display the first five rows of the customer_data DataFrame to inspect the data
customer_data.head()

Unnamed: 0,customer_id,sex,is_married,event_time,age_18-29,age_30-39,age_40-49,age_50-59,age_60-69,age_70-plus,n_days_active
0,C1,0,1,2024-05-02T05:39:10.965Z,0,0,0,0,0,1,0.203425
1,C2,1,1,2024-05-02T05:39:10.966Z,0,0,0,0,0,1,0.859589
2,C3,1,1,2024-05-02T05:39:10.967Z,0,1,0,0,0,0,0.527397
3,C4,1,0,2024-05-02T05:39:10.967Z,0,0,0,0,1,0,0.780822
4,C5,1,0,2024-05-02T05:39:10.968Z,0,0,0,0,1,0,0.691096


### Displaying the First Few Rows of Order Data

In [49]:
# Display the first five rows of the orders_data DataFrame to inspect the data
orders_data.head()

Unnamed: 0,order_id,customer_id,product_id,purchase_amount,is_reordered,event_time,n_days_since_last_purchase
0,O1,C9765,P11660,0.572673,0,2024-05-02T03:53:40.523Z,0.273256
1,O2,C3674,P6868,0.693861,0,2024-05-02T03:53:40.523Z,0.846899
2,O3,C2139,P4749,0.556139,1,2024-05-02T03:53:40.523Z,0.408915
3,O4,C7794,P542,0.043069,1,2024-05-02T03:53:40.523Z,0.843023
4,O5,C2229,P7605,0.463861,1,2024-05-02T03:53:40.523Z,0.265504


### Creating Unique Feature Group Names for Customers and Orders

In [51]:
from time import gmtime, strftime, sleep

# Generate a unique feature group name for customers using the current GMT time
customers_feature_group_name = "customers-fg-" + strftime("%d-%H-%M-%S", gmtime()) 

# Generate a unique feature group name for orders using the current GMT time
orders_feature_group_name = "orders-fg-" + strftime("%d-%H-%M-%S", gmtime())

# Display the generated feature group names
print("Customers Feature Group Name:", customers_feature_group_name)
print("Orders Feature Group Name:", orders_feature_group_name)



Customers Feature Group Name: customers-fg-05-14-48-07
Orders Feature Group Name: orders-fg-05-14-48-07


### Creating Feature Groups in Amazon SageMaker

In [52]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Create a FeatureGroup object for customers using the unique feature group name
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name, 
    sagemaker_session=sagemaker_session
)

# Create a FeatureGroup object for orders using the unique feature group name
orders_feature_group = FeatureGroup(
    name=orders_feature_group_name, 
    sagemaker_session=sagemaker_session
)

### Defining Record Identifier for Customer & Orders Feature Group

In [53]:
# Define the record identifier feature name for the customer & orders feature group
record_identifier_feature_name = "customer_id"

### Displaying Summary Information of Order Data

In [54]:
# Display summary information about the orders_data DataFrame
orders_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 7 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   order_id                    100000 non-null  object 
 1   customer_id                 100000 non-null  object 
 2   product_id                  100000 non-null  object 
 3   purchase_amount             100000 non-null  float64
 4   is_reordered                100000 non-null  int64  
 5   event_time                  100000 non-null  object 
 6   n_days_since_last_purchase  100000 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 5.3+ MB


### Loading Feature Definitions for Customer and Order Feature Groups

In [55]:
# Load feature definitions into the customer feature group from the customer_data DataFrame
customers_feature_group.load_feature_definitions(data_frame=customer_data)

# Load feature definitions into the order feature group from the orders_data DataFrame
orders_feature_group.load_feature_definitions(data_frame=orders_data)

[FeatureDefinition(feature_name='order_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='customer_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='product_id', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='purchase_amount', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>, collection_type=None),
 FeatureDefinition(feature_name='is_reordered', feature_type=<FeatureTypeEnum.INTEGRAL: 'Integral'>, collection_type=None),
 FeatureDefinition(feature_name='event_time', feature_type=<FeatureTypeEnum.STRING: 'String'>, collection_type=None),
 FeatureDefinition(feature_name='n_days_since_last_purchase', feature_type=<FeatureTypeEnum.FRACTIONAL: 'Fractional'>, collection_type=None)]

### Creating Customer Feature Group in Amazon SageMaker Feature Store

In [56]:
# Create the customer feature group in Amazon SageMaker Feature Store
customers_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",  # S3 URI for storing feature group data
    record_identifier_name=record_identifier_feature_name,  # Name of the record identifier feature
    event_time_feature_name="event_time",  # Name of the event time feature
    role_arn=role,  # IAM role with permissions to access the feature store
    enable_online_store=True  # Enable online store for real-time access
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-48-07',
 'ResponseMetadata': {'RequestId': '251e4189-9fc1-4b8f-b982-bca8ff3bb6cc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '251e4189-9fc1-4b8f-b982-bca8ff3bb6cc',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '101',
   'date': 'Tue, 05 Nov 2024 14:52:45 GMT'},
  'RetryAttempts': 0}}

### Creating Order Feature Group in Amazon SageMaker Feature Store

In [57]:
# Create the order feature group in Amazon SageMaker Feature Store
orders_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",  # S3 URI for storing feature group data
    record_identifier_name=record_identifier_feature_name,  # Name of the record identifier feature
    event_time_feature_name="event_time",  # Name of the event time feature
    role_arn=role,  # IAM role with permissions to access the feature store
    enable_online_store=True  # Enable online store for real-time access
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/orders-fg-05-14-48-07',
 'ResponseMetadata': {'RequestId': '68c966f8-779e-4732-890c-1bc9e0daabf1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '68c966f8-779e-4732-890c-1bc9e0daabf1',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '98',
   'date': 'Tue, 05 Nov 2024 14:53:23 GMT'},
  'RetryAttempts': 0}}

### Describing the Customer Feature Group

In [58]:
# Display the details and schema of the customer feature group
customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-48-07',
 'FeatureGroupName': 'customers-fg-05-14-48-07',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'String'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'is_married', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'},
  {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2024, 11, 5, 14, 52, 45, 113000, tzinfo=tzlocal())

### Describing the Order Feature Group

In [59]:
# Display the details and schema of the order feature group
orders_feature_group.describe() 

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/orders-fg-05-14-48-07',
 'FeatureGroupName': 'orders-fg-05-14-48-07',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'order_id', 'FeatureType': 'String'},
  {'FeatureName': 'customer_id', 'FeatureType': 'String'},
  {'FeatureName': 'product_id', 'FeatureType': 'String'},
  {'FeatureName': 'purchase_amount', 'FeatureType': 'Fractional'},
  {'FeatureName': 'is_reordered', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'n_days_since_last_purchase', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2024, 11, 5, 14, 53, 23, 287000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-east-1-607119565685/aws-mlops-sagemaker',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-east-1-607

### Updating Metadata for Customer Feature in Feature Group

In [60]:
from sagemaker.feature_store.inputs import FeatureParameter

# Update the metadata for the 'customer_id' feature in the customer feature group
customers_feature_group.update_feature_metadata(
    feature_name="customer_id",  # Name of the feature to update
    description="The ID of the customer, it is also part of the Order feature group",  # Description of the feature
    parameter_additions=[FeatureParameter("idType", "primarykey")]  # Additional parameters for the feature
)

{'ResponseMetadata': {'RequestId': 'e38fee81-8c93-40ab-b654-ef4e4c691865',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e38fee81-8c93-40ab-b654-ef4e4c691865',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Tue, 05 Nov 2024 14:55:28 GMT',
   'content-length': '0'},
  'RetryAttempts': 0}}

### Describing Metadata for Customer ID Feature

In [61]:
# Retrieve and display metadata for the 'customer_id' feature in the customer feature group
customers_feature_group.describe_feature_metadata("customer_id")

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-48-07',
 'FeatureGroupName': 'customers-fg-05-14-48-07',
 'FeatureName': 'customer_id',
 'FeatureType': 'String',
 'CreationTime': datetime.datetime(2024, 11, 5, 14, 52, 45, 113000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2024, 11, 5, 14, 55, 28, 630000, tzinfo=tzlocal()),
 'Description': 'The ID of the customer, it is also part of the Order feature group',
 'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}],
 'ResponseMetadata': {'RequestId': 'b861076d-e86d-48d7-918a-6474cd76b5cc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b861076d-e86d-48d7-918a-6474cd76b5cc',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '401',
   'date': 'Tue, 05 Nov 2024 14:56:10 GMT'},
  'RetryAttempts': 0}}

### Searching for Feature Metadata in Amazon SageMaker

In [63]:
# Search for feature metadata in SageMaker using specified filters
search_results = sagemaker_session.boto_session.client("sagemaker", region_name=region).search(
    Resource="FeatureMetadata",
    SearchExpression={
        "Filters": [
            {
                "Name": "FeatureGroupName",  # Filter by feature group name
                "Operator": "Contains",  # Operator to match feature group names
                "Value": "customers-fg"  # Value to match in the feature group name
            },
            {
                "Name": "Parameters.idType",  # Filter by feature parameter
                "Operator": "Equals",  # Operator to match parameter values
                "Value": "primarykey"  # Value to match the parameter type
            }
        ]
    }
)
search_results

{'Results': [{'FeatureMetadata': {'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-48-07',
    'FeatureGroupName': 'customers-fg-05-14-48-07',
    'FeatureName': 'customer_id',
    'FeatureType': 'String',
    'CreationTime': datetime.datetime(2024, 11, 5, 14, 52, 45, tzinfo=tzlocal()),
    'LastModifiedTime': datetime.datetime(2024, 11, 5, 14, 55, 28, tzinfo=tzlocal()),
    'Description': 'The ID of the customer, it is also part of the Order feature group',
    'Parameters': [{'Key': 'idType', 'Value': 'primarykey'}]}},
  {'FeatureMetadata': {'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-02-24',
    'FeatureGroupName': 'customers-fg-05-14-02-24',
    'FeatureName': 'customer_id',
    'FeatureType': 'String',
    'CreationTime': datetime.datetime(2024, 11, 5, 14, 4, 36, tzinfo=tzlocal()),
    'LastModifiedTime': datetime.datetime(2024, 11, 5, 14, 6, 32, tzinfo=tzlocal()),
    'Description': 'Th

# Ingest the data to the Feature Group

### Ingesting Customer Data into the Feature Group

In [64]:
# Ingest customer data into the customer feature group
customers_feature_group.ingest(
    data_frame=customer_data,  # DataFrame containing the customer data to be ingested
    max_workers=3,  # Maximum number of concurrent workers to use for ingestion
    wait=True  # Wait for the ingestion process to complete before proceeding
)

IngestionManagerPandas(feature_group_name='customers-fg-05-14-48-07', feature_definitions={'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'sex': {'FeatureName': 'sex', 'FeatureType': 'Integral'}, 'is_married': {'FeatureName': 'is_married', 'FeatureType': 'Integral'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}, 'age_18-29': {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'}, 'age_30-39': {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'}, 'age_40-49': {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'}, 'age_50-59': {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'}, 'age_60-69': {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'}, 'age_70-plus': {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'}, 'n_days_active': {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f45db90c1d0>, sagemaker_session=<sagemaker.session.Session o

In [27]:
# Ingest order data into the orders feature group
orders_feature_group.ingest(
    data_frame=orders_data,  # DataFrame containing the order data to be ingested
    max_workers=3,  # Maximum number of concurrent workers to use for ingestion
    wait=True  # Wait for the ingestion process to complete before proceeding
)

IngestionManagerPandas(feature_group_name='orders-fg-05-14-02-24', feature_definitions={'order_id': {'FeatureName': 'order_id', 'FeatureType': 'String'}, 'customer_id': {'FeatureName': 'customer_id', 'FeatureType': 'String'}, 'product_id': {'FeatureName': 'product_id', 'FeatureType': 'String'}, 'purchase_amount': {'FeatureName': 'purchase_amount', 'FeatureType': 'Fractional'}, 'is_reordered': {'FeatureName': 'is_reordered', 'FeatureType': 'Integral'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}, 'n_days_since_last_purchase': {'FeatureName': 'n_days_since_last_purchase', 'FeatureType': 'Fractional'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f45ea36de10>, sagemaker_session=<sagemaker.session.Session object at 0x7f45ea6ad250>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f45e2bc5510>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

### Retrieving a Sample Record from the Customer Feature Group

In [65]:
# Define the customer ID for which to retrieve the record
customer_id = "C400"

# Retrieve a sample record from the customer feature group using the customer ID
sample_record = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).get_record(
    FeatureGroupName=customers_feature_group_name,  # Name of the feature group
    RecordIdentifierValueAsString=str(customer_id)  # Customer ID to identify the record
)

# Display the retrieved sample record
sample_record


{'ResponseMetadata': {'RequestId': '55a8bc59-d411-411a-8981-941c03dc31ff',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '55a8bc59-d411-411a-8981-941c03dc31ff',
   'content-type': 'application/json',
   'content-length': '879',
   'date': 'Tue, 05 Nov 2024 14:59:04 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C400'},
  {'FeatureName': 'sex', 'ValueAsString': '1'},
  {'FeatureName': 'is_married', 'ValueAsString': '0'},
  {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:11.217Z'},
  {'FeatureName': 'age_18-29', 'ValueAsString': '1'},
  {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
  {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
  {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
  {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
  {'FeatureName': 'age_70-plus', 'ValueAsString': '0'},
  {'FeatureName': 'n_days_active', 'ValueAsString': '0.9787671232876712'}]}

### Batch Retrieving Records from Customer and Order Feature Groups

In [66]:
# Batch retrieve records from both customer and order feature groups
all_records = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).batch_get_record(
    Identifiers=[
        {
            "FeatureGroupName": customers_feature_group_name,  # Name of the customer feature group
            "RecordIdentifiersValueAsString": ["C400", "C401"],  # Customer IDs to retrieve
        },
        {
            "FeatureGroupName": orders_feature_group_name,  # Name of the order feature group
            "RecordIdentifiersValueAsString": ["C400", "C401"],  # Order IDs to retrieve
        },
    ]
)

# Display the retrieved records from both feature groups
all_records

{'ResponseMetadata': {'RequestId': 'a430bc3a-4354-4335-a47f-ab4f50c6a473',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a430bc3a-4354-4335-a47f-ab4f50c6a473',
   'content-type': 'application/json',
   'content-length': '1983',
   'date': 'Tue, 05 Nov 2024 14:59:46 GMT'},
  'RetryAttempts': 0},
 'Records': [{'FeatureGroupName': 'customers-fg-05-14-48-07',
   'RecordIdentifierValueAsString': 'C400',
   'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C400'},
    {'FeatureName': 'sex', 'ValueAsString': '1'},
    {'FeatureName': 'is_married', 'ValueAsString': '0'},
    {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T05:39:11.217Z'},
    {'FeatureName': 'age_18-29', 'ValueAsString': '1'},
    {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
    {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
    {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
    {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
    {'FeatureName': 'age_70-plus', 'ValueA

### Adding New Features to the Customer Feature Group

In [74]:
from sagemaker.feature_store.feature_definition import StringFeatureDefinition

# Add a new feature 'email' to the customer feature group
customers_feature_group.update(
    feature_additions=[StringFeatureDefinition("email")]  # Define the new feature as a string type
)

ClientError: An error occurred (ValidationException) when calling the UpdateFeatureGroup operation: Validation Error: Feature [email] already exists.

### Describing the Customer Feature Group After Updates

In [None]:
# Retrieve and display details of the customer feature group after adding new features
customers_feature_group.describe

### Loading and Displaying Updated Customer Data

In [69]:
# Load the updated customer data from CSV file
customer_data_updated = pd.read_csv("customers_updated.csv")

# Display the first five rows of the updated customer data DataFrame
customer_data_updated.head()

Unnamed: 0,customer_id,sex,is_married,event_time,email,age_18-29,age_30-39,age_40-49,age_50-59,age_60-69,age_70-plus,n_days_active
0,C10001,0,1,2024-05-02T13:43:28.295Z,kennethday@example.net,0,0,0,0,0,1,0.34589
1,C10002,0,1,2024-05-02T13:43:28.296Z,smelendez@example.org,0,0,0,0,0,1,0.884247
2,C10003,1,0,2024-05-02T13:43:28.298Z,brianperry@example.net,0,1,0,0,0,0,0.734247
3,C10004,0,0,2024-05-02T13:43:28.300Z,alyssamorgan@example.net,0,0,0,0,1,0,0.386301
4,C10005,0,1,2024-05-02T13:43:28.301Z,cjenkins@example.org,0,0,0,0,1,0,0.363699


### Describing the Customer Feature Group to Verify Structure

In [70]:
# Retrieve and display the current metadata and schema of the customer feature group
customers_feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:607119565685:feature-group/customers-fg-05-14-48-07',
 'FeatureGroupName': 'customers-fg-05-14-48-07',
 'RecordIdentifierFeatureName': 'customer_id',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'customer_id',
   'FeatureType': 'String'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'is_married', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'String'},
  {'FeatureName': 'age_18-29', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_30-39', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_40-49', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_50-59', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_60-69', 'FeatureType': 'Integral'},
  {'FeatureName': 'age_70-plus', 'FeatureType': 'Integral'},
  {'FeatureName': 'n_days_active', 'FeatureType': 'Fractional'},
  {'FeatureName': 'email', 'FeatureType': 'String'}],
 'CreationTime': datetime.dateti

In [71]:
# Display the column names of the updated customer data DataFrame
customer_data_updated.columns

Index(['customer_id', 'sex', 'is_married', 'event_time', 'email', 'age_18-29',
       'age_30-39', 'age_40-49', 'age_50-59', 'age_60-69', 'age_70-plus',
       'n_days_active'],
      dtype='object')

### Ingesting Updated Customer Data into the Feature Group

In [72]:
# Ingest the updated customer data into the customer feature group
customers_feature_group.ingest(
    data_frame=customer_data_updated,  # DataFrame containing the updated customer data
    max_workers=3,  # Maximum number of concurrent workers to use for ingestion
    wait=True  # Wait for the ingestion process to complete before proceeding
)

Failed to ingest row 3334: An error occurred (ValidationError) when calling the PutRecord operation: Validation Error: Provided values for Feature(s) [email] which do not exist in FeatureGroup [customers-fg-05-14-48-07].


IngestionError: [3334] -> Failed to ingest some data into FeatureGroup customers-fg-05-14-48-07

### Retrieving a Sample Record from the Customer Feature Group

In [73]:
# Define the customer ID for which to retrieve the record
customer_id = "C10001"

# Retrieve a sample record from the customer feature group using the specified customer ID
sample_record = sagemaker_session.boto_session.client(
    "sagemaker-featurestore-runtime", region_name=region
).get_record(
    FeatureGroupName=customers_feature_group_name,  # Name of the feature group
    RecordIdentifierValueAsString=str(customer_id)  # Customer ID to identify the record
)

# Display the retrieved sample record
sample_record

{'ResponseMetadata': {'RequestId': 'fae178c3-93ae-494b-a324-522a5fe928d3',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'fae178c3-93ae-494b-a324-522a5fe928d3',
   'content-type': 'application/json',
   'content-length': '970',
   'date': 'Tue, 05 Nov 2024 15:04:08 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'customer_id', 'ValueAsString': 'C10001'},
  {'FeatureName': 'sex', 'ValueAsString': '0'},
  {'FeatureName': 'is_married', 'ValueAsString': '1'},
  {'FeatureName': 'event_time', 'ValueAsString': '2024-05-02T13:43:28.295Z'},
  {'FeatureName': 'age_18-29', 'ValueAsString': '0'},
  {'FeatureName': 'age_30-39', 'ValueAsString': '0'},
  {'FeatureName': 'age_40-49', 'ValueAsString': '0'},
  {'FeatureName': 'age_50-59', 'ValueAsString': '0'},
  {'FeatureName': 'age_60-69', 'ValueAsString': '0'},
  {'FeatureName': 'age_70-plus', 'ValueAsString': '1'},
  {'FeatureName': 'n_days_active', 'ValueAsString': '0.345890410958904'},
  {'FeatureName': 'email', 'ValueAs