In [1]:
from sagemaker.feature_store.feature_group import FeatureGroup
from time import gmtime, strftime, sleep
from random import randint
import pandas as pd
import numpy as np
import subprocess
import sagemaker
import importlib
import logging
import time
import sys
import boto3

In [2]:
# Notebook-wide logger. Use the module's real name (the variable __name__,
# not the literal string '__name__') so the logger is identified correctly.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Re-running this cell must not attach a second handler; otherwise every
# message would be emitted once per attached handler.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

In [3]:
# Record the library versions this notebook was executed with.
for version_message in (
    f'Using SageMaker version: {sagemaker.__version__}',
    f'Using Boto3 version: {boto3.__version__}',
):
    logger.info(version_message)

Using SageMaker version: 2.144.0
Using Boto3 version: 1.26.100


In [5]:
# SageMaker session, the execution role of this notebook, and the region the
# session is bound to.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
region = sagemaker_session.boto_region_name

# Bucket name and key prefix. In the cells shown here the bucket is only
# logged and the prefix is not referenced again.
default_bucket = 'chime-fs-demo'
prefix = 'sagemaker-feature-store'
logger.info(f'Default S3 bucket = {default_bucket}')

# boto3 clients for the SageMaker API and the Feature Store runtime API,
# both created in the session's region.
boto_session = boto3.Session(region_name=region)
sagemaker_client = boto_session.client('sagemaker', region_name=region)
featurestore_runtime = boto_session.client('sagemaker-featurestore-runtime', region_name=region)

Default S3 bucket = chime-fs-demo


In [6]:
# Feature group whose feature-level metadata is inspected and updated below.
feature_group_name = 'cc-agg-chime-fg'

In [7]:
# Show the metadata currently stored for the "name" feature of the group.
sagemaker_client.describe_feature_metadata(
    FeatureGroupName=feature_group_name, FeatureName="name"
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:461312420708:feature-group/cc-agg-chime-fg',
 'FeatureGroupName': 'cc-agg-chime-fg',
 'FeatureName': 'name',
 'FeatureType': 'String',
 'CreationTime': datetime.datetime(2023, 4, 7, 19, 43, 33, 952000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 4, 2, 22, 25, 44, 729000, tzinfo=tzlocal()),
 'Parameters': [],
 'ResponseMetadata': {'RequestId': '90eda245-c143-4031-8aed-248ee7768962',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '90eda245-c143-4031-8aed-248ee7768962',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '257',
   'date': 'Sat, 08 Apr 2023 20:29:38 GMT'},
  'RetryAttempts': 0}}

In [8]:
# Attach two key/value parameters ("team" and "org") to the "name" feature.
sagemaker_client.update_feature_metadata(
    FeatureGroupName=feature_group_name,
    FeatureName="name",
    ParameterAdditions=[
        {"Key": key, "Value": value}
        for key, value in (("team", "mlops"), ("org", "customer fin team"))
    ],
)

{'ResponseMetadata': {'RequestId': 'a4073365-898b-4648-a6f7-763f100407e7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'a4073365-898b-4648-a6f7-763f100407e7',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 08 Apr 2023 20:30:26 GMT'},
  'RetryAttempts': 0}}

In [9]:
# Describe the "name" feature again to display its metadata as it is now stored.
sagemaker_client.describe_feature_metadata(
    FeatureGroupName=feature_group_name, FeatureName="name"
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:461312420708:feature-group/cc-agg-chime-fg',
 'FeatureGroupName': 'cc-agg-chime-fg',
 'FeatureName': 'name',
 'FeatureType': 'String',
 'CreationTime': datetime.datetime(2023, 4, 7, 19, 43, 33, 952000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2023, 4, 8, 20, 30, 26, 693000, tzinfo=tzlocal()),
 'Parameters': [{'Key': 'org', 'Value': 'customer fin team'},
  {'Key': 'team', 'Value': 'mlops'}],
 'ResponseMetadata': {'RequestId': 'ba18e61b-580d-45b1-99b2-8217e06b1904',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ba18e61b-580d-45b1-99b2-8217e06b1904',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '329',
   'date': 'Sat, 08 Apr 2023 20:30:36 GMT'},
  'RetryAttempts': 0}}

In [10]:
def search_features_using_string(search_string):
    """Search feature metadata for features matching ``search_string``.

    A feature matches when the string is contained in its feature name, its
    description, or its parameters (the three filters are combined with "Or").

    Only the results returned by a single ``search`` call are used; a
    ``NextToken`` in the response is not followed.

    Args:
        search_string: Substring to look for.

    Returns:
        pandas.DataFrame with one row per matching feature. Column names have
        their top-level prefix (the part before the first ".") removed, and
        the ``FeatureGroupArn`` column is dropped when present. An empty
        DataFrame is returned when nothing matches.
    """
    # The same "Contains" filter is applied to each searchable field.
    searchable_fields = ('FeatureName', 'Description', 'AllParameters')
    response = sagemaker_client.search(
        Resource="FeatureMetadata",
        SearchExpression={
            'Filters': [
                {'Name': field, 'Operator': 'Contains', 'Value': search_string}
                for field in searchable_fields
            ],
            "Operator": "Or"
        },
    )

    results = response.get('Results', [])
    if not results:
        # Without this guard the column drop below fails with KeyError,
        # because an empty result list produces a frame with no columns.
        return pd.DataFrame()

    # Flatten one level so each result's nested record becomes columns named
    # "<prefix>.<field>", then keep only the "<field>" part.
    df = pd.json_normalize(results, max_level=1)
    df.columns = df.columns.map(lambda col: col.split(".", 1)[-1])
    return df.drop(columns='FeatureGroupArn', errors='ignore')

# Search for features whose name, description, or parameters contain "name".
search_string = "name"
search_features_using_string(search_string)

Unnamed: 0,FeatureGroupName,FeatureName,FeatureType,CreationTime,LastModifiedTime,Parameters
0,cc-agg-chime-fg,name,String,2023-04-07 19:43:33+00:00,2023-04-08 20:30:26+00:00,"[{'Key': 'org', 'Value': 'customer fin team'},..."
1,cc-agg-update-test-1,name,String,2023-04-06 08:09:22+00:00,2023-04-06 08:09:22+00:00,[]
2,cc_agg_update_test,name,String,2023-04-06 07:41:34+00:00,2023-04-06 07:41:34+00:00,[]
3,cc-agg-batch-chime-fg,name,String,2023-04-06 06:49:17+00:00,2023-04-06 06:49:17+00:00,[]
