Skip to content

Sagemaker: AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group' #3360

@akramIOT

Description

@akramIOT

To reproduce/Code Snippet:

from sagemaker.feature_store.feature_group import FeatureGroup
from time import gmtime, strftime, sleep
from random import randint
import boto3
import sagemaker
import pandas as pd
import numpy as np
import logging
import random
import time
import subprocess
import sys
import importlib
import pprint
from sagemaker.local import LocalSession
from sagemaker import get_execution_role

# Log to a stream handler at DEBUG level so every step of the script is visible.
# NOTE(review): the literal 'name' may originally have been '__name__' (double
# underscores appear to have been stripped from this snippet) — confirm.
logger = logging.getLogger('name')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())


def _version_tuple(version):
    """Return the leading numeric components of a dotted version string.

    Parsing stops at the first component that is not purely numeric, keeping
    any digits that component starts with, e.g. '2.48.1' -> (2, 48, 1) and
    '1.24.23rc1' -> (1, 24, 23).
    """
    numbers = []
    for piece in version.split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if digits:
            numbers.append(int(digits))
        if len(digits) != len(piece):
            # Pre-release / local suffix reached; ignore everything after it.
            break
    return tuple(numbers)


# Versions are compared numerically. Compared as plain strings, '2.109.0'
# sorts before '2.48.1', which would reinstall an older SDK over a newer one.
if _version_tuple(sagemaker.__version__) < (2, 48, 1):
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sagemaker==2.48.1'])
    importlib.reload(sagemaker)

if _version_tuple(boto3.__version__) < (1, 24, 23):
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'boto3==1.24.23'])
    importlib.reload(boto3)

logger.info(f'Using SageMaker version: {sagemaker.__version__}')
logger.info(f'Using Pandas version: {pd.__version__}')
logger.info(f'Using boto3 version: {boto3.__version__}')
pretty_printer = pprint.PrettyPrinter(indent=4)

# Execution role ARN (account id and role suffix redacted by the reporter).
sagemaker_role = 'arn:aws:iam::xxxxxxxxxxxxx:role/service-role/AmazonSageMaker-ExecutionRole-2022XXXXXXXXXX'
# The whole script runs against a local-mode session.
sagemaker_session = LocalSession()
sagemaker_session.config = {'local': {'local_code': True}}

# Sagemaker Role
#
# Disabled alternative that resolves the role ARN at runtime:
#
# try:
#     role = sagemaker.get_execution_role()
# except ValueError:
#     iam = boto3.client('iam')
#     role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-2022XXXXXXXX')['Role']['Arn']

# S3 location and region, both taken from the SageMaker session.
prefix = 'sagemaker-feature-store'
default_bucket = sagemaker_session.default_bucket()
logger.info(f'Default S3 bucket = {default_bucket}')
region = sagemaker_session.boto_region_name

# boto3 clients for the 'sagemaker' and 'sagemaker-featurestore-runtime'
# services, plus an S3 resource, all created from one boto3 session.
boto_session = boto3.Session(region_name=region)
sagemaker_runtime = boto_session.client(
    service_name='sagemaker',
    region_name=region,
)
featurestore_runtime = boto_session.client(
    service_name='sagemaker-featurestore-runtime',
    region_name=region,
)
s3 = boto_session.resource('s3')

# Load the customer records. The bare expressions below are notebook-style
# inspections; their values are discarded when this runs as a script.
customers_df = pd.read_csv(
    '/Users/akram/AKRAM_CODE_FOLDER/ML/Washington_ML/serverless-machine-learning/AWS_SAGEMAKER_FEATURE_GROUP/customers.csv'
)
customers_df.head(5)
customers_df.dtypes

# Stamp every row with the same event time (seconds since the epoch, as float64).
current_time_sec = int(round(time.time()))
customers_df['event_time'] = pd.Series(
    [current_time_sec] * len(customers_df),
    dtype="float64",
)
customers_df.head(5)

# The record identifier column is cast to pandas' 'string' dtype.
customers_df['customer_id'] = customers_df['customer_id'].astype('string')

# Feature group name carries a month-day-hour-minute (UTC) suffix.
current_timestamp = strftime('%m-%d-%H-%M', gmtime())
customers_feature_group_name = f'fs-customers-{current_timestamp}'
logger.info(f'Feature group name = {customers_feature_group_name}')

# NOTE(review): sagemaker_session is the LocalSession built earlier in this
# script. The reported AttributeError ('LocalSagemakerClient' object has no
# attribute 'create_feature_group') is raised when create() is later called
# on this group. Presumably Feature Store needs a regular sagemaker.Session
# rather than a local-mode one — confirm against the SDK documentation.
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name,
    sagemaker_session=sagemaker_session,
)
customers_feature_group.load_feature_definitions(data_frame=customers_df)

def wait_for_feature_group_creation_complete(feature_group, poll_interval=5):
    """Block until ``feature_group`` is no longer in the 'Creating' state.

    Args:
        feature_group: Object exposing ``describe()``, which returns a mapping
            with a 'FeatureGroupStatus' entry, and a ``name`` attribute.
        poll_interval: Seconds to sleep between two status checks.

    Raises:
        SystemExit: If the status after waiting is anything but 'Created'.
    """
    status = feature_group.describe().get('FeatureGroupStatus')
    print(f'Initial status: {status}')
    while status == 'Creating':
        logger.info(f'Waiting for feature group: {feature_group.name} to be created ...')
        time.sleep(poll_interval)
        status = feature_group.describe().get('FeatureGroupStatus')
    if status != 'Created':
        # Any terminal status other than 'Created' aborts the script.
        raise SystemExit(f'Failed to create feature group {feature_group.name}: {status}')
    logger.info(f'FeatureGroup {feature_group.name} was successfully created.')

# Create the feature group. s3_uri points at `prefix` inside the default
# bucket, and the online store is enabled.
customers_feature_group.create(  # <========== line of error as per traceback
    s3_uri=f's3://{default_bucket}/{prefix}',
    record_identifier_name='customer_id',
    event_time_feature_name='event_time',
    role_arn=sagemaker_role,
    enable_online_store=True,
)

# Wait for creation to finish, then print the service's description of the group.
wait_for_feature_group_creation_complete(customers_feature_group)
describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

# Ingest the whole dataframe with three workers and wait for completion.
logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ')
customers_feature_group.ingest(
    data_frame=customers_df,
    max_workers=3,
    wait=True,
)

# Pick a random identifier of the form C<n> with n in 1..500 and fetch its
# record through the feature store runtime client.
customer_id = f'C{randint(1, 500)}'
logger.info(f'customer_id={customer_id}')

feature_record = featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=customer_id,
)
feature_record  # notebook-style inspection; the value is discarded in a script

# Query the feature group's table through Athena and load the first ten rows.
output_location = f's3://{default_bucket}/{prefix}/query_results/'
customers_query = customers_feature_group.athena_query()
customers_table = customers_query.table_name
query_string = f'SELECT * FROM "{customers_table}" limit 10'

customers_query.run(
    query_string=query_string,
    output_location=output_location,
)
customers_query.wait()
athena_df = customers_query.as_dataframe()
athena_df.head()

# Add an 'Integral' feature named 'has_kids' to the existing feature group.
sagemaker_runtime.update_feature_group(
    FeatureGroupName=customers_feature_group_name,
    FeatureAdditions=[
        {"FeatureName": "has_kids", "FeatureType": "Integral"},
    ],
)

# Fixed one-minute pause before describing the group again.
# NOTE(review): presumably this gives the update time to complete; polling the
# group's status would be more reliable — confirm.
time.sleep(60)

describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

# Re-run the same Athena query after the schema change.
customers_query.run(
    query_string=query_string,
    output_location=output_location,
)
customers_query.wait()
athena_df_update = customers_query.as_dataframe()
athena_df_update.head()

# DataFrame.drop returns a new frame, so the result has to be assigned back;
# without the assignment the call has no effect on customers_df.
customers_df = customers_df.drop(columns=['event_time'])

# Populate the new feature with random 0/1 values, one per row.
customers_df['has_kids'] = np.random.randint(0, 2, customers_df.shape[0])
customers_df.dtypes

# Re-attach the event time, reusing the timestamp taken earlier in the script.
customers_df['event_time'] = pd.Series([current_time_sec] * len(customers_df), dtype="float64")
customers_df.head(10)

logger.info(f'Ingesting data into feature group: {customers_feature_group.name} ...')
customers_feature_group.ingest(data_frame=customers_df, max_workers=3, wait=True)
logger.info(f'{len(customers_df)} customer records ingested into feature group: {customers_feature_group.name}')

# Read back the record for the customer id chosen earlier.
get_record_result = featurestore_runtime.get_record(
    FeatureGroupName=customers_feature_group_name,
    RecordIdentifierValueAsString=customer_id,
)
pretty_printer.pprint(get_record_result)

# Run the Athena query once more after the second ingestion.
customers_query.run(query_string=query_string, output_location=output_location)
customers_query.wait()
athena_df_update = customers_query.as_dataframe()
athena_df_update.head()

# ---------------------------------------------------------------------------
# Cleanup: remove the offline-store objects from S3, then delete the group.
# ---------------------------------------------------------------------------
describe_feature_group_result = sagemaker_runtime.describe_feature_group(
    FeatureGroupName=customers_feature_group_name,
)
pretty_printer.pprint(describe_feature_group_result)

# Everything after the third '/' of the resolved URI is used as the key prefix.
s3_config = describe_feature_group_result['OfflineStoreConfig']['S3StorageConfig']
s3_uri = s3_config['ResolvedOutputS3Uri']
uri_parts = s3_uri.split('/')
full_prefix = '/'.join(uri_parts[3:])
logger.info(full_prefix)

# Delete every object in the default bucket whose key starts with that prefix.
bucket = s3.Bucket(default_bucket)
offline_objects = bucket.objects.filter(Prefix=full_prefix)
offline_objects.delete()

customers_feature_group.delete()

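Expected behavior:
The SageMaker Feature Group creation API (`FeatureGroup.create`) should work correctly when the `FeatureGroup` is constructed with a `LocalSession`, instead of raising an `AttributeError`.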

Screenshots or logs
The call to `customers_feature_group.create(...)` fails with:
AttributeError: 'LocalSagemakerClient' object has no attribute 'create_feature_group'

System information
A description of your system. Please provide:

(serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP % pip list | grep sagemaker
sagemaker 2.109.0
(serverless-machine-learning) akram@ISHERIFF-M-RBNA AWS_SAGEMAKER_FEATURE_GROUP %

SageMaker Python SDK version: 2.109.0
Python version: 3.9
CPU or GPU: CPU
Custom Docker image (Y/N): N

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions