In [6]:
# pip install "sagemaker==2.*" boto3 pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import boto3
import sagemaker

print("Python ok")
print("sagemaker:", sagemaker.__version__)

boto_sess = boto3.Session()  # uses your local AWS creds/SSO
sm_session = sagemaker.Session(boto_session=boto_sess)

print("Region:", boto_sess.region_name)
print("Default bucket:", sm_session.default_bucket())

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/charukagunawardhane/Library/Application Support/sagemaker/config.yaml
Python ok
sagemaker: 2.255.0
Region: ap-south-1
Default bucket: sagemaker-ap-south-1-487512486150


## Local Notebook Initialization Code

In [3]:

# 1. Configuration
region = "ap-south-1" # Change to your region
# role_arn = "arn:aws:iam::487512486150:user/aws-admin-charuka" # Paste your ARN here
role_arn = "arn:aws:iam::487512486150:role/recommendationsystem-sagemaker-role"

# 2. Initialize Boto3 Session with your local credentials
boto_session = boto3.Session(region_name=region)

# 3. Create SageMaker Session
sagemaker_session = sagemaker.Session(
    boto_session=boto_session,
    default_bucket="amazon-sagemaker-local-dev-store"
)

# 4. Initialize Feature Store Runtime Clients
featurestore_runtime = boto_session.client(
    service_name='sagemaker-featurestore-runtime', 
    region_name=region
)
sagemaker_client = boto_session.client(
    service_name='sagemaker', 
    region_name=region
)

print(f"Connected locally to SageMaker in {region}")

Connected locally to SageMaker in ap-south-1


In [4]:
 # List all feature groups in your region
response = sagemaker_client.list_feature_groups()

print("Available Feature Groups:")
for fg in response['FeatureGroupSummaries']:
    print(f"- {fg['FeatureGroupName']} (Created: {fg['CreationTime']})")

Available Feature Groups:
- all-beauty-features-1766218384 (Created: 2025-12-20 14:06:25.712000+05:30)


## Define and Create (Local Execution)

In [6]:
import boto3
from sagemaker.feature_store.feature_group import FeatureGroup
import time
import pandas as pd

In [4]:
s3 = boto3.client("s3")
s3.list_buckets()

{'ResponseMetadata': {'RequestId': 'J4SM0C0JHXN0SM6W',
  'HostId': 'NBC/AivwVPEEyB3e/lxgo9p3M3+JvL6v0mhAGnD2sc9ukBuMiMItZQSz9ZpqL/uOn2ttjSU5c/g=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'NBC/AivwVPEEyB3e/lxgo9p3M3+JvL6v0mhAGnD2sc9ukBuMiMItZQSz9ZpqL/uOn2ttjSU5c/g=',
   'x-amz-request-id': 'J4SM0C0JHXN0SM6W',
   'date': 'Sat, 20 Dec 2025 20:32:12 GMT',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'amazon-sagemaker-487512486150-ap-south-1-4923573543c7',
   'CreationDate': datetime.datetime(2025, 12, 20, 2, 59, 53, tzinfo=tzutc()),
   'BucketArn': 'arn:aws:s3:::amazon-sagemaker-487512486150-ap-south-1-4923573543c7'},
  {'Name': 'aws-glue-assets-487512486150-ap-south-1',
   'CreationDate': datetime.datetime(2025, 12, 19, 12, 50, 52, tzinfo=tzutc()),
   'BucketArn': 'arn:aws:s3:::aws-glue-assets-487512486150-ap-south-1'},
  {'Name': 'recommendation-project-rapid',
   'Creation

In [33]:
# Unique name for All_Beauty
feature_group_name = f"all-beauty-features-{int(time.time())}"

all_beauty_fg = FeatureGroup(
    name=feature_group_name, 
    sagemaker_session=sagemaker_session
)

In [7]:
df = pd.read_parquet(
    "s3://recommendation-project-rapid/processed/all_beauty_dataset",
    engine="pyarrow"
)

df.head()

Unnamed: 0,user_id,parent_asin,event_time_seconds,calendar_date,movie_title,main_category,rating
0,AFQLNQNQYFWQZPJQZS6V3NZU4QBQ,B09JS339BZ,1643394000.0,2022-01-28,muaowig Ombre Body Wave Bundles 1B Grey Human ...,All Beauty,1.0
1,AHGAOIZVODNHYMNCBV4DECZH42UQ,B01AKTGHFW,1500769000.0,2017-07-23,"Charcoal Konjac Face Sponge 3 pk | Acne, Psori...",All Beauty,1.0
2,AFZUK3MTBIBEDQOPAK3OATUOUKLA,B0BFR5WF1R,1675826000.0,2023-02-08,Halo Hair Extensions Thick Invisible Wire Hair...,All Beauty,1.0
3,AFETVW7S5M4LVJ7GTWPCKT7S3YBQ,B01M5KNSQN,1649634000.0,2022-04-10,"NYX Eyebrow Shaper, 1 Count",All Beauty,1.0
4,AGVVUU3QRQBHNASSGI5YQLPYOI2Q,B09FF97RHL,1648825000.0,2022-04-01,"24 Eyebrow stencil , Meilala Eyebrow Shaper Ki...",All Beauty,1.0


In [40]:
df['rating'] = df['rating'].astype('float64')

In [20]:

# # After loading your cleaned Parquet data into a Pandas DataFrame 'df'
# all_beauty_fg.load_feature_definitions(data_frame=df)
# print("Complete loading feature definitions")
# print("")

Complete loading feature definitions



In [25]:
# Create the group in the cloud from your local notebook
all_beauty_fg.create(
    s3_uri="s3://recommendation-project-rapid/offline-store",
    record_identifier_name="user_id",
    event_time_feature_name="event_time_seconds",
    role_arn=role_arn,
    enable_online_store=True
)

print("Complete creating feature groups")
print("")

Complete creating feature groups



In [26]:
# import time

def wait_for_feature_group_creation(feature_group):
    status = feature_group.describe().get("FeatureGroupStatus")
    print(f"Initial status for {feature_group.name}: {status}")
    
    while status == "Creating":
        print("Waiting for Feature Group to be Created...")
        time.sleep(10)  # Wait 10 seconds before checking again
        status = feature_group.describe().get("FeatureGroupStatus")
        
    if status == "Created":
        print(f"Feature Group {feature_group.name} successfully created!")
    else:
        # If it's 'CreateFailed', SageMaker will provide a reason
        description = feature_group.describe()
        failure_reason = description.get("FailureReason", "Unknown failure.")
        print(f"Feature Group creation failed. Status: {status}")
        print(f"Reason: {failure_reason}")

# Run this for your specific group
wait_for_feature_group_creation(all_beauty_fg)

Initial status for all-beauty-features-1766218384: Created
Feature Group all-beauty-features-1766218384 successfully created!


## Ingest Data

In [8]:
from sagemaker.feature_store.feature_group import FeatureGroup

# 1. Use the EXACT name of the group you created earlier 
# (Check the SageMaker Console or Unified Studio if you forgot the exact string)
existing_fg_name = "all-beauty-features-1766218384" 

# 2. Re-instantiate the object
loaded_fg = FeatureGroup(name=existing_fg_name, sagemaker_session=sagemaker_session)

# 3. Verify it's connected by describing it
try:
    description = loaded_fg.describe()
    print(f"Successfully loaded Feature Group: {existing_fg_name}")
    print(f"Status: {description.get('FeatureGroupStatus')}")
except Exception as e:
    print(f"Error: Could not find Feature Group '{existing_fg_name}'. Details: {e}")

Successfully loaded Feature Group: all-beauty-features-1766218384
Status: Created


In [1]:
# Ingest the cleaned Pandas DataFrame 'df'
# max_workers=5 allows parallel threads for faster upload from your local machine
loaded_fg.ingest(data_frame=df, max_workers=5, wait=True)

print("Ingestion complete.")

Ingestion complete.


## Creating Separate Feature Groups

In [9]:
# --- CREATE USER FEATURE GROUP ---
user_fg_name = f"all-beauty-users-{int(time.time())}"
user_fg = FeatureGroup(name=user_fg_name, sagemaker_session=sagemaker_session)

# Load only the columns related to the user
user_cols = ['user_id', 'event_time_seconds', 'rating_count_by_user'] # example user columns
user_fg.load_feature_definitions(data_frame=df[user_cols])

user_fg.create(
    s3_uri="s3://recommendation-project-rapid/offline-store/users",
    record_identifier_name="user_id",
    event_time_feature_name="event_time_seconds",
    role_arn=role_arn,
    enable_online_store=True
)

# --- CREATE ITEM FEATURE GROUP ---
item_fg_name = f"all-beauty-items-{int(time.time())}"
item_fg = FeatureGroup(name=item_fg_name, sagemaker_session=sagemaker_session)

# Load only the columns related to the item/product
item_cols = ['parent_asin', 'event_time_seconds', 'average_rating', 'main_category']
item_fg.load_feature_definitions(data_frame=df[item_cols])

item_fg.create(
    s3_uri="s3://recommendation-project-rapid/offline-store/items",
    record_identifier_name="parent_asin",
    event_time_feature_name="event_time_seconds",
    role_arn=role_arn,
    enable_online_store=True
)

In [None]:
# Check status for both
for fg in [user_fg, item_fg]:
    status = fg.describe().get("FeatureGroupStatus")
    print(f"{fg.name} status: {status}")
    
    if status != "Created":
        print(f"Warning: {fg.name} is not ready yet. Please wait.")