In [None]:
import boto3
from sagemaker.feature_store.feature_group import FeatureGroup
import time
import pandas as pd

In [None]:
import boto3
import sagemaker

# Environment sanity check: prove the kernel runs and show which SDK is installed.
print("Python ok")
print("sagemaker:", sagemaker.__version__)

# Default sessions, built from whatever AWS credentials boto3 finds locally
# (shared profile, environment variables, SSO cache, ...).
boto_sess = boto3.Session()
sm_session = sagemaker.Session(boto_session=boto_sess)

# Report what those default sessions resolved to.
print("Region:", boto_sess.region_name)
print("Default bucket:", sm_session.default_bucket())

In [None]:
# 1. Configuration
region = "ap-south-1"  # Change to your region
# Execution role that SageMaker assumes for training jobs and endpoints.
# This has to be an IAM *role* ARN (an IAM user ARN is not valid here).
role_arn = "arn:aws:iam::487512486150:role/recommendationsystem-sagemaker-role"
# S3 bucket shared by the later cells (training dataset, RecordIO file, model
# artifacts). Defined here so those cells do not depend on hidden kernel state.
bucket = "amazon-sagemaker-local-dev-store"

# 2. Initialize Boto3 Session with your local credentials
boto_session = boto3.Session(region_name=region)

# 3. Create SageMaker Session
sagemaker_session = sagemaker.Session(
    boto_session=boto_session,
    default_bucket=bucket
)

# 4. Initialize service clients
# The Feature Store runtime client is currently disabled. Keep the whole call
# commented out, including the closing parenthesis: a stray live ")" makes the
# entire cell a SyntaxError.
# featurestore_runtime = boto_session.client(
#     service_name='sagemaker-featurestore-runtime',
#     region_name=region
# )
sagemaker_client = boto_session.client(
    service_name='sagemaker',
    region_name=region
)

print(f"Connected locally to SageMaker in {region}")

## Building the Training Dataset

### Step 1: Initialize the FeatureStore Client

In [None]:
from sagemaker.feature_store.feature_store import FeatureStore

# Feature Store entry point, bound to the SageMaker session configured above;
# the dataset-building cell below calls `create_dataset` on it.
feature_store = FeatureStore(
    sagemaker_session=sagemaker_session,
)

### Step 2: Create a "Base" Entity DataFrame

In [None]:
# Prerequisites from earlier work (not created in this cell):
#   df_reviews          - the cleaned reviews DataFrame, used as the base of the dataset
#   user_feature_group  - existing feature group, joined on "user_id"
#   item_feature_group  - existing feature group, joined on "parent_asin"

# Build the dataset definition in one chain: start from the real reviews,
# then attach both feature groups. Each `with_feature_group` call hands back
# the builder, so the final `builder` is the same object as in a step-by-step build.
builder = (
    feature_store.create_dataset(
        base=df_reviews,
        event_time_identifier_feature_name='event_time_seconds',
        record_identifier_feature_name='user_id',
        output_path=f"s3://{bucket}/training-datasets/all_beauty_real_v1",
    )
    .with_feature_group(user_feature_group, "user_id")
    .with_feature_group(item_feature_group, "parent_asin")
)

# Materialize the joined dataset as a CSV file in S3; keep the generated query
# alongside the file location for inspection.
s3_uri, query = builder.to_csv_file()
print(f"Your real training dataset is now at: {s3_uri}")

## Pre-process the Data (Dense RecordIO-Protobuf)

In [None]:
import sagemaker.amazon.common as smac
import io

# 1. Load the generated CSV into Pandas (if it's not too large)
# NOTE: pandas reads s3:// URIs through the optional `s3fs` package.
df_train = pd.read_csv(s3_uri)

# 2. Extract features (X) and target (y - the rating)
# Make sure to drop IDs and only keep numerical/one-hot features.
# Every remaining column must be castable to float32, otherwise `.astype` raises.
X = df_train.drop(['user_id', 'parent_asin', 'rating', 'event_time_seconds'], axis=1).values.astype('float32')
y = df_train['rating'].values.astype('float32')

# 3. Convert to RecordIO-Protobuf
# `write_numpy_to_dense_tensor` is the sagemaker.amazon.common writer for a
# dense NumPy matrix plus label vector (the previously used
# `write_numpy_to_pb_serialized_binary_tensor` does not exist in that module).
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, X, y)
buf.seek(0)  # rewind so the upload starts from the first byte

# 4. Upload the training file to S3
key = 'all-beauty-recordio'
boto3.resource('s3').Bucket(bucket).Object(f"train/{key}").upload_fileobj(buf)
train_data_path = f"s3://{bucket}/train/{key}"

## Set Up the Estimator

In [None]:
from sagemaker import image_uris

# Look up the built-in Factorization Machines training image for the
# region the boto session is bound to.
container = image_uris.retrieve(
    framework="factorization-machines",
    region=boto_session.region_name,
)

# Generic Estimator wrapping that image: one ml.c5.xlarge instance, running
# under `role_arn`, writing model artifacts below model-artifacts/ in the bucket.
fm = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role_arn,
    instance_count=1,
    instance_type='ml.c5.xlarge',
    output_path=f"s3://{bucket}/model-artifacts/",
    sagemaker_session=sagemaker_session,
)

# Hyperparameters for the training job.
fm.set_hyperparameters(
    # Width of the feature matrix X built in the pre-processing cell.
    feature_dim=X.shape[1],
    # The target is a numeric rating (1-5), so train a regressor.
    predictor_type='regressor',
    # Number of latent factors of the FM model.
    num_factors=64,
    epochs=10,
)

## Run the Training Job

In [None]:
# Launch the training job, feeding the uploaded RecordIO file as the
# 'train' channel.
fm.fit(inputs={'train': train_data_path})

print("Model training complete! Check the S3 model-artifacts folder.")

## Deploy the Model as an API Endpoint

In [None]:
# Host the trained model behind a real-time endpoint.
# A single small instance is used here to keep test costs low.
# Requests and responses made through `predictor` are JSON-encoded.
predictor = fm.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
    serializer=sagemaker.serializers.JSONSerializer(),
    deserializer=sagemaker.deserializers.JSONDeserializer(),
)

# No endpoint name was passed to deploy(), so print the one that was assigned.
print(f"Endpoint name: {predictor.endpoint_name}")

## Test the Endpoint

In [None]:
import boto3
import json

# Create the runtime client from the notebook's configured session so it uses
# the same credentials and region as the deployment. A bare
# boto3.client('sagemaker-runtime') falls back to the default region, which
# may not be the region the endpoint lives in.
runtime = boto_session.client(
    service_name='sagemaker-runtime',
    region_name=region
)

# Invoke the endpoint created by the deploy cell. deploy() was called without
# an explicit endpoint name, so a hard-coded name would not match it.
# To test a different, already-existing endpoint, assign its name here instead.
endpoint_name = predictor.endpoint_name

# Request body with a single instance.
# NOTE(review): the length of "features" presumably has to equal the
# feature_dim the model was trained with (X.shape[1]) - confirm before running.
payload = {
    "instances": [{"features": [0.5, 1.1, 2.0, 0.0, 1.0]}]
}

response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=json.dumps(payload)
)

# Parse the StreamingBody response
result = json.loads(response['Body'].read().decode())
print(f"Prediction result: {result}")