# SageMaker에서 NCF 모델 배포 및 엔드포인트 생성

이 노트북은 기본 커널로 conda_python3를 사용합니다.

## 0. 환경 설정

In [1]:
import boto3
import time
import os
import sagemaker
from datetime import datetime

In [2]:
# SageMaker session shared by the later cells (model upload, region fallback).
sagemaker_session = sagemaker.session.Session()
# IAM role that SageMaker assumes when it creates the model.
role = sagemaker.get_execution_role()
# Reuse the session created above rather than constructing a second one
# just to look up the default bucket.
bucket = sagemaker_session.default_bucket()

print("role: ", role)
print("bucket: ", bucket)

role:  arn:aws:iam::376278017302:role/service-role/AmazonSageMaker-ExecutionRole-20230112T204234
bucket:  sagemaker-us-east-1-376278017302


## 1. 모델(model.tar.gz)을 S3에 업로드


In [3]:
# S3 key prefix for the model artifact and the local path of the archive.
model_prefix = "ncf/model"
model_filename = "./model/model.tar.gz"

# Upload the archive to s3://<bucket>/<model_prefix>/ and keep the returned S3 URI.
model_s3_path = sagemaker_session.upload_data(
    path=model_filename,
    bucket=bucket,
    key_prefix=model_prefix,
)
print("model: \n", model_s3_path)

model: 
 s3://sagemaker-us-east-1-376278017302/ncf/model/model.tar.gz


## 2. SageMaker에서 모델 생성
`create_model` API로 SageMaker 모델 리소스를 생성합니다. (Model Registry의 모델 패키지 등록과는 별개입니다.)

In [4]:
# Name under which the model is created in SageMaker
model_name = "ncf-tf-model"

In [5]:
# Resolve the AWS region used below for the SageMaker client and the ECR image URI.
session = boto3.Session()
# boto3 reports None when no default region is configured, which would
# produce a malformed image URI; fall back to the region of the SageMaker
# session that the default bucket was obtained from.
region = session.region_name or sagemaker_session.boto_region_name

print("Current region:", region)

Current region: us-east-1


<br>
등록된 모델(model.tar.gz)은 TensorFlow 2.6 환경에서 작성되었습니다.<br>
따라서 컨테이너 이미지는 'tensorflow-inference:2.6-cpu'를 사용합니다.

In [6]:
# Control-plane client used to create the model, endpoint config and endpoint.
sagemaker_client = boto3.client('sagemaker', region_name=region)

# TensorFlow 2.6 CPU inference image for the current region; the model archive
# was built in a TensorFlow 2.6 environment.
inference_image_uri = '763104351884.dkr.ecr.{}.amazonaws.com/tensorflow-inference:2.6-cpu'.format(region)

# Create a model in SageMaker
create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    PrimaryContainer={
        'Image': inference_image_uri,
        # Use the S3 URI returned by the upload step instead of rebuilding it
        # by hand, so the model always points at the object that was uploaded.
        'ModelDataUrl': model_s3_path,
    }
)

## 3. SageMaker Endpoint 생성

In [7]:
# Names of the endpoint configuration and of the endpoint itself
endpoint_config_name = "ncf-model-endpoint-config"
endpoint_name = "ncf-model-endpoint"

In [8]:
# The only production variant: one instance serving the model created above.
production_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': model_name,
    'InitialInstanceCount': 1,
    'InstanceType': 'ml.m5.xlarge',  # Choose the desired instance type
    'InitialVariantWeight': 1,
}

# Create the endpoint configuration from that single variant
create_endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print("Endpoint config created:", create_endpoint_config_response['EndpointConfigArn'])

Endpoint config created: arn:aws:sagemaker:us-east-1:376278017302:endpoint-config/ncf-model-endpoint-config


In [9]:
# Start creating the endpoint from the endpoint configuration defined earlier.
# The response is kept because later cells read the endpoint ARN from it.
create_endpoint_response = sagemaker_client.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=endpoint_name,
)

print("Creating endpoint...")

Creating endpoint...


#### Endpoint 생성 완료까지 대기

In [10]:
# Wait for the endpoint to leave the 'Creating' state, polling every 30 seconds
endpoint_status = 'Creating'
while endpoint_status == 'Creating':
    time.sleep(30)
    endpoint_description = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
    endpoint_status = endpoint_description['EndpointStatus']
    print("Endpoint status:", endpoint_status)

# The loop exits on any status other than 'Creating' (for example 'Failed'),
# so only report success when the endpoint is actually in service.
if endpoint_status != 'InService':
    raise RuntimeError(
        "Endpoint {} ended in status {}: {}".format(
            endpoint_name,
            endpoint_status,
            endpoint_description.get('FailureReason', 'no failure reason reported'),
        )
    )

print("Endpoint created:", create_endpoint_response['EndpointArn'])

Endpoint status: Creating
Endpoint status: Creating
Endpoint status: Creating
Endpoint status: InService
Endpoint created: arn:aws:sagemaker:us-east-1:376278017302:endpoint/ncf-model-endpoint


# 아래의 SageMaker Endpoint ARN으로 추론을 합니다.
Lambda Function에서 NCF 모델 추론은 아래 SageMaker Endpoint ARN을 사용합니다.

In [11]:
# ARN reported by the create_endpoint call
endpoint_arn = create_endpoint_response['EndpointArn']
print("SageMaker Endpoint ARN : ", endpoint_arn)

SageMaker Endpoint ARN :  arn:aws:sagemaker:us-east-1:376278017302:endpoint/ncf-model-endpoint


#### 변수 저장
clean-up을 위해 필요한 변수 저장

In [12]:
# Save the resource names with the IPython %store magic; they are needed later for clean-up.
%store endpoint_config_name
%store endpoint_name
%store model_name

Stored 'endpoint_config_name' (str)
Stored 'endpoint_name' (str)
Stored 'model_name' (str)


## (OPTION) Inference Test
아래 코드의 user_id에 1, 2, 100 등의 값을 넣어 추천 아이템(item_id) 10개가 잘 출력되는지 확인합니다.

### a. 입력한 user_id 에 대한 ITEM_ID (10개) 만 출력

In [14]:
import numpy as np
import pandas as pd
import json
import sagemaker

# Load dataset and create user_to_index and item_to_index dictionaries
data = pd.read_csv('./dataset/merged_data.csv')
user_ids = data['user_id'].unique()
# Item ids ordered from the most to the least frequent in the dataset
item_ids = data.groupby('item_id').size().sort_values(ascending=False).index.to_numpy()

# NOTE(review): these positional indices presumably have to match the ones
# the model was trained with - confirm against the training notebook.
user_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
item_to_index = {item_id: index for index, item_id in enumerate(item_ids)}

# USER_ID to run inference for
user_id = 3

# Fail with an explicit message when the user does not appear in the dataset.
if user_id not in user_to_index:
    raise KeyError("user_id {!r} not found in ./dataset/merged_data.csv".format(user_id))

user_idx = user_to_index[user_id]
# Pair the user with every item: both inputs are column vectors of equal length.
item_idx_list = np.array([item_to_index[item_id] for item_id in item_ids])
user_input = np.full(len(item_ids), user_idx).reshape(-1, 1)
item_input = item_idx_list.reshape(-1, 1)


# Runtime client for invoke_endpoint. It gets its own name so that the
# control-plane `sagemaker_client` created earlier is not overwritten and the
# earlier cells can still be re-run.
runtime_client = boto3.client('sagemaker-runtime')

# Perform inference using invoke_endpoint()
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=json.dumps(
        {"user_input": user_input.tolist(),
         "item_input": item_input.tolist()}
    )
)

# Parse response
predictions = json.loads(response['Body'].read().decode('utf-8'))

# Convert predictions to a 1-dimensional array
predictions_array = np.array(predictions['predictions']).reshape(-1)

# Get the indices of the top 10 items, highest score first
top_10_indices = np.argsort(predictions_array)[-10:][::-1]

# Get the item_ids for the top 10 items
top_10_item_ids = [item_ids[idx] for idx in top_10_indices]

print("Top 10 item IDs for user ID", user_id, ":\n", top_10_item_ids)

Top 10 item IDs for user ID 3 :
 ['1849aebd-fdda-4f05-be3f-28f2c61d8901', '471622d6-3300-4591-816b-ab8487e70274', '78080d05-b078-441f-b245-54b2a2dec872', '3fa257ab-5c4c-4963-941f-bc73bafc8bff', '8d67146f-0b59-4761-9120-34ebfee7b299', '07441beb-9b1a-4c25-b302-2f11362cff86', '1f7340f0-805d-4ba9-a0e4-76d392eab5a1', '0770ffd6-c89a-489a-b9dc-e5c1c267e102', '95be8b94-feeb-48df-8449-34924e4d849d', 'ca3f03ff-72a5-4a6a-9cd9-a513fcde604e']


### b. 입력한 user_id 에 대한 ITEM_ID 및 상세 정보(10개)를 함께 출력

In [22]:
def get_item_list_details(items_df, item_id_list):
    """Return the rows of ``items_df`` that describe the given item IDs.

    The IDs are placed in a one-column ``ITEM_ID`` frame, which is merged with
    ``items_df`` using pandas' default merge settings (inner join on the
    columns the two frames have in common).
    """
    requested_items = pd.DataFrame({'ITEM_ID': item_id_list})
    return pd.merge(requested_items, items_df)

In [25]:
import numpy as np
import pandas as pd
import json
import sagemaker

# Load dataset and create user_to_index and item_to_index dictionaries
data = pd.read_csv('./dataset/merged_data.csv')
user_ids = data['user_id'].unique()
# Item ids ordered from the most to the least frequent in the dataset
item_ids = data.groupby('item_id').size().sort_values(ascending=False).index.to_numpy()

# NOTE(review): these positional indices presumably have to match the ones
# the model was trained with - confirm against the training notebook.
user_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
item_to_index = {item_id: index for index, item_id in enumerate(item_ids)}

# USER_ID to run inference for
user_id = 3

# Fail with an explicit message when the user does not appear in the dataset.
if user_id not in user_to_index:
    raise KeyError("user_id {!r} not found in ./dataset/merged_data.csv".format(user_id))

user_idx = user_to_index[user_id]
# Pair the user with every item: both inputs are column vectors of equal length.
item_idx_list = np.array([item_to_index[item_id] for item_id in item_ids])
user_input = np.full(len(item_ids), user_idx).reshape(-1, 1)
item_input = item_idx_list.reshape(-1, 1)


# Runtime client for invoke_endpoint. It gets its own name so that the
# control-plane `sagemaker_client` created earlier is not overwritten and the
# earlier cells can still be re-run.
runtime_client = boto3.client('sagemaker-runtime')

# Perform inference using invoke_endpoint()
response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType='application/json',
    Body=json.dumps(
        {"user_input": user_input.tolist(),
         "item_input": item_input.tolist()}
    )
)

# Parse response
predictions = json.loads(response['Body'].read().decode('utf-8'))

# Convert predictions to a 1-dimensional array
predictions_array = np.array(predictions['predictions']).reshape(-1)

# Get the indices of the top 10 items, highest score first
top_10_indices = np.argsort(predictions_array)[-10:][::-1]

# Get the item_ids for the top 10 items
top_10_item_ids = [item_ids[idx] for idx in top_10_indices]

print("Top 10 item IDs for user ID", user_id, ":\n")

# Join the recommended ids with the item metadata in 'training_item.csv';
# the resulting frame is the cell's displayed value.
items_df = pd.read_csv("./dataset/training_item.csv")
get_item_list_details(items_df, top_10_item_ids)

Top 10 item IDs for user ID 3 :



Unnamed: 0,ITEM_ID,NAME,CATEGORY_L1,STYLE,PRODUCT_DESCRIPTION,PRICE
0,1849aebd-fdda-4f05-be3f-28f2c61d8901,Black Rain Jacket,apparel,jacket,Well-insulated rain jacket for men,122.99
1,471622d6-3300-4591-816b-ab8487e70274,Coffee Maker,housewares,kitchen,A must-have for your kitchen,55.99
2,78080d05-b078-441f-b245-54b2a2dec872,Sophisticated Ceramic Bowl,housewares,bowls,Sophisticated ceramic bowl for every occasion,18.99
3,3fa257ab-5c4c-4963-941f-bc73bafc8bff,Squash,groceries,vegetables,Keep your pantry well stocked with this delish...,5.99
4,8d67146f-0b59-4761-9120-34ebfee7b299,Beef,groceries,meat,Our beef is raised sustainably by our farmers,11.99
5,07441beb-9b1a-4c25-b302-2f11362cff86,Supercool Belt,accessories,belt,Supercool belt for women,42.99
6,1f7340f0-805d-4ba9-a0e4-76d392eab5a1,Chef Knife,housewares,kitchen,Incomparable for every kitchen,65.99
7,0770ffd6-c89a-489a-b9dc-e5c1c267e102,Beef,groceries,meat,Our beef is organic and raised sustainably,14.99
8,95be8b94-feeb-48df-8449-34924e4d849d,Faultless Microphone,instruments,microphone,This faultless microphone can be relied upon i...,129.99
9,ca3f03ff-72a5-4a6a-9cd9-a513fcde604e,Supercool Glasses,accessories,glasses,These supercool glasses for men are incomparable,118.99
