In [1]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

try:
	role = sagemaker.get_execution_role()
except ValueError:
	iam = boto3.client('iam')
	role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub Model configuration. https://huggingface.co/models
hub = {
	'HF_MODEL_ID':'ckip-joint/bloom-3b-zh',
	'SM_NUM_GPUS': json.dumps(1)
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
	image_uri=get_huggingface_llm_image_uri("huggingface",version="0.8.2"),
	env=hub,
	role=role, 
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1,
	instance_type="ml.g5.2xlarge",
    model_data_download_timeout=3600,
	container_startup_health_check_timeout=3000,
  )
  
    
# send request
predictor.predict({
	"inputs": "四月的某一天，天氣晴朗寒冷，",
})


------------------!

[{'generated_text': '四月的某一天，天氣晴朗寒冷，我獨自一人坐在公園的長椅上，看著遠方，看著天空，看著遠方'}]

In [2]:
import pandas as pd
df = pd.read_csv('data/samples.csv', index_col=0)
dfc = pd.read_csv('data/category.csv')


In [11]:
instruction ="""
Please classify the cateory and sub category based on the product name. 
Product name: {0}
1. Return the results in JSON format with the following keys: "product_name", "category", "sub_category", and "common_labels".
2. The "product_name" field in the JSON should be an exact copy of the given product name.
3. Your task is to classify a product name into one of the following categories: "咖啡類","飲料類","水類","乳品類","豆米漿/植物奶","冰品類","鮮食類","烘焙/甜點類","蛋品類","生鮮食品類","冷凍食品類","保健食品類","點心/零食類","民生食材類","日用百貨類","洗浴清潔/保養類","寵物用品類","戶外休閒用品類","家電類","書本/文教用品類","3C類","服飾鞋包類","精品類","傢俱/家飾類","菸酒類","禮盒類"。 
Please return the category as the "category" field. Do not make up a new category. 
Please note that you should accurately categorize each product based on its name and description, 
using your best judgment to determine which category it belongs to. 
Your response should be specific and accurate, ensuring that each product is correctly classified according to its type.

4. Once you have determined the category of the product, please select the best matching 
subcategory list below and classify the product accordingly. 
This result shall be returned in json "sub_category" field

"咖啡類":["現做咖啡飲品","罐裝/瓶裝咖啡","沖泡式咖啡","濾掛式咖啡","即溶咖啡","咖啡相關食用品"]
"飲料類":["現做茶飲品","茶飲","碳酸/汽水","果汁/果醋飲","運動飲料","養生飲品","能量飲料","特殊風味飲品","沖泡類飲品","現調果汁","其他現調飲品"]
"水類":["礦泉水","氣泡水","機能水"]
"乳品類":["鮮乳類","保久乳","調味乳","優格","優酪乳","發酵乳"]
"豆米漿/植物奶":["豆漿","米漿","燕麥奶","其他植物奶"]


5. assign appropriate labels/tags to the product as "common_labels", such as flavor, brand, key ingredient, and package size 
(if applicable), etc. Please avoid choosing duplicate labels. Your labels shall be no more than four.

The json result is:
"""

item = df.sample(1).item_name.iloc[0]
item

prompt = instruction.format(item)
prompt

predict = predictor.predict({"inputs": prompt, te})

print(item, predict)





平台費 2023-05-15 z8pl-r5kc  [{'generated_text': '\nPlease classify the cateory and sub category based on the product name. \nProduct name: 平台費 2023-05-15 z8pl-r5kc \n1. Return the results in JSON format with the following keys: "product_name", "category", "sub_category", and "common_labels".\n2. The "product_name" field in the JSON should be an exact copy of the given product name.\n3. Your task is to classify a product name into one of the following categories: "咖啡類","飲料類","水類","乳品類","豆米漿/植物奶","冰品類","鮮食類","烘焙/甜點類","蛋品類","生鮮食品類","冷凍食品類","保健食品類","點心/零食類","民生食材類","日用百貨類","洗浴清潔/保養類","寵物用品類","戶外休閒用品類","家電類","書本/文教用品類","3C類","服飾鞋包類","精品類","傢俱/家飾類","菸酒類","禮盒類"。 \nPlease return the category as the "category" field. Do not make up a new category. \nPlease note that you should accurately categorize each product based on its name and description, \nusing your best judgment to determine which category it belongs to. \nYour response should be specific and accurate, ensuring that each product is corr

[{'generated_text': '\n\nPlease generate 10 product names for 咖啡類 category.\n1. Return the results in JSON format with the following key: "product_name".\n\nThe json result is: {"product_name": ["伯朗咖啡", "手沖咖啡", \n"義式咖啡", "美式咖啡", "拿鐵咖啡", "卡布'}]

In [10]:
predict = predictor.predict({"inputs": '為咖啡類商品產生10項商品名稱'})
predict

[{'generated_text': '為咖啡類商品產生10項商品名稱，並依商品名稱進行分析，再以商品名稱進行分析，再以商品名稱進行'}]

In [13]:
prompt = """

Please generate 100 product names for 咖啡類 category.
1. Return the results in JSON format with the following key: "product_name".

The json result is: {"product_name": ["伯朗咖啡", "手沖咖啡", 
"""

predict = predictor.predict({"inputs": prompt})
predict

[{'generated_text': '\n\nPlease generate 100 product names for 咖啡類 category.\n1. Return the results in JSON format with the following key: "product_name".\n\nThe json result is: {"product_name": ["伯朗咖啡", "手沖咖啡", \n"義式咖啡", "美式咖啡", "拿鐵咖啡", "卡布'}]