In [2]:
!pip install -U sagemaker transformers --quiet

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
# Resolve the IAM role ARN that is later handed to the model object.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # The SDK could not supply an execution role (presumably because the notebook
    # is running outside SageMaker -- confirm), so look the role up by its fixed
    # name through IAM and take the ARN from the response.
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]

In [5]:
# Hub model configuration (model ids come from https://huggingface.co/models).
# This mapping is passed as the container environment, so every value is a string:
# json.dumps turns True into "true" and 4 into "4".
trust_remote_code = True
hub = dict(
    HF_MODEL_ID="aisingapore/sealion7b-instruct-nc",
    HF_MODEL_TRUST_REMOTE_CODE=json.dumps(trust_remote_code),
    SM_NUM_GPUS=json.dumps(4),
)

In [6]:
# Create the Hugging Face model object describing what will be deployed.
#
# The serving container is selected explicitly through image_uri (the Hugging Face
# LLM image, version 1.4.0). transformers_version is therefore not passed: per the
# SageMaker SDK documentation the framework-version arguments are only needed when
# no image_uri is given, and supplying transformers_version on its own (without a
# pytorch_version/tensorflow_version) is at best ignored in this configuration.
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="1.4.0"),
    env=hub,    # container environment: model id, remote-code flag, GPU count
    role=role,  # IAM role ARN resolved earlier in the notebook
)

In [7]:
# Deploy the model to a SageMaker real-time inference endpoint and keep the
# returned predictor for sending requests and for cleanup later on.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.12xlarge",
    # Seconds allowed for the container to pass its startup health check.
    container_startup_health_check_timeout=300,
)

----------!

In [11]:
!pip install sentencepiece transformers

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [14]:
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the model configuration from the Hugging Face Hub.
# trust_remote_code=True allows the repository's own Python code to be executed.
# NOTE(review): this repo id differs from the one deployed to the endpoint
# ('aisingapore/sealion7b-instruct-nc' in the hub configuration) -- confirm that
# using the 3B repository here is intentional.
SEA_LION_REPO_ID = "aisingapore/sea-lion-3b"

tokenizer = AutoTokenizer.from_pretrained(SEA_LION_REPO_ID, trust_remote_code=True)

config = transformers.AutoConfig.from_pretrained(SEA_LION_REPO_ID, trust_remote_code=True)

In [39]:
# Markers that delimit the sections of a prompt.
INSTRUCTION_KEY = "### USER:"
RESPONSE_KEY = "### RESPONSE:"
END_KEY = "### END"

# Optional preamble placed on the first line of every prompt; currently empty.
# A disabled Indonesian-language alternative is kept below. Roughly: "Below is an
# instruction that describes a task. Use Indonesian. Write a response that answers
# the given task."
#INTRO_BLURB = "Berikut adalah instruksi yang menjelaskan suatu tugas. Gunakan bahasa Indonesia. Tulislah sebuah respon yang menjawab tugas yang diberikan."
INTRO_BLURB = ""

# Prompt template with the fixed parts already filled in. The only placeholder
# left for a later .format() call is {instruction}. Layout, one item per line:
# intro, instruction marker, instruction, response marker, then a trailing newline.
PROMPT_FOR_GENERATION_FORMAT = "\n".join(
    [
        INTRO_BLURB,
        INSTRUCTION_KEY,
        "{instruction}",
        RESPONSE_KEY,
        "",
    ]
)

# Generation arguments sent as the "parameters" field of each request.
payload = dict(
    do_sample=True,
    top_p=0.6,
    temperature=0.1,
    top_k=50,
    max_new_tokens=1024,
    repetition_penalty=1.03,
    return_full_text=False,
    stop=["</s>"],
)

In [40]:
# Send one request to the deployed endpoint and print the generated answer.
# The template only has the {instruction} placeholder left to fill (the intro and
# the section markers were substituted when the template was built), so that is
# the only argument given to .format() here.
question = "Jawablah pertanyaan berikut dengan penjelasan perhitungan: Bila Budi memiliki uang dua ribu rupiah lalu dia membeli eskrim seharga 1000 rupiah berapakah sisa uang Budi ?"
request_body = {
    "inputs": PROMPT_FOR_GENERATION_FORMAT.format(instruction=question),
    "parameters": payload,
}
chat = predictor.predict(request_body)

# The response is indexed as a list whose first item holds the generated text.
print(chat[0]["generated_text"])

Jumlah uang yang tersisa adalah 800 rupiah.


In [41]:
# Cleanup: remove the model and the endpoint that deploy() created.
# The model is deleted first, as in the original order. try/finally guarantees the
# endpoint deletion is still attempted when the model deletion raises, so a failed
# cleanup step does not leave the GPU endpoint running.
try:
    predictor.delete_model()
finally:
    predictor.delete_endpoint()