# OpenCALM SageMaker Inference

This notebook is sample code for deploying [OpenCALM](https://huggingface.co/spaces/kyo-takano/OpenCALM-7B) to an asynchronous inference endpoint on SageMaker.

In [7]:
!pip install "sagemaker>=2.143.0" -U



In [8]:
import sagemaker, boto3, json
from sagemaker import get_execution_role
from sagemaker.pytorch.model import PyTorchModel
from sagemaker.huggingface import HuggingFace

# Shared handles used by the cells below.
# `role` is handed to the model definition in the deploy cell.
role = get_execution_role()
# Region name of the default boto3 session (not referenced by the later cells shown here).
region = boto3.Session().region_name
# SageMaker session used for the S3 upload and by the inference client.
sess = sagemaker.Session()
# Bucket that receives the packaged model archive.
bucket = sess.default_bucket()

# Display the installed SDK version (the install cell asks for >=2.143.0).
sagemaker.__version__

'2.164.0'

## Package and Upload Model

In [9]:
!rm -rf scripts/model
%cd scripts
!tar -czvf ../package.tar.gz *
%cd -

/home/ubuntu/environment/aws-ml-jp/tasks/generative-ai/text-to-text/fine-tuning/instruction-tuning/Transformers/scripts
code/
code/inference.py
code/requirements.txt
code/finetune.py
code/run_clm.py
code/templates/
code/templates/simple_qa.json
code/templates/alpaca_short.json
code/templates/simple_qa2.json
code/templates/rinna.json
code/templates/alpaca.json
code/templates/simple_qa_ja.json
code/templates/stable_lm.json
code/templates/.ipynb_checkpoints/
code/templates/.ipynb_checkpoints/alpaca-checkpoint.json
code/templates/.ipynb_checkpoints/simple_qa2-checkpoint.json
code/templates/.ipynb_checkpoints/simple_qa_ja-checkpoint.json
code/templates/.ipynb_checkpoints/simple_qa-checkpoint.json
code/utils/
code/utils/prompter.py
code/utils/__init__.py
code/.ipynb_checkpoints/
code/.ipynb_checkpoints/inference-checkpoint.py
code/__init__.py
conftest.py
tests/
tests/environment.yml
tests/utils/
tests/utils/test_prompter.py
tests/utils/__init__.py
tests/__init__.py
/home/ubuntu/environment/a

In [10]:
# Upload the archive under the "OpenCALM" key prefix of the bucket and keep
# the returned location for the deploy step.
model_path = sess.upload_data(
    "package.tar.gz",
    bucket=bucket,
    key_prefix="OpenCALM",
)
model_path

's3://sagemaker-us-east-1-585936743357/OpenCALM/package.tar.gz'

## Deploy Model

In [11]:
from sagemaker.async_inference import AsyncInferenceConfig
from sagemaker.serializers import JSONSerializer

# The same name is used for both the SageMaker model and the endpoint.
endpoint_name = "OpenCALM"

# Model definition backed by the uploaded archive. The loading options are
# passed as a JSON string in the "model_params" environment variable
# (presumably read by code/inference.py inside the archive -- confirm there).
huggingface_model = PyTorchModel(
    name=endpoint_name,
    role=role,
    model_data=model_path,
    framework_version="1.13",
    py_version="py39",
    env=dict(
        model_params=json.dumps(
            dict(
                base_model="cyberagent/open-calm-7b",
                peft=False,
                load_8bit=True,
                prompt_template="simple_qa_ja",
            )
        ),
    ),
)

# Create an asynchronous inference endpoint on a single ml.g5.2xlarge instance.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    serializer=JSONSerializer(),
    async_inference_config=AsyncInferenceConfig(),
)

----------!

## Run Inference

In [14]:
from sagemaker.predictor import Predictor
from sagemaker.predictor_async import AsyncPredictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import NumpyDeserializer

# Client for the already-deployed endpoint: a regular Predictor (JSON in,
# NumpyDeserializer out) wrapped in an AsyncPredictor.
predictor_client = AsyncPredictor(
    name=endpoint_name,
    predictor=Predictor(
        endpoint_name=endpoint_name,
        sagemaker_session=sess,
        serializer=JSONSerializer(),
        deserializer=NumpyDeserializer(),
    ),
)

# Request payload: the instruction text plus generation options.
# NOTE(review): temperature 0 combined with do_sample=True looks contradictory;
# confirm how the inference handler interprets these two options.
data = dict(
    instruction="映画『ウエスト・サイド物語』に登場する2つの少年グループといえば、シャーク団と何団?",
    max_new_tokens=64,
    temperature=0,
    do_sample=True,
    pad_token_id=1,
    # stop_ids=[50278, 50279, 50277, 1, 0],
)

response = predictor_client.predict(data=data)
print(response)

ClientError: An error occurred (ExpiredToken) when calling the PutObject operation: The provided token has expired.

## Delete Endpoint

In [None]:
# Tear down the resources created by the deploy cell.
# NOTE(review): the model is deleted before the endpoint; presumably the SDK
# resolves the model through the endpoint's configuration, so confirm before
# reordering these two calls.
predictor.delete_model()
predictor.delete_endpoint()