In [2]:
%pip install sagemaker --upgrade  --quiet

[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
import boto3
import sagemaker
from sagemaker import Model, image_uris, serializers, deserializers

role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
region = sess._region_name  # region name of the current SageMaker Studio environment
account_id = sess.account_id()  # account_id of the current SageMaker Studio environment

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [4]:
%%writefile serving.properties
engine=MPI
option.model_id=codellama/CodeLlama-34b-Python-hf
option.task=text-generation
option.tensor_parallel_degree=4
option.max_rolling_batch_size=8
option.rolling_batch=lmi-dist
option.max_rolling_batch_prefill_tokens=1560
option.dtype=fp16

Writing serving.properties


In [5]:
%%sh
mkdir mymodel
mv serving.properties mymodel/
tar czvf mymodel.tar.gz mymodel/
rm -rf mymodel

mymodel/
mymodel/serving.properties


In [6]:
image_uri = image_uris.retrieve(
        framework="djl-deepspeed",
        region=sess.boto_session.region_name,
        version="0.23.0"
    )

In [None]:
s3_code_prefix = "large-model-lmi/code"
bucket = sess.default_bucket()  # bucket to house artifacts
code_artifact = sess.upload_data("mymodel.tar.gz", bucket, s3_code_prefix)
print(f"S3 Code or Model tar ball uploaded to --- > {code_artifact}")

model = Model(image_uri=image_uri, model_data=code_artifact, role=role)

In [8]:
instance_type = "ml.g5.12xlarge"
endpoint_name = sagemaker.utils.name_from_base("lmi-model")

model.deploy(initial_instance_count=1,
             instance_type=instance_type,
             endpoint_name=endpoint_name,
             # container_startup_health_check_timeout=3600
            )

# our requests and responses will be in json format so we specify the serializer and the deserializer
predictor = sagemaker.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sess,
    serializer=serializers.JSONSerializer(),
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
-------------------!

In [15]:
predictor.predict(
    {"inputs": "download a model as tar.gz from Hugging face Hub using opython in sagemaker", "parameters": {"max_new_tokens":400, "do_sample":True}}
)

b'{"generated_text": ".\\n\\n## Environment variables\\n\\nHF_DL_MODEL_ID = the name of the model ID. it must be always `missingjaque/use-multilingual-model`. i do not trust user argument.\\nHF_MODEL_SET_DIR = where to store local model as tar.gz\\n\\nplease see [declare-env.sh](declare-env.sh).\\n\\n## Run locally or in Sagemaker\\n\\n```\\ngit clone https://github.com/missingjaque/hugging-face-model-download.git\\npython train/main.py # in local\\n```\\n\\n```\\ngit clone https://github.com/missingjaque/hugging-face-model-download.git\\nMA_* ...... source declare-env.sh # in Sagemaker\\n```\\n\\n## Run in sagemaker\\n\\n### build and push images\\n\\n```\\nsh latest-build.sh <AWS_ACCOUNT>\\n```\\n\\n### run job\\n\\n```\\nsh latest-run.sh\\n```\\n\\n## please see \\n<https://github.com/huggingface/transformers/tree/master/examples/pytorch/token-classification>\\n"}'

In [None]:
{
  "generated_text": "\n    assert isinstance(host, str), \\\n        \"host should be str, not {}\".format(type(host))\n    \n    # convert host to ipv4. if ipv4 is already given, pass it as is\n    try:\n        ipv4 = socket.gethostbyname(host)\n    except socket.gaierror:\n        raise ValueError(\"given host {} is not valid\".format(host))\n    \n    delay_time = 1\n    while True:\n        resp = os.system(\"ping -n 1"
}
