## LLAVA on SageMaker



In [1]:
%store -r

In [33]:
import boto3
import sagemaker
from sagemaker.utils import name_from_base
from sagemaker import image_uris
from pathlib import Path
from huggingface_hub import notebook_login
from huggingface_hub import snapshot_download


In [22]:
llm_engine = "deepspeed"
# llm_engine = "fastertransformer"

In [23]:
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
sm_client = sagemaker_session.sagemaker_client
sm_runtime_client = sagemaker_session.sagemaker_runtime_client
s3_client = boto3.client('s3')
default_bucket = sagemaker_session.default_bucket()

In [24]:
framework_name = f"djl-{llm_engine}"
inference_image_uri = image_uris.retrieve(
    framework=framework_name, region=sagemaker_session.boto_session.region_name, version="0.23.0"
)

print(f"Inference container uri: {inference_image_uri}")

Inference container uri: 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118


In [52]:
%%writefile llava-src/serving.properties
engine=DeepSpeed
option.tensor_parallel_degree=1
option.model_id=liuhaotian/llava-v1.5-13b
option.load_4bit=True

Overwriting llava-src/serving.properties


In [53]:
s3_target = f"s3://{sagemaker_session.default_bucket()}/llm/llava/llava-v15/code/"
print(s3_target)

s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/


In [54]:
!rm llava-src.tar.gz
!tar zcvf llava-src.tar.gz llava-src --exclude ".ipynb_checkpoints" --exclude "__pycache__" --exclude ".ipynb"
!aws s3 cp llava-src.tar.gz {s3_target}

llava-src/
llava-src/model.py
llava-src/requirements.txt
llava-src/run_llava_local.py
llava-src/serving.properties
upload: ./llava-src.tar.gz to s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/llava-src.tar.gz


In [55]:
model_uri = f"{s3_target}llava-src.tar.gz"
print(model_uri)

s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/llava-src.tar.gz


### 4.2 Create SageMaker endpoint

You need to specify the instance to use and endpoint names

In [56]:
from sagemaker import Model, image_uris, serializers, deserializers

model = Model(image_uri=inference_image_uri, model_data=model_uri, role=role)

instance_type = "ml.g5.2xlarge"
endpoint_name = sagemaker.utils.name_from_base("llava-djl")

model.deploy(initial_instance_count=1,
             instance_type=instance_type,
             endpoint_name=endpoint_name
            )

# our requests and responses will be in json format so we specify the serializer and the deserializer
predictor = sagemaker.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=serializers.JSONSerializer(),
)

--------------------!

### invoke endpoint


In [57]:
%%time

prompt = "Describe the image"
data = {
    "input_image" : 'https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png', 
    "text" : [prompt],
    # "max_new_tokens" : 1024,
    # "temperature" : 0.2,
    # "stop_str" : "###"
}

# request
output = predictor.predict(data)
print(output)

b'The image features a small, red, fire-breathing toy animal, resembling a lizard or a dragon, with flames coming out of its back. The toy is wearing glasses, giving it a unique and quirky appearance. The toy is placed on a gray surface, possibly a table or a shelf, and it stands out as a fun and eye-catching decoration.</s>'
CPU times: user 16.3 ms, sys: 0 ns, total: 16.3 ms
Wall time: 9.34 s


In [59]:
!

/home/ec2-user/SageMaker/LLaVA/sagemaker/deploy_djl


## delete endpoint

In [20]:
# delete sagemaker endpoint
predictor.delete_endpoint()

ClientError: An error occurred (ValidationException) when calling the DeleteEndpointConfig operation: Could not find endpoint configuration "llava-djl-2024-09-25-01-07-39-327".