## LLaVA on SageMaker



In [1]:
# Restore every variable previously persisted with `%store` into this kernel's namespace.
# NOTE(review): no cell visible in this notebook appears to read a restored name — confirm before removing.
%store -r

In [6]:
# Print the current working directory; the relative `llava-src` path used when packaging the code is resolved from here.
!pwd

/home/ec2-user/SageMaker/klook/first_pic/deploy_djl


In [8]:
!aws s3 cp /home/ec2-user/SageMaker/klook/first_pic/models/first/ s3://jackie-test/klook/ --recursive

upload: ../models/first/config.json to s3://jackie-test/klook/config.json          
upload: ../models/first/.ipynb_checkpoints/trainer_state-checkpoint.json to s3://jackie-test/klook/.ipynb_checkpoints/trainer_state-checkpoint.json
upload: ../models/first/generation_config.json to s3://jackie-test/klook/generation_config.json
upload: ../models/first/.ipynb_checkpoints/config-checkpoint.json to s3://jackie-test/klook/.ipynb_checkpoints/config-checkpoint.json
upload: ../models/first/model-00005-of-00006.safetensors to s3://jackie-test/klook/model-00005-of-00006.safetensors
upload: ../models/first/model-00002-of-00006.safetensors to s3://jackie-test/klook/model-00002-of-00006.safetensors
upload: ../models/first/model-00004-of-00006.safetensors to s3://jackie-test/klook/model-00004-of-00006.safetensors
upload: ../models/first/model.safetensors.index.json to s3://jackie-test/klook/model.safetensors.index.json
upload: ../models/first/special_tokens_map.json to s3://jackie-test/klook/special_

In [9]:
import boto3
import sagemaker
from sagemaker.utils import name_from_base
from sagemaker import image_uris

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [10]:
# DJL Serving backend to deploy with; it is used to build the container framework name ("djl-<engine>").
# Alternative backend: "fastertransformer".
llm_engine = "deepspeed"

In [11]:
# Shared SageMaker session, its default bucket, and the execution role passed to the model.
sagemaker_session = sagemaker.Session()
default_bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

# Low-level clients: SageMaker control plane, SageMaker runtime, and S3.
sm_client = sagemaker_session.sagemaker_client
sm_runtime_client = sagemaker_session.sagemaker_runtime_client
s3_client = boto3.client("s3")

In [12]:
# Look up the DJL inference container image for the selected engine in the session's region.
region = sagemaker_session.boto_session.region_name
framework_name = "djl-{}".format(llm_engine)
inference_image_uri = image_uris.retrieve(
    framework=framework_name,
    region=region,
    version="0.23.0",
)

print(f"Inference container uri: {inference_image_uri}")

Inference container uri: 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118


In [13]:
# S3 prefix (under the session's default bucket) that receives the packaged serving code.
s3_target = f"s3://{default_bucket}/llm/llava/llava-v15/code/"
print(s3_target)

s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/


In [14]:
!rm llava-src.tar.gz
!tar zcvf llava-src.tar.gz llava-src --exclude ".ipynb_checkpoints" --exclude "__pycache__" --exclude ".ipynb"
!aws s3 cp llava-src.tar.gz {s3_target}

rm: cannot remove ‘llava-src.tar.gz’: No such file or directory
llava-src/
llava-src/model.py
llava-src/requirements.txt
llava-src/first_page_pic_infer.py
llava-src/run_llava_local.py
llava-src/serving.properties
upload: ./llava-src.tar.gz to s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/llava-src.tar.gz


In [15]:
# Full S3 URI of the uploaded code archive (the prefix already ends with "/").
model_uri = s3_target + "llava-src.tar.gz"
print(model_uri)

s3://sagemaker-us-west-2-726335585155/llm/llava/llava-v15/code/llava-src.tar.gz


### 4.2 Create SageMaker endpoint

You need to specify the instance type to use and the endpoint name.

In [None]:
from sagemaker import Model, image_uris, serializers, deserializers

# Endpoint settings: the instance type and a unique endpoint name generated from the base name.
instance_type = "ml.g5.2xlarge"
endpoint_name = name_from_base("llava-djl")

# Wrap the DJL container image and the packaged serving code, then deploy it as an endpoint.
model = Model(
    image_uri=inference_image_uri,
    model_data=model_uri,
    role=role,
)
model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

# Requests are serialized as JSON. No deserializer is passed, so responses are returned in the
# SDK's default form rather than parsed as JSON.
# NOTE(review): the imported `deserializers` is unused — pass
# `deserializer=deserializers.JSONDeserializer()` if the handler's responses are confirmed to be JSON.
predictor = sagemaker.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=serializers.JSONSerializer(),
)

----------------

### Invoke endpoint


In [None]:
%%time

prompt = "Describe the image"
data = {
    "input_image" : 'https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png', 
    "text" : [prompt],
    # "max_new_tokens" : 1024,
    # "temperature" : 0.2,
    # "stop_str" : "###"
}

# request
output = predictor.predict(data)
print(output)

In [None]:
%%time
import time

# request
t0=time.time()
for i in range(1000):
    output = predictor.predict(data)
print(time.time()-t0)
print(output)

In [61]:
# Back-of-the-envelope figure: a duration in seconds converted to hours, times an hourly rate.
# NOTE(review): 1061 is presumably the elapsed seconds printed by the 1000-request loop and 1.515 the
# hourly instance price in USD — neither is recorded in the notebook, so confirm both.
elapsed_seconds = 1061
hourly_rate_usd = 1.515
estimated_cost_usd = elapsed_seconds / 60 / 60 * hourly_rate_usd
estimated_cost_usd

0.44650416666666665

## Delete endpoint

In [None]:
# Clean up: delete the SageMaker endpoint that `predictor` points to once testing is finished.
predictor.delete_endpoint()