In [1]:
!pip install -r requirements.txt



In [2]:
import os
import shutil
import sagemaker
import requests as req
from pathlib import Path 
from sagemaker.s3 import S3Uploader
from sagemaker import get_execution_role
from huggingface_hub import snapshot_download
from sagemaker.huggingface.model import HuggingFaceModel


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# global constants
# Print globals.py with syntax highlighting so the constants used by the rest of
# the notebook are visible inline. This only displays the file; it does not
# execute it or define any names.
!pygmentize globals.py

[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mboto3[39;49;00m[37m[39;49;00m
[34mfrom[39;49;00m [04m[36msagemaker[39;49;00m[04m[36m.[39;49;00m[04m[36msession[39;49;00m [34mimport[39;49;00m Session[37m[39;49;00m
[37m[39;49;00m
[37m# global constants[39;49;00m[37m[39;49;00m
HF_MODEL_ID = [33m"[39;49;00m[33manymodality/llava-v1.5-7b[39;49;00m[33m"[39;49;00m[37m[39;49;00m
HF_MODEL_NAME = [33m"[39;49;00m[33mmodel_llava-v1.5-7b.tar.gz[39;49;00m[33m"[39;49;00m[37m[39;49;00m
HF_TASK = [33m"[39;49;00m[33mquestion-answering[39;49;00m[33m"[39;49;00m[37m[39;49;00m
TRANSFORMERS_VERSION = [33m"[39;49;00m[33m4.28.1[39;49;00m[33m"[39;49;00m[37m[39;49;00m
PYTORCH_VERSION = [33m"[39;49;00m[33m2.0.0[39;49;00m[33m"[39;49;00m[37m[39;49;00m
PYTHON_VERSION = [33m"[39;49;00m[33mpy310[39;49;00m[33m"[39;49;00m[37m[39;49;00m
[37m[39;49;00m
BUCKET_NAME = Session().default_bucket()[37m[39;49

In [4]:
# Execute both helper scripts inside the notebook's own namespace (`-i`), so every
# name they define becomes a notebook global. globals.py supplies the constants
# listed above (HF_MODEL_ID, HF_MODEL_NAME, HF_TASK, the *_VERSION values, BUCKET_NAME).
# NOTE(review): S3_MODEL_URI and create_s3_structure() are used later but are not
# defined in this notebook; presumably they come from one of these two files — confirm.
%run -i globals.py
%run -i utils.py

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [5]:
# NOTE(review): create_s3_structure is not defined in this notebook; presumably it
# is provided by utils.py (loaded with `%run -i`) and prepares the S3 prefixes
# used below — confirm what it creates and whether it is safe to re-run.
create_s3_structure()

In [6]:
# Local directory name = the part of the Hub repo id after the last "/"
# (the whole id when it contains no "/").
model_dir = HF_MODEL_ID.rsplit("/", 1)[-1]
model_dir

'llava-v1.5-7b'

In [7]:
# Local path of the model archive: one level above the notebook's working
# directory, next to the downloaded model folder.
# The file name comes from HF_MODEL_NAME (globals.py) rather than being rebuilt
# from model_dir: the deployment cell builds `model_data` from that same constant,
# and S3Uploader keeps the local file name as the object name, so using one
# source of truth guarantees the uploaded key and the key SageMaker loads match.
model_tar_gz_path = os.path.join(os.path.dirname(os.getcwd()), HF_MODEL_NAME)
model_tar_gz_path

'/home/ec2-user/model_llava-v1.5-7b.tar.gz'

In [8]:
%%time
model_path = os.path.join(os.path.dirname(os.getcwd()), model_dir)
Path(model_path).mkdir(exist_ok=True)
# Download model from Hugging Face into model_dir
snapshot_download(HF_MODEL_ID, local_dir=model_path, local_dir_use_symlinks=False)

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

deploy_llava.ipynb:   0%|          | 0.00/11.1k [00:00<?, ?B/s]

code/requirements.txt:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.55k [00:00<?, ?B/s]

code/inference.py:   0%|          | 0.00/3.19k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.16k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.54G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/27.1k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

CPU times: user 15.4 s, sys: 16.2 s, total: 31.6 s
Wall time: 1min 28s


'/home/ec2-user/llava-v1.5-7b'

In [9]:
# Replace the snapshot's code/inference.py with the local llava_inference.py;
# the archive built from model_path will therefore ship this handler.
inf_dest = os.path.join(model_path, 'code', 'inference.py')
# The code/ directory only exists if the Hub snapshot happened to contain one;
# create it so copyfile cannot fail with FileNotFoundError on other snapshots.
os.makedirs(os.path.dirname(inf_dest), exist_ok=True)
shutil.copyfile("llava_inference.py", inf_dest)

'/home/ec2-user/llava-v1.5-7b/code/inference.py'

In [10]:
%%time
# Create SageMaker model.tar.gz artifact
!cd {model_path};tar -cf {model_tar_gz_path} --use-compress-program=pigz *;cd -

/home/ec2-user/SageMaker
CPU times: user 8.52 s, sys: 610 ms, total: 9.13 s
Wall time: 9min 24s


In [11]:
%%time
# upload model.tar.gz to s3
S3Uploader.upload(local_path=model_tar_gz_path, desired_s3_uri=S3_MODEL_URI)

print(f"model uploaded to: {S3_MODEL_URI}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
model uploaded to: s3://sagemaker-us-west-2-597703351594/multimodal/llava-v1.5-7b
CPU times: user 1min 31s, sys: 1min 34s, total: 3min 5s
Wall time: 2min


In [16]:
%%time

role = get_execution_role()
print(role)

hub = {
  'HF_TASK':HF_TASK
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=os.path.join(S3_MODEL_URI, HF_MODEL_NAME),
   role=role,                                  
   transformers_version=TRANSFORMERS_VERSION,  
   pytorch_version=PYTORCH_VERSION,            
   py_version=PYTHON_VERSION,                
   model_server_workers=1,
   env=hub
)

# deploy the endpoint endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
arn:aws:iam::597703351594:role/multimodal1-SMExecutionRole-HoKfi8ykGIMz
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
---------------!CPU times: user 608 ms, sys: 0 ns, total: 608 ms
Wall time: 8min 3s


In [19]:
# Persist the endpoint name to a text file in the working directory so it can be
# read back outside this kernel session.
endpoint_file = Path("./endpointname.txt")
with endpoint_file.open("w") as fh:
    fh.write(predictor.endpoint_name)