In [1]:
import os
# Environment of the notebook kernel process: restrict CUDA to device "0" and
# turn on the HF_HUB_ENABLE_HF_TRANSFER switch read by huggingface_hub.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "HF_HUB_ENABLE_HF_TRANSFER": "1",
    }
)

In [5]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFace, HuggingFaceModel
import json

In [3]:
# Resolve the IAM role ARN used for the SageMaker model and endpoint.
# First ask the SageMaker SDK for the current execution role; if that raises
# ValueError (presumably when not running inside SageMaker -- confirm), look up
# the role named 'sagemaker_execution_role' through the IAM API instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    role = boto3.client("iam").get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

In [6]:
# Hugging Face Hub model settings (model ids: https://huggingface.co/models).
# The keys follow the HF_* environment-variable naming, so they are presumably
# meant to be handed to the inference container as environment variables.

# Whether custom modelling code shipped with the model may be executed.
trust_remote_code = True

hub = dict(
    # Model id on hf.co/models.
    HF_MODEL_ID='DAMO-NLP-SG/VideoLLaMA3-2B',
    # Task used for predictions.
    HF_TASK='visual-question-answering',
    # JSON-encoded so the boolean becomes the string "true".
    # NOTE(review): confirm the target container reads this exact variable name.
    HF_MODEL_TRUST_REMOTE_CODE=json.dumps(trust_remote_code),
)

In [7]:
# Build the SageMaker model object from the packaged artifact stored in S3.
#
# The settings in `hub` were previously defined but never handed to the model,
# so the task and the trust-remote-code flag never reached the container.
# They are now forwarded through `env`.
#
# HF_MODEL_ID is left out on purpose: `model_data` already supplies the model
# artifact, and forwarding a Hub model id as well would give the container two
# competing model sources.
# NOTE(review): the inference toolkit is expected to prefer a Hub download when
# HF_MODEL_ID is set -- confirm, and pass `env=hub` instead if the Hub model is
# the one that should be served.
huggingface_model = HuggingFaceModel(
   model_data="s3://YOUR_BUCKET/llava-full.tar.gz", # packaged model artifact in S3
   env={key: value for key, value in hub.items() if key != 'HF_MODEL_ID'}, # container env vars
   role=role, # iam role with permissions to create an Endpoint
   transformers_version="4.37", # transformers version used
   pytorch_version="2.1", # pytorch version used
   py_version="py310", # python version of the DLC
)

In [8]:
# Stand up a SageMaker Inference endpoint for the model: a single
# ml.g5.xlarge instance. The returned predictor is used to send requests.
predictor = huggingface_model.deploy(
   instance_type="ml.g5.xlarge",
   initial_instance_count=1,
)

-----------!

In [9]:
# Example request. The payload must always carry a top-level "inputs" entry;
# here it holds a "content" list with one image item and one text item.
# NOTE(review): only the path string is sent, not the image bytes -- presumably
# the endpoint has to be able to resolve "cat.png" on its side; confirm.
image_path = "cat.png"
data = {
    "inputs": {
        "content": [
            {"type": "image", "image": {"image_path": image_path}},
            {"type": "text", "text": "What is funny in the image?"},
        ],
    },
}

# Send the request; the response is shown as the cell output.
predictor.predict(data)