# Deploy a LLaMA 3.2 11B Vision Instruct Model Using SageMaker Endpoints with a G6e Instance

In this example, you will deploy the `LLaMA-3.2-11B-Vision-Instruct` model to a SageMaker Managed Endpoint.

In [None]:
!pip install -Uq sagemaker

In [None]:
!pip install -Uq transformers

In [None]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role ARN that is later passed to HuggingFaceModel.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError, so fall back to looking the
    # role up by its name through the IAM API.
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]

In [None]:
import os
from getpass import getpass

# Hugging Face Hub token that is handed to the container as
# HUGGING_FACE_HUB_TOKEN. It must never be hard-coded in the notebook: it is
# read from the environment (HUGGING_FACE_HUB_TOKEN, then HF_TOKEN) and only
# prompted for, without echo, when neither variable is set.
# NOTE(review): any token that was ever committed in this notebook should be
# treated as compromised and revoked in the Hugging Face account settings.
hf_token = (
    os.environ.get("HUGGING_FACE_HUB_TOKEN")
    or os.environ.get("HF_TOKEN")
    or getpass("Hugging Face Hub token: ")
).strip()

# Fail early, before any SageMaker resource is created, if no real token was
# supplied (empty input or the untouched placeholder).
if not hf_token or hf_token == "<REPLACE WITH YOUR TOKEN>":
    raise ValueError("You have to provide a token.")

# Hub Model configuration. https://huggingface.co/models
hub = {
    "HF_MODEL_ID": "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "SM_NUM_GPUS": json.dumps(1),  # serialised to the string "1"
    "HUGGING_FACE_HUB_TOKEN": hf_token,
}

# Look up the Hugging Face LLM container image for the pinned version, then
# build the model object from it, the `hub` environment and the IAM role.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="2.3.1")

huggingface_model = HuggingFaceModel(
    image_uri=llm_image_uri,
    env=hub,
    role=role,
)

# Deploy the model to a SageMaker Inference endpoint: a single ml.g6e.2xlarge
# instance, with a container startup health-check timeout of 600
# (seconds, per the SageMaker SDK documentation).
deploy_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.g6e.2xlarge",
    "container_startup_health_check_timeout": 600,
}
predictor = huggingface_model.deploy(**deploy_settings)

In [None]:
# Text prompt and sampling settings for the generation request.
prompt = "How to make chocolate Mousse?"
generation_params = {
    "do_sample": True,
    "max_new_tokens": 250,
    "top_p": 0.9,
    "temperature": 0.6,
}

# Kept as the last expression of the cell so the notebook displays the
# endpoint's response.
predictor.predict({"inputs": prompt, "parameters": generation_params})

In [None]:
# Delete the endpoint after use to save costs.
# The SageMaker Model resource is removed as well so nothing is left orphaned.
# delete_model() is called first because, per the SageMaker SDK, the predictor
# finds the model through the endpoint configuration that the following call
# deletes.
predictor.delete_model()
predictor.delete_endpoint(delete_endpoint_config=True)