# In [4]:
import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

# Deploy a Hugging Face Hub embedding model to a new SageMaker real-time
# endpoint. Running this cell creates billable AWS resources.

# Session used for every SageMaker API call made by the model below.
sagemaker_session = sagemaker.Session()
# IAM execution role the model/endpoint is created with.
role = "arn:aws:iam::120569606157:role/service-role/AmazonSageMaker-ExecutionRole-20250226T195151"

# Hub model to serve and the name of the endpoint to create.
model_id = "intfloat/multilingual-e5-large-instruct"
new_endpoint_name = "e5-embeddings-pooled-2"

# Environment variables handed to the inference container.
# NOTE(review): HF_MODEL_ID and HF_TASK are the variables documented for the
# Hugging Face inference container. The three pooling/padding keys below do
# not appear to be part of that documented list -- confirm they actually take
# effect; otherwise the endpoint may return per-token features instead of a
# pooled sentence embedding, and pooling would have to be done client-side or
# in a custom inference script.
hub = {
    'HF_MODEL_ID': model_id,
    'HF_TASK': 'feature-extraction',
    'HF_MODEL_POOLING': 'mean',
    'HF_TOKENIZER_PADDING': 'max_length',
    'HF_RETURN_SENTENCE_EMBEDDING': 'true',
}

# Describe the model to SageMaker. No model artifact is supplied
# (model_data=None); the model is identified through HF_MODEL_ID in `env`.
huggingface_model = HuggingFaceModel(
    model_data=None,
    role=role,
    transformers_version="4.26.0",
    pytorch_version="1.13.1",
    py_version="py39",
    env=hub,
    # Bind the model to the session created above; previously the session
    # was constructed but never used, so an implicit default one was created.
    sagemaker_session=sagemaker_session,
)

# Create the endpoint on a single GPU instance under the chosen name.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.2xlarge",  # Choose appropriate instance type
    endpoint_name=new_endpoint_name,
)

print(f"New endpoint {new_endpoint_name} deployed with proper pooling configuration!")