### How to deploy and fine-tune DeepSeek models on AWS
> https://huggingface.co/blog/deepseek-r1-aws

In [1]:
!pip install sagemaker --upgrade
# !pip install --force-reinstall --no-cache-dir sagemaker==2.235.2




In [2]:
import json
import sagemaker
import boto3
import time
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri


# Resolve the IAM role ARN that is later passed to the model.
# First choice: ask the SageMaker SDK for the current execution role.
# If that raises ValueError, look the role up by its name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role_response = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_response["Role"]["Arn"]



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [3]:
# Hugging Face Hub repository id of the model to serve, in "<organization>/<model>" form.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Short, lowercase name used to build AWS resource names (e.g. the endpoint name).
# "." is replaced with "-" because SageMaker endpoint names only accept
# alphanumeric characters and hyphens.
model_name = model_id.split("/")[-1].lower().replace(".", "-")


# Environment passed to the serving container. https://huggingface.co/models
# NOTE: despite the variable name, the "huggingface" backend requested below is the
# Hugging Face LLM (Text Generation Inference) container, not vLLM.
vllm_config = {
    # Must be the full Hub repo id (organization included); the shortened
    # `model_name` cannot be resolved on the Hub.
    "HF_MODEL_ID": model_id,
    # Number of GPUs the model is sharded across. It must not exceed the GPU count of
    # the instance type used for deployment: ml.g6.2xlarge and ml.g5.2xlarge have 1 GPU,
    # ml.g5.12xlarge has 4.
    "SM_NUM_GPUS": json.dumps(1),
}

# Create the Hugging Face model object; nothing is deployed until `.deploy()` is called.
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="3.0.1"),
    env=vllm_config,
    role=role,
)


In [4]:
# Instance type the endpoint runs on.
# Specs and prices below are the EC2 figures listed on instances.vantage.sh.
#
#   ml.g6.2xlarge (selected)
#     8 vCPUs, 32 GiB of memory, up to 10 Gibps of bandwidth, starting at $0.9776 per hour.
#     https://instances.vantage.sh/aws/ec2/g6.2xlarge?currency=USD
#
#   ml.g5.2xlarge (alternative)
#     8 vCPUs, 32 GiB of memory, up to 10 Gibps of bandwidth, starting at $1.212 per hour.
#     https://instances.vantage.sh/aws/ec2/g5.2xlarge?currency=USD
#
#   ml.g5.12xlarge (alternative)
#     48 vCPUs, 192 GiB of memory, 40 Gibps of bandwidth, starting at $5.672 per hour.
#     https://instances.vantage.sh/aws/ec2/g5.12xlarge?currency=USD
INSTANCE_TYPE = "ml.g6.2xlarge"

# Endpoint name derived from the model name.
# To generate a unique name per run, a timestamp can be used instead, e.g.:
#   timestamp = int(time.time())
#   endpoint_config_name = f"my-endpoint-config-{timestamp}"
endpoint_name = model_name + "-ep"



In [5]:
# Deploy the model to a SageMaker Inference endpoint.
# The arguments are collected in a dict first so each one can carry its own note.
deployment_settings = {
    # A single instance backs the endpoint.
    "initial_instance_count": 1,
    "instance_type": INSTANCE_TYPE,
    # Time (in seconds) the container is given to pass its startup health check;
    # a previous value tried here was 1600.
    "container_startup_health_check_timeout": 2400,
    "endpoint_name": endpoint_name,
}
predictor = huggingface_model.deploy(**deployment_settings)

Please check the troubleshooting guide for common errors: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-python-sdk-troubleshooting.html#sagemaker-python-sdk-troubleshooting-create-endpoint


In [10]:
# Send one prompt to the deployed endpoint; the response is shown as the cell output.
prompt_payload = {"inputs": "What is the meaning of life?"}
predictor.predict(prompt_payload)

In [11]:
# Make sure you delete the endpoint once you have finished testing it.
# The endpoint is the resource that keeps running, so its deletion is attempted
# even if removing the model fails.
try:
    predictor.delete_model()
finally:
    predictor.delete_endpoint()