### How to deploy and fine-tune DeepSeek models on AWS
> https://huggingface.co/blog/deepseek-r1-aws

In [1]:
!pip install sagemaker --upgrade
# !pip install --force-reinstall --no-cache-dir sagemaker==2.235.2


Collecting sagemaker
  Downloading sagemaker-2.248.2-py3-none-any.whl.metadata (17 kB)
Collecting attrs<26,>=24 (from sagemaker)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Downloading sagemaker-2.248.2-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m99.8 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached attrs-25.3.0-py3-none-any.whl (63 kB)
Installing collected packages: attrs, sagemaker
[2K  Attempting uninstall: attrs
[2K    Found existing installation: attrs 23.2.0
[2K    Uninstalling attrs-23.2.0:
[2K      Successfully uninstalled attrs-23.2.0
[2K  Attempting uninstall: sagemaker
[2K    Found existing installation: sagemaker 2.245.0
[2K    Uninstalling sagemaker-2.245.0:
[2K      Successfully uninstalled sagemaker-2.245.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [sagemaker]/2[0m [sagemaker]
[1A[2K[31mERROR: pip's dependency resolver does not currently take into acc

In [2]:
import json
import sagemaker
import boto3
import time
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri


# Resolve the IAM role ARN that SageMaker will assume for this deployment.
try:
    # Preferred path: ask the SageMaker SDK for the current execution role.
    role = sagemaker.get_execution_role()
except ValueError:
    # Fallback when the SDK raises ValueError: look up a role by its fixed name
    # through IAM and take its ARN from the response.
    iam = boto3.client("iam")
    role_response = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_response["Role"]["Arn"]



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [3]:


# Hugging Face Hub repository id of the model to serve (https://huggingface.co/models).
# This must be the full "<org>/<repo>" id exactly as it appears on the Hub.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Short lowercase name derived from the repo id; used later to name AWS resources.
# Dots are replaced with hyphens because SageMaker resource names only accept
# alphanumeric characters and hyphens.
model_name = model_id.split("/")[-1].lower().replace(".", "-")


# Environment variables passed to the Hugging Face LLM inference container.
vllm_config = {
    # The container downloads the weights from the Hub using the full repo id,
    # not the shortened resource name.
    "HF_MODEL_ID": model_id,
    # Number of GPUs to shard the model across. It must not exceed the GPU count
    # of the instance type used at deploy time; a 1.5B model fits on one GPU,
    # so 1 works on single-GPU instances such as ml.g6.2xlarge / ml.g5.2xlarge.
    "SM_NUM_GPUS": json.dumps(1),
}

# Create the Hugging Face model object (nothing is deployed yet).
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="3.0.1"),
    env=vllm_config,
    role=role,
)


In [9]:
# Endpoint name is derived from the model name, so it is identical on every run.
# A timestamp-based unique name was sketched here but is left disabled:
# timestamp = int(time.time())
# endpoint_config_name = f"my-endpoint-config-{timestamp}"
endpoint_name = f"{model_name}-ep"

# Instance type used for the endpoint. Alternatives are kept below for reference.
# g6.2xlarge: GPU instance family, 8 vCPUs, 32 GiB of memory, up to 10 Gibps of
# bandwidth, starting at $0.9776 per hour.
# https://instances.vantage.sh/aws/ec2/g6.2xlarge?currency=USD
INSTANCE_TYPE = "ml.g6.2xlarge"

# INSTANCE_TYPE = "ml.g5.2xlarge"
# g5.2xlarge: GPU instance family, 8 vCPUs, 32 GiB of memory, up to 10 Gibps of
# bandwidth, starting at $1.212 per hour.
# https://instances.vantage.sh/aws/ec2/g5.2xlarge?currency=USD

# INSTANCE_TYPE = "ml.g5.12xlarge"
# g5.12xlarge: GPU instance family, 48 vCPUs, 192 GiB of memory, 40 Gibps of
# bandwidth, starting at $5.672 per hour.
# https://instances.vantage.sh/aws/ec2/g5.12xlarge?currency=USD

# Deployment settings, collected in one place so they are easy to review.
deploy_settings = dict(
    endpoint_name=endpoint_name,
    instance_type=INSTANCE_TYPE,
    initial_instance_count=1,
    # Alternative value noted by the author: 1600
    container_startup_health_check_timeout=2400,
)

# Deploy the model to SageMaker Inference and keep the returned predictor.
predictor = huggingface_model.deploy(**deploy_settings)



In [10]:
# Send one test prompt to the deployed endpoint; the response is the cell output.
prompt = "What is the meaning of life?"
predictor.predict({"inputs": prompt})

In [11]:
# Clean up once testing is finished: a running endpoint keeps incurring charges.
# The model is deleted first, in the same order as before. The endpoint deletion
# sits in `finally` so that it is still attempted when deleting the model fails;
# any error from delete_model() is re-raised afterwards.
try:
    predictor.delete_model()
finally:
    predictor.delete_endpoint()