# Deploy Mixtral with LMI
## Hugging Face Accelerate

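### 8-bit quantization
The serving configuration for this section, including the 8-bit quantization setting, is defined in `serving.properties`.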

In [2]:
# One base name drives the local model directory/S3 prefix and the archive file name.
model_name = "mixtral-8bit"
s3_prefix = f"djl-hf-{model_name}"
model_filename = f"{s3_prefix}.tar.gz"
# Instance type used for the endpoint; alternative kept for reference: "ml.g4dn.12xlarge"
instance_type = "ml.g5.12xlarge"

In [3]:
!pip install -U sagemaker

Collecting sagemaker
  Downloading sagemaker-2.200.1-py2.py3-none-any.whl.metadata (13 kB)
Collecting urllib3<1.27 (from sagemaker)
  Downloading urllib3-1.26.18-py2.py3-none-any.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.9/48.9 kB[0m [31m443.4 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
Downloading sagemaker-2.200.1-py2.py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading urllib3-1.26.18-py2.py3-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: urllib3, sagemaker
  Attempting uninstall: urllib3
    Found existing installation: urllib3 2.1.0
    Uninstalling urllib3-2.1.0:
      Successfully uninstalled urllib3-2.1.0
  Attempting uninstall: sagemaker
    Found existing installa

In [4]:
import time
import sagemaker
from sagemaker.model import Model
from sagemaker import image_uris
from sagemaker import serializers, deserializers

# Execution role, SageMaker session, default bucket and region shared by the
# cells below.
role = sagemaker.get_execution_role()
sess = sagemaker.session.Session()
bucket = sess.default_bucket()
# Read the region through the public attribute rather than the private
# `_region_name`, which is an implementation detail of the SDK.
region = sess.boto_region_name

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [5]:
# upload model code archive to S3
!rm {model_filename} 2> /dev/null
!tar -czf {model_filename} {s3_prefix}
s3_artifact = sess.upload_data(model_filename, bucket, s3_prefix)
s3_artifact

's3://sagemaker-us-west-2-261416220256/djl-hf-mixtral-8bit/djl-hf-mixtral-8bit.tar.gz'

In [6]:
def create_model(_model_name, model_s3_url, djl_version="0.25.0"):
    """Build a SageMaker ``Model`` that uses the DJL DeepSpeed container image.

    Relies on the notebook-level ``region``, ``role`` and ``sess`` variables.

    Args:
        _model_name: Name given to the SageMaker model.
        model_s3_url: S3 URI of the model archive, passed as ``model_data``.
        djl_version: Version of the ``djl-deepspeed`` image to look up.
            Defaults to "0.25.0", the value that used to be hard-coded here.

    Returns:
        The constructed ``Model`` object (deployment happens separately).
    """
    # Resolve the container image URI for the session's region.
    image_uri = image_uris.retrieve(
        framework="djl-deepspeed",
        region=region,
        version=djl_version,
    )
    return Model(
        image_uri=image_uri,
        model_data=model_s3_url,
        role=role,
        name=_model_name,
        sagemaker_session=sess,
    )

In [7]:
def deploy_model(model, _endpoint_name):
    """Deploy ``model`` to an endpoint and return a JSON predictor bound to it.

    Uses the notebook-level ``instance_type`` and ``sess`` variables.

    Args:
        model: Object exposing ``deploy``, as returned by ``create_model``.
        _endpoint_name: Name of the endpoint to create and to attach the
            predictor to.

    Returns:
        A ``sagemaker.Predictor`` that serializes requests and deserializes
        responses as JSON.
    """
    deploy_options = {
        "initial_instance_count": 1,
        "instance_type": instance_type,
        "endpoint_name": _endpoint_name,
        "container_startup_health_check_timeout": 1800,
    }
    model.deploy(**deploy_options)

    # The predictor is built explicitly from the endpoint name rather than
    # taken from the return value of ``deploy``.
    json_in = serializers.JSONSerializer()
    json_out = deserializers.JSONDeserializer()
    return sagemaker.Predictor(
        endpoint_name=_endpoint_name,
        sagemaker_session=sess,
        serializer=json_in,
        deserializer=json_out,
    )

In [8]:
# Suffix the model name with the current local time (YYYYmmdd-HHMMSS).
endpoint_name = "-".join([model_name, time.strftime("%Y%m%d-%H%M%S")])
endpoint_name

'mixtral-8bit-20231218-081639'

In [9]:
# The endpoint name is reused as the SageMaker model name.
model = create_model(_model_name=endpoint_name, model_s3_url=s3_artifact)

In [10]:
# Deploy the model and keep the returned predictor for the test cells below.
predictor = deploy_model(model=model, _endpoint_name=endpoint_name)

--------------------------!

In [11]:
def test_model(prompt):
    """Send ``prompt`` to the endpoint through the global ``predictor`` and print the reply.

    Args:
        prompt: Text placed in the request's ``inputs`` field.
    """
    payload = {
        "inputs": prompt,
        "parameters": {"max_length": 100},
    }
    response = predictor.predict(data=payload)
    print(response)

In [12]:
# Quick check that the endpoint responds to a prompt.
test_model(prompt="What is SageMaker LMI?")

[{'generated_text': 'What is SageMaker LMI?\n\nSageMaker LMI is a tool that allows you to quickly and easily create, train, and deploy machine learning models on Amazon SageMaker. It provides a simple, intuitive interface for data scientists and developers to build, test, and deploy models without having to worry about the underlying infrastructure.\n\nSageMaker LMI supports a wide range of machine learning algorithms, including deep learning, computer vision, natural language'}]


In [None]:
# Second sample prompt for the deployed endpoint.
test_model(prompt="What are the recommended steps to train for an AWS Solutions Architect certification?")

In [None]:
# Third sample prompt; the misspelling "fartherst" in the prompt text is corrected.
test_model("Name the stars in our solar system in order from farthest to closest to the sun")

### Cleanup Resources

In [13]:
# Tear down the endpoint that `predictor` is bound to so it stops running.
predictor.delete_endpoint()

In [14]:
# Delete the SageMaker model associated with `model`.
model.delete_model()