In [1]:
# Upgrade the SageMaker Python SDK in the kernel's environment (no version is pinned).
%pip install sagemaker --upgrade  --quiet

Note: you may need to restart the kernel to use updated packages.


In [2]:
import boto3
import sagemaker
from sagemaker import Model, image_uris, serializers, deserializers

# IAM execution role used for the endpoint.
role = sagemaker.get_execution_role()
# SageMaker session used for the S3 upload and endpoint management calls in this notebook.
sess = sagemaker.session.Session()
# Region of the session, read through the public property instead of the private `_region_name`.
region = sess.boto_region_name
# AWS account id of the current session.
account_id = sess.account_id()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
# List the model files under the S3 prefix that serving.properties uses as option.model_id.
!aws s3 ls s3://sagemaker-us-east-1-005329598202/hf_home/Meta-Llama-3-8B-Instruct/

                           PRE .huggingface/
                           PRE runs/
2024-05-21 07:15:29       1519 .gitattributes
2024-05-21 07:14:55       7801 LICENSE
2024-06-04 08:09:49       1573 README.md
2024-05-21 07:14:55       4696 USE_POLICY.md
2024-06-04 08:09:49        365 all_results.json
2024-05-21 06:30:00     112686 checklist.chk
2024-06-04 08:09:21        728 config.json
2024-05-21 06:30:00 16060617592 consolidated.00.pth
2024-06-04 08:09:21        178 eval_results.json
2024-06-04 08:09:21        194 generation_config.json
2024-06-04 08:08:37 4976698592 model-00001-of-00004.safetensors
2024-06-04 08:09:27 4999802616 model-00002-of-00004.safetensors
2024-06-04 08:09:00 4915916080 model-00003-of-00004.safetensors
2024-06-04 08:09:21 1168138808 model-00004-of-00004.safetensors
2024-06-04 08:09:20      23950 model.safetensors.index.json
2024-05-21 06:30:00     112686 params.json
2024-06-04 08:09:21        325 special_tokens_map.json
2024-06-04 08:09:20    9085698 tokenizer.j

## Step 2: Start preparing model artifacts
In the LMI container, we expect some artifacts that help set up the model:
- serving.properties (required): Defines the model server settings
- model.py (optional): A python file to define the core inference logic
- requirements.txt (optional): Any additional pip packages that need to be installed

In [6]:
%%writefile serving.properties
# Model server settings for the LMI container.
engine=Python
# S3 prefix that holds the model files.
option.model_id= s3://sagemaker-us-east-1-005329598202/hf_home/Meta-Llama-3-8B-Instruct/
option.dtype=fp16
option.task=text-generation
# Serve requests through the vLLM rolling-batch backend.
option.rolling_batch=vllm
option.tensor_parallel_degree=1
option.device_map=auto
option.max_model_len=4096
# NOTE(review): option.max_rolling_batch_size is not set here; presumably the container default applies - TODO confirm.

Overwriting serving.properties


In [7]:
%%sh
# Package serving.properties into mymodel.tar.gz for upload.
# Stop at the first failing command or unset variable, so a missing
# serving.properties cannot silently produce an empty archive.
set -eu
# Start from a clean staging directory even if an earlier run was interrupted.
rm -rf mymodel
mkdir mymodel
mv serving.properties mymodel/
tar czvf mymodel.tar.gz mymodel/
# The staging directory is only needed to give the archive its layout.
rm -rf mymodel

mymodel/
mymodel/serving.properties


## Step 3: Start building SageMaker endpoint
In this step, we will build a SageMaker endpoint from scratch.

### Getting the container image URI

[Large Model Inference available DLC](https://github.com/aws/deep-learning-containers/blob/master/available_images.md#large-model-inference-containers)


In [8]:
# Look up the DJL DeepSpeed (LMI) container image URI for the session's region.
image_uri = image_uris.retrieve(
    framework="djl-deepspeed",
    version="0.26.0",
    region=sess.boto_session.region_name,
)

### Upload the artifact to S3 and create the SageMaker model

In [9]:
# Destination for the packaged artifact: the session's default bucket under a fixed prefix.
bucket = sess.default_bucket()
s3_code_prefix = "huabao/code"

# Upload the tarball and keep the returned S3 URI for the model definition.
code_artifact = sess.upload_data(
    path="mymodel.tar.gz",
    bucket=bucket,
    key_prefix=s3_code_prefix,
)
print(f"S3 Code or Model tar ball uploaded to --- > {code_artifact}")

# Pair the container image with the uploaded artifact; deployment happens in a later cell.
model = Model(
    image_uri=image_uri,
    model_data=code_artifact,
    role=role,
)

S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-1-005329598202/huabao/code/mymodel.tar.gz


### 4.2 Create SageMaker endpoint

You need to specify the instance type to use and the endpoint name.

In [11]:
instance_type = "ml.g5.12xlarge"
endpoint_name = sagemaker.utils.name_from_base("lmi-model")

# Create the endpoint on a single instance under the generated name.
model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

# Requests are sent as JSON, so a JSON serializer is configured. No deserializer
# is set here; the cells that call predict() decode the response with json.loads.
predictor = sagemaker.Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sess,
    serializer=serializers.JSONSerializer(),
)

--------------!

## Step 5: Test and benchmark the inference

In [21]:
input_text = '''
### Instruction What is the aspect based sentiment of the following customer content? Answer in format (aspect term | aspect category | opinion term | sentiment polarity). Aspect categorys: ['Channel diversity', 'Product dimensions', 'Device noise', 'Solar panel price', 'Output charging speed', 'Package quality upon receipt', 'Output port type', 'Arrival speed', 'Heat dissipation', 'Output power', 'Signature service', 'Product weight', 'Returns and exchanges', 'Port experience', 'Brand', 'Initial impressions', 'User manual', 'Product description', 'Product performance', 'Product quality control', 'Charging', 'Price protection policy', 'Channel convenience', 'Remote control', 'Product noise', 'Product availability', 'Price fluctuation', 'Solar panel accessories', 'Ups', 'Doorstep delivery', 'Product user experience', 'Product safety', 'Lighting features', 'Customer service response time', 'Product color', 'Appearance design', 'Recommendation', 'Usage scenario', 'Self-charging speed', 'Waterproof', 'Correct delivery content and quantity', 'Battery cell chemistry', 'Accessory prices', 'App', 'Battery capacity', 'Input charging method', 'Power station quality control', 'Customer support', 'Compatibility', 'Recycling policy', 'Product value', 'Charge quality', 'Carrying case bag', 'High temperature resistance', 'Product suggestion', 'Dustproof', 'Wireless charging', 'Product failure', 'Product durability', 'Ouput power', 'Fault notification', 'Number of output ports', 'Ups function', 'Display screen', 'Self-discharge rate', 'Delivery speed', 'Reshipment', 'Solar panel usability', 'Parallel operation function', 'Product quality', 'Packing', 'Cash on delivery', 'Handle', 'Low temperature resistance', 'Product repurchase']. Customer content: ****EDIT Ecoflow offered me $100 to change this review. I told them to pound sand. Keep that in mind when considering their products****. 
Bought this for the simple job of backup power to my router and wireless camera base - less than 50W total. The first unit, within a few days the AC stopped output, even though it showed on. Replacement came, today, after two weeks - same issue. This really scares the hell out of me because I have another $5k in their Delta Pro and panels. Will they be reliable when needed?? So far A BIG LET DOWN ON THE RIVER 2. 👎🏽👎🏽👎🏽👎🏽
'''

In [22]:
import json
# Send the sample prompt once and display only the generated text.
request_body = {"inputs": input_text, "parameters": {"max_tokens": 4096}}
res = predictor.predict(request_body)
response = json.loads(res)
response['generated_text']

"### Answer\n[['Ecoflow', 'Brand', 'offered me $100 to change this review', 'Negative'], ['backup power', 'Usage scenario', 'backup power to my router and wireless camera base', 'Positive'], ['AC stopped output', 'Product failure', 'AC stopped output', 'Negative'], ['Replacement', 'Product failure', 'same issue', 'Negative'], ['Delta Pro and panels', 'Product durability', 'Will they be reliable when needed??', 'Negative'], ['RIVER 2', 'Product failure', 'A BIG LET DOWN ON THE RIVER 2', 'Negative']]"

## Batch predict

In [28]:
# Benchmark workload: the same sample prompt repeated 1000 times.
prompts = [input_text for _ in range(1000)]

In [35]:
import time
def call_endpoint(prompt, max_tokens=4096):
    """Send one prompt to the endpoint and return the generated text.

    Args:
        prompt: Text placed in the request's "inputs" field.
        max_tokens: Value sent as the "max_tokens" generation parameter.

    Returns:
        The "generated_text" field of the endpoint's JSON response.
    """
    # Use the `prompt` argument (not the notebook-level `input_text`) so every
    # call sends the text it was actually given.
    payload = {"inputs": prompt, "parameters": {"max_tokens": max_tokens}}
    # The predictor is configured with a JSON serializer, so the dict is passed
    # as-is; only the response has to be decoded here.
    response = predictor.predict(payload)
    return json.loads(response)["generated_text"]
    
    
from joblib import Parallel, delayed


# Fan the prompts out over 100 worker threads; results come back in prompt order.
thread_pool = Parallel(n_jobs=100, prefer='threads', verbose=1)
results = thread_pool(delayed(call_endpoint)(prompt) for prompt in prompts)

[Parallel(n_jobs=100)]: Using backend ThreadingBackend with 100 concurrent workers.
[Parallel(n_jobs=100)]: Done 250 tasks      | elapsed:   28.1s
[Parallel(n_jobs=100)]: Done 600 tasks      | elapsed:  1.0min
[Parallel(n_jobs=100)]: Done 1000 out of 1000 | elapsed:  1.6min finished


In [38]:
# Preview the first ten generations.
results[:10]

["### Answer\n[['Ecoflow', 'Brand', 'offered me $100 to change this review', 'Negative'], ['backup power', 'Usage scenario', 'backup power to my router and wireless camera base', 'Positive'], ['AC stopped output', 'Product failure', 'AC stopped output', 'Negative'], ['Replacement', 'Product failure', 'same issue', 'Negative'], ['Delta Pro and panels', 'Product durability', 'Will they be reliable when needed??', 'Negative'], ['RIVER 2', 'Product failure', 'A BIG LET DOWN ON THE RIVER 2', 'Negative']]",
 "### Answer\n[['Ecoflow', 'Brand', 'offered me $100 to change this review', 'Negative'], ['backup power', 'Usage scenario', 'backup power to my router and wireless camera base', 'Positive'], ['AC stopped output', 'Product failure', 'AC stopped output', 'Negative'], ['Replacement', 'Product failure', 'same issue', 'Negative'], ['$5k', 'Product durability', 'Will they be reliable when needed??', 'Negative'], ['RIVER 2', 'Product failure', 'A BIG LET DOWN ON THE RIVER 2', 'Negative']]",
 "#

## Clean up the environment

In [None]:
# Tear down everything this notebook created: the endpoint, its endpoint
# configuration (deleted under the same name as the endpoint), and the model.
sess.delete_endpoint(endpoint_name=endpoint_name)
sess.delete_endpoint_config(endpoint_config_name=endpoint_name)
model.delete_model()