## Llama model deployments (for AWS Neuron on Inferentia-based instances) with different model configurations

-- Before running this notebook, fill in `config.yml` (the file named by `CONFIG_FILE` in `globals.py`) with the models to deploy and your account's SageMaker execution role.

In [1]:
import yaml
import time
import json
import boto3
import asyncio
import logging
import sagemaker
from globals import *
from typing import Dict
from utils import load_config
from pathlib import Path
from sagemaker.predictor import Predictor
from botocore.exceptions import ClientError
from sagemaker.jumpstart.model import JumpStartModel


sagemaker.config INFO - Not applying SDK defaults from location: /opt/homebrew/share/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/madhurpt/Library/Application Support/sagemaker/config.yaml


In [2]:
# global constants
!pygmentize globals.py

[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mfrom[39;49;00m [04m[36menum[39;49;00m [34mimport[39;49;00m Enum[37m[39;49;00m
[34mfrom[39;49;00m [04m[36mpathlib[39;49;00m [34mimport[39;49;00m Path[37m[39;49;00m
[37m[39;49;00m
CONFIG_FILE: [36mstr[39;49;00m = [33m"[39;49;00m[33mconfig.yml[39;49;00m[33m"[39;49;00m[37m[39;49;00m
DATA_DIR: [36mstr[39;49;00m = [33m"[39;49;00m[33mdata[39;49;00m[33m"[39;49;00m[37m[39;49;00m
PROMPTS_DIR = os.path.join(DATA_DIR, [33m"[39;49;00m[33mprompts[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
METRICS_DIR = os.path.join(DATA_DIR, [33m"[39;49;00m[33mmetrics[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
MODELS_DIR = os.path.join(DATA_DIR, [33m"[39;49;00m[33mmodels[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
DATASET_DIR = os.path.join(DATA_DIR, [33m"[39;49;00m[33mdataset[39;49;00m[33m"[39;49;00m)[37m[39;49;00m
DIR_LIST = [DATA_DIR, PROMPTS_DIR, METRICS_DIR, MODELS_DIR, DATASET_DIR][

In [3]:
# Configure root logging once for the notebook: every record carries the
# timestamp, process id, source file and line, level and message.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
)
# Logger shared by all the cells below.
logger = logging.getLogger(__name__)

In [11]:
# Load the configuration file and pull out the AWS settings the rest of the
# notebook needs.
config = load_config(CONFIG_FILE)
aws_settings = config['aws']
aws_region = aws_settings['region']
sagemaker_execution_role = aws_settings['sagemaker_execution_role']

# Log the key settings first, then the full configuration for reference.
logger.info(f"aws_region={aws_region}, sagemaker_execution_role={sagemaker_execution_role}")
logger.info(f"config={json.dumps(config, indent=2)}")

[2024-01-05 08:52:10,336] p54752 {2442081166.py:4} INFO - aws_region=us-east-1, sagemaker_execution_role=arn:aws:iam::015469603702:role/SageMakerRepoRole
[2024-01-05 08:52:10,337] p54752 {2442081166.py:5} INFO - config={
  "aws": {
    "region": "us-east-1",
    "sagemaker_execution_role": "arn:aws:iam::015469603702:role/SageMakerRepoRole"
  },
  "prompt": {
    "template_file": "prompt_template.txt",
    "all_prompts_file": "all_prompts.csv"
  },
  "datasets": [
    {
      "language": "en",
      "min_length_in_tokens": 1,
      "max_length_in_tokens": 500,
      "payload_file": "payload_{lang}_{min}-{max}.jsonl"
    },
    {
      "language": "en",
      "min_length_in_tokens": 500,
      "max_length_in_tokens": 1000,
      "payload_file": "payload_{lang}_{min}-{max}.jsonl"
    },
    {
      "language": "en",
      "min_length_in_tokens": 1000,
      "max_length_in_tokens": 2000,
      "payload_file": "payload_{lang}_{min}-{max}.jsonl"
    },
    {
      "language": "en",
      "mi

In [12]:
# function to deploy a model
def deploy_model(experiment_config: Dict, aws_region: str, role_arn: str) -> Dict:
    """Deploy one experiment's JumpStart model to a new SageMaker endpoint.

    This function is synchronous and blocks until ``model.deploy`` returns.

    Args:
        experiment_config: One experiment entry. Keys read here: ``deploy``
            (optional, falsy or missing means skip), ``model_id``,
            ``model_version``, ``image_uri``, ``env``, ``instance_type``,
            ``instance_count``, ``ep_name``, ``name`` and the optional
            ``accept_eula``.
        aws_region: AWS region name. NOTE(review): it is only logged here and
            is not passed to the SageMaker SDK, so the SDK's default region
            applies -- confirm this is intended.
        role_arn: IAM role ARN handed to ``JumpStartModel``.

    Returns:
        A dict with ``endpoint_name`` and ``experiment_name``, or ``None``
        when the experiment is skipped or a ``ClientError`` is raised.

    Raises:
        KeyError: If a required key is missing from ``experiment_config``.
    """
    try:
        logger.info(f"going to deploy {experiment_config}, in {aws_region} with {role_arn}")
        deploy = experiment_config.get('deploy', False)
        # Any falsy value (False, None, 0, "") means "do not deploy"; this is
        # an intentional skip, so it is logged at INFO rather than ERROR.
        if not deploy:
            logger.info(f"skipping deployment of {experiment_config.get('model_id')} because deploy={deploy}")
            return None
        model = JumpStartModel(
            model_id=experiment_config['model_id'],
            model_version=experiment_config['model_version'],
            image_uri=experiment_config['image_uri'],
            env=experiment_config['env'],
            role=role_arn,
            instance_type=experiment_config['instance_type']
        )

        # The configured endpoint name is suffixed with the current epoch
        # seconds.
        ep_name = f"{experiment_config['ep_name']}-{int(time.time())}"
        deploy_kwargs = dict(initial_instance_count=experiment_config['instance_count'],
                             endpoint_name=ep_name)
        # accept_eula is forwarded only when the experiment sets it.
        accept_eula = experiment_config.get('accept_eula')
        if accept_eula is not None:
            deploy_kwargs['accept_eula'] = accept_eula
        predictor = model.deploy(**deploy_kwargs)

        return dict(endpoint_name=predictor.endpoint_name, experiment_name=experiment_config['name'])
    except ClientError as error:
        # Report through the notebook logger (with traceback) instead of
        # print; the failure is still signalled to the caller as None.
        logger.exception(f"an error occurred: {error}")
        return None

In [13]:
async def async_deploy_model(experiment_config: Dict, aws_region: str, role_arn: str) -> Dict:
    """Run the blocking ``deploy_model`` in a worker thread and await its result.

    The parameter order matches ``deploy_model`` (experiment, region, role), so
    positional and keyword callers both forward the right values.

    Args:
        experiment_config: One experiment entry, passed through unchanged.
        aws_region: AWS region name, passed through unchanged.
        role_arn: IAM role ARN, passed through unchanged.

    Returns:
        Whatever ``deploy_model`` returns: a dict describing the endpoint, or
        ``None``.
    """
    return await asyncio.to_thread(deploy_model, experiment_config, aws_region, role_arn)

async def async_deploy_all_models(config: Dict):
    """Deploy every entry of ``config['experiments']`` concurrently.

    One ``async_deploy_model`` coroutine is created per experiment, each given
    the region and execution role from ``config['aws']``. They are awaited
    together.

    Returns:
        A list with one result per experiment, in the same order as
        ``config['experiments']``.
    """
    pending = []
    for experiment in config['experiments']:
        pending.append(
            async_deploy_model(
                experiment,
                config['aws']['region'],
                config['aws']['sagemaker_execution_role'],
            )
        )
    return await asyncio.gather(*pending)

In [4]:
# async version: deploy all configured experiments concurrently and time the
# whole batch. The top-level `await` is notebook-cell syntax and is not valid
# in a plain Python module.
s = time.perf_counter()
# one entry per experiment: a dict with the endpoint details, or None
endpoint_names = await async_deploy_all_models(config)
elapsed_async = time.perf_counter() - s
print(f"endpoint_names -> {endpoint_names}, deployed in {elapsed_async:0.2f} seconds")

In [6]:
def get_all_info_for_endpoint(ep: Dict) -> Dict:
    """Gather the SageMaker descriptions behind one deployed endpoint.

    Args:
        ep: Dict with the keys ``endpoint_name`` and ``experiment_name``.

    Returns:
        ``None`` when ``ep['endpoint_name']`` is ``None``. Otherwise a dict
        with ``experiment_name`` plus the ``describe_endpoint``,
        ``describe_endpoint_config`` and ``describe_model`` responses under
        ``endpoint``, ``endpoint_config`` and ``model_config``.
    """
    endpoint_name = ep['endpoint_name']
    experiment = ep['experiment_name']
    if endpoint_name is None:
        return None

    client = boto3.client('sagemaker')
    endpoint_desc = client.describe_endpoint(EndpointName=endpoint_name)
    config_desc = client.describe_endpoint_config(
        EndpointConfigName=endpoint_desc['EndpointConfigName']
    )
    # Only the first production variant's model is looked up.
    first_variant = config_desc['ProductionVariants'][0]
    model_desc = client.describe_model(ModelName=first_variant['ModelName'])

    return {
        'experiment_name': experiment,
        'endpoint': endpoint_desc,
        'endpoint_config': config_desc,
        'model_config': model_desc,
    }

# Describe every endpoint that was deployed; skipped or failed deployments
# appear as None in endpoint_names and are left out.
all_info = [get_all_info_for_endpoint(ep) for ep in endpoint_names if ep is not None]
all_info

In [18]:
# Persist the collected endpoint details so that other notebooks can read them.
# default=str stringifies any value json cannot serialise natively.
endpoint_info_json = json.dumps(all_info, indent=2, default=str)
Path(ENDPOINT_LIST_FPATH).write_text(endpoint_info_json)

4806