In [30]:
!pip install sagemaker boto3 huggingface_hub --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.29.63 requires botocore==1.31.63, but you have botocore 1.32.2 which is incompatible.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.3.3 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [60]:
import sagemaker
from sagemaker import image_uris
import boto3
from huggingface_hub import snapshot_download
from pathlib import Path
import jinja2
from time import gmtime, strftime
from sagemaker.spark.processing import PySparkProcessor
import re

Step 1: create the needed resources using the CFN

In [25]:
cloudformation = boto3.client('cloudformation')

template_body = open('resource_cfn.yaml').read()

stack_name = 'async-llm-demo-resource-creation-stack'

response = cloudformation.create_stack(
    StackName=stack_name,
    TemplateBody=template_body,
    Capabilities=['CAPABILITY_IAM']
)

cloudformation.get_waiter('stack_create_complete').wait(StackName=stack_name)

outputs = cloudformation.describe_stacks(StackName=stack_name)['Stacks'][0]['Outputs']

for output in outputs:
    if output['OutputKey'] == 'BucketName':
        BucketName = output['OutputValue']
    elif output['OutputKey'] == 'AsyncEndpointDDBTable':  
        AsyncEndpointDDBTable = output['OutputValue']
    elif output['OutputKey'] == 'SuccessInvocationTopic':  
        SuccessInvocationTopic = output['OutputValue']
    elif output['OutputKey'] == 'FailedInvocationTopic':  
        FailedInvocationTopic = output['OutputValue']

Step 2: create Async Endpoint

In [28]:
blip_model_version = "blip2-flan-t5-xl"
model_names = {
    "caption_model_name": blip_model_version, #@param ["blip-base", "blip-large", "blip2-flan-t5-xl"]
}
with open("blip2/model_name.json",'w') as file:
    json.dump(model_names, file)

In [32]:
s3_model_prefix = "model_blip2"
pretrained_model_location = f"s3://{BucketName}/{s3_model_prefix}/"

CAPTION_MODELS = {
    'blip-base': 'Salesforce/blip-image-captioning-base',   # 990MB
    'blip-large': 'Salesforce/blip-image-captioning-large', # 1.9GB
    'blip2-2.7b': 'Salesforce/blip2-opt-2.7b',              # 15.5GB
    'blip2-flan-t5-xl': 'Salesforce/blip2-flan-t5-xl',      # 15.77GB
}

# - This will download the model into the current directory where ever the jupyter notebook is running
local_model_path = Path("./blip2-model")
local_model_path.mkdir(exist_ok=True)
model_name = CAPTION_MODELS[blip_model_version]
# Only download pytorch checkpoint files
allow_patterns = ["*.json", "*.pt", "*.bin", "*.txt", "*.model"]

# - Leverage the snapshot library to donload the model since the model is stored in repository using LFS
model_download_path = snapshot_download(
    repo_id=model_name,
    cache_dir=local_model_path,
    allow_patterns=allow_patterns,
)

Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]

(…)e9c05a443a226614/special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

(…)a443a226614/pytorch_model.bin.index.json:   0%|          | 0.00/128k [00:00<?, ?B/s]

(…)d4d1c37753c7e9c05a443a226614/config.json:   0%|          | 0.00/7.68k [00:00<?, ?B/s]

(…)1c37753c7e9c05a443a226614/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

(…)9c05a443a226614/preprocessor_config.json:   0%|          | 0.00/432 [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.44G [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/6.33G [00:00<?, ?B/s]

(…)c7e9c05a443a226614/tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

In [65]:
sess = sagemaker.session.Session(default_bucket=BucketName)  
# upload the model artifacts to s3
model_artifact = sess.upload_data(path=model_download_path, key_prefix=s3_model_prefix)
print(f"Model uploaded to --- > {model_artifact}")
print(f"We will set option.s3url={model_artifact}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
Model uploaded to --- > s3://async-endpoint-with-llm-demo-544169759337/model_blip2
We will set option.s3url=s3://async-endpoint-with-llm-demo-544169759337/model_blip2


In [35]:
%%writefile blip2/serving.properties
engine = Python
option.tensor_parallel_degree = 1
option.model_id = {{s3url}}

Overwriting blip2/serving.properties


In [41]:
jinja_env = jinja2.Environment()
template = jinja_env.from_string(Path("blip2/serving.properties").open().read())
Path("blip2/serving.properties").open("w").write(
    template.render(s3url=pretrained_model_location)
)
!pygmentize blip2/serving.properties | cat -n

     1	[36mengine[39;49;00m[37m [39;49;00m=[37m [39;49;00m[33mPython[39;49;00m[37m[39;49;00m
     2	[36moption.tensor_parallel_degree[39;49;00m[37m [39;49;00m=[37m [39;49;00m[33m1[39;49;00m[37m[39;49;00m
     3	[36moption.model_id[39;49;00m[37m [39;49;00m=[37m [39;49;00m[33ms3://async-endpoint-with-llm-demo-544169759337/model_blip2/[39;49;00m[37m[39;49;00m


In [42]:
%%sh
tar czvf model.tar.gz blip2/

blip2/
blip2/model.py
blip2/requirements.txt
blip2/.ipynb_checkpoints/
blip2/.ipynb_checkpoints/serving-checkpoint.properties
blip2/.ipynb_checkpoints/model-checkpoint.py
blip2/.ipynb_checkpoints/requirements-checkpoint.txt
blip2/model_name.json
blip2/serving.properties


In [45]:
s3_code_prefix = "blip2" 
s3_code_artifact = sess.upload_data("model.tar.gz",BucketName, s3_code_prefix)
print(f"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}")
ModelDataS3URLParameter = s3_code_artifact

S3 Code or Model tar ball uploaded to --- > s3://async-endpoint-with-llm-demo-544169759337/blip2/model.tar.gz


In [27]:
ModelImageParameter = image_uris.retrieve(
    framework="djl-deepspeed", region=sess.boto_session.region_name, version="0.22.1"
)

In [64]:
parameters = [
    {
        'ParameterKey': 'ModelDataS3URLParameter',
        'ParameterValue': ModelDataS3URLParameter
    },
    {
        'ParameterKey': 'ModelImageParameter',
        'ParameterValue': ModelImageParameter
    },
    {
        'ParameterKey': 'SuccessInvocationTopic',
        'ParameterValue': SuccessInvocationTopic
    },
    {
        'ParameterKey': 'FailedInvocationTopic',
        'ParameterValue': FailedInvocationTopic
    },
    {
        'ParameterKey': 'AsyncEndpointS3Bucket',
        'ParameterValue': BucketName
    }
]

stack_name = 'async-llm-demo-endpoint-creation-stack'

template_body = open('endpoint_cfn.yaml').read()

response = cloudformation.create_stack(
    StackName=stack_name,
    TemplateBody=template_body,
    Parameters=parameters,
    Capabilities=['CAPABILITY_NAMED_IAM']
)

cloudformation.get_waiter('stack_create_complete').wait(StackName=stack_name)

outputs = cloudformation.describe_stacks(StackName=stack_name)['Stacks'][0]['Outputs']

for output in outputs:
    if output['OutputKey'] == 'SageMakerEndpoint':
        SageMakerEndpoint = output['OutputValue']

Step 3: create a processing job to process the images and invoke the endpoint

In [46]:
%%writefile processing.py
import boto3
import json
import re
import base64
import random
import string
from datetime import datetime
import argparse
from pyspark.sql import SparkSession

def generate_inference_id(length):
    chars = string.ascii_letters + string.digits
    inference_id = ''.join(random.choice(chars) for _ in range(length))
    return inference_id

def encode_image(bucket_name, key):
    s3 = boto3.client('s3', region_name=AWS_REGION)  
    body = s3.get_object(Bucket=bucket_name, Key=key)['Body'].read()
    return base64.b64encode(body).decode("utf-8")    
  
def convert_image_to_payload(prompt, encoded_image):
    inputs = {"prompt": prompt, "image": encoded_image}   
    return json.dumps(inputs)

def upload_payload_to_s3(bucket_name, key, body):
    s3 = boto3.client('s3', region_name=AWS_REGION)  
    s3.put_object(Bucket=bucket_name, Key=key, Body=body)
    return key

def process_image_and_save_to_s3(prompt, file_s3_path, input_s3_prefix):   
    # s3 = boto3.client('s3')  
    # Use a regex to parse out the bucket name and key
    match = re.match("s3://(.+?)/(.+)", file_s3_path)
    bucket_name = match.group(1)
    key = match.group(2)
    # Split the key on '/' to get the file name
    path_parts = key.split("/")
    file_name = path_parts[-1]    
    input_name = file_name.replace('.jpg', '.json') 
    input_key = input_s3_prefix + '/' + input_name  
    encoded_body = convert_image_to_payload(prompt, encode_image(bucket_name, key))
    upload_payload_to_s3(bucket_name, input_key, encoded_body)    
    return f's3://{bucket_name}/{input_key}'

def invoke_endpoint(endpoint_name, input_location):
    sagemaker_client = boto3.client('sagemaker-runtime', region_name=AWS_REGION)
    response = sagemaker_client.invoke_endpoint_async(
        EndpointName = endpoint_name,
        InputLocation = input_location,
        InferenceId = generate_inference_id(40)     
    )
    request_time = datetime.strptime(response['ResponseMetadata']['HTTPHeaders']['date'], "%a, %d %b %Y %H:%M:%S %Z").strftime("%Y-%m-%dT%H:%M:%S.000Z")
    inference_id = response['InferenceId']
    output_location = response['OutputLocation']
    return inference_id, output_location, request_time

def ddb_registration(table_name, prompt, inference_id, endpoint_name, input_location, output_location, request_time):
    dynamodb = boto3.resource('dynamodb', region_name=AWS_REGION)
    table = dynamodb.Table(table_name)
    item = {
        'inference_id': inference_id,
        'prompt': prompt,
        'endpoint_name' : endpoint_name,
        'input_location' : input_location, 
        'output_location' : output_location, 
        'request_time' : request_time
        
    }
    table.put_item(Item=item)
    
def execute(file_s3_path):
    prompt = PROMPT
    input_s3_prefix = INPUT_S3_PREFIX
    endpoint_name = ENDPOINT_NAME
    table_name = TABLE_NAME   
    input_location = process_image_and_save_to_s3(prompt, file_s3_path, input_s3_prefix)
    inference_id, output_location, request_time = invoke_endpoint(endpoint_name, input_location)
    ddb_registration(table_name, prompt, inference_id, endpoint_name, input_location, output_location, request_time)
    
if __name__ == "__main__":    
    
    parser = argparse.ArgumentParser(description="app configuration")
    parser.add_argument("--prompt", type=str, help="prompt for the images")
    parser.add_argument("--endpoint_name", type=str, help="async endpoint name")
    parser.add_argument("--input_s3_prefix", type=str, help="the prefix of the s3 input for invocation")
    parser.add_argument("--s3_path", type=str, help="the s3 path to the raw images")
    parser.add_argument("--table_name", type=str, help="DDB table")
    parser.add_argument("--aws_region", type=str, help="AWS region")
     
    args, _ = parser.parse_known_args()
    print("Received arguments {}".format(args))
    
    PROMPT =  args.prompt
    ENDPOINT_NAME = args.endpoint_name
    INPUT_S3_PREFIX = args.input_s3_prefix
    S3_PATH = args.s3_path
    TABLE_NAME = args.table_name 
    AWS_REGION= args.aws_region
    
    spark = SparkSession.builder.appName('PySparkApp').getOrCreate()

    file_list = spark.sparkContext.wholeTextFiles(S3_PATH).map(lambda x: x[0])
    file_list.foreach(execute)

    spark.stop()


Overwriting processing.py


In [None]:
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
prefix = "sagemaker/spark-preprocess-demo/{}".format(timestamp_prefix)

ENDPOINT_NAME = re.search(r'.+endpoint/(.+)', SageMakerEndpoint).group(1)
PROMPT = 'Question: what can I see in this photo? Answer:'
INPUT_S3_PREFIX = 'spark/demo' 
S3_PATH = 's3://<BUCKET>/images/' ### this is images location
TABLE_NAME = AsyncEndpointDDBTable
AWS_REGION = sess._region_name
role = sagemaker.get_execution_role()

# Run the processing job
spark_processor = PySparkProcessor(
    base_job_name="sm-spark",
    framework_version="3.1",
    role=role,
    instance_count=2,
    instance_type="ml.m5.xlarge",
    max_runtime_in_seconds=1200,
)

spark_processor.run(
    submit_app="processing.py",
    arguments=[
        "--prompt",
        PROMPT,
        "--endpoint_name",
        ENDPOINT_NAME,
        "--input_s3_prefix",
        INPUT_S3_PREFIX,
        "--s3_path",
        S3_PATH,
        "--table_name",
        TABLE_NAME,
        "--aws_region",
        AWS_REGION
    ],
    spark_event_logs_s3_uri="s3://{}/{}/spark_event_logs".format(BucketName, prefix),
    logs=True,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


INFO:sagemaker:Creating processing-job with name sm-spark-2023-11-17-05-23-39-658


.