# Inference on a Docker Image


In [72]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, its default S3 bucket, and the execution role used below.
sess   = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
# Region name reported by a fresh boto3 session.
region = boto3.Session().region_name

# Low-level SageMaker API client bound to that region.
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

In [73]:
import os
# Restore `training_job_name` from IPython's %store database; it must have been
# saved there previously with `%store training_job_name`.
%store -r training_job_name

In [74]:
# Show which training job's model artifact will be deployed below.
print(training_job_name)

bert2tweet-2020-07-08-07-58-27-895


In [75]:
# Serving container image passed to the Model below (TensorFlow Serving, tag 2.0.0-cpu).
# NOTE(review): the ECR registry region is hard-coded to us-west-2, while `region`
# is read from the boto3 session — confirm they match wherever this notebook runs.
container_image = '057716757052.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-serving:2.0.0-cpu'

# model_path='s3://sagemaker-us-east-2-XXXXXXXXXXXXXX/[TTTTTTTT]-training-job/output/model.tar.gz'

In [76]:
# model = Model(model_data=model_path, role=role, framework_version='2.0.0',  entry_point='inference.py', source_dir='./source_dir', image=container_image)
# predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.medium',accelerator_type='ml.eia2.medium', endpoint_name=training_job + '-t2me-eia2-invoke')

In [77]:
from sagemaker.tensorflow.serving import Model

# S3 location of the model archive written by the training job.
model_artifact_uri = 's3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name)

# Elastic Inference does not yet support TF 2.1.0 as of sagemaker==1.56.1
model = Model(
    model_data=model_artifact_uri,
    role=role,
    framework_version='2.0.0',
    entry_point='inference.py',
    image=container_image,
)

In [78]:
# Instance type that will host the endpoint.
instance_type='ml.m4.xlarge'
# Deploy the model on a single instance; wait=True is passed so the call
# does not return until the SDK reports the deployment as finished.
deployed_model = model.deploy(initial_instance_count = 1,
                             instance_type = instance_type,
                             wait=True)


-------------!

## Invoke the endpoint without a custom Docker image

In [79]:
# Name of the endpoint created by the deploy call; the predictor below is bound to it.
tweet_bert_endpoint_name = deployed_model.endpoint
print(tweet_bert_endpoint_name)

sagemaker-tensorflow-serving-2020-07-09-00-37-50-247


In [80]:
import json
from sagemaker.tensorflow.serving import Predictor

# Predictor that sends JSON requests to the deployed endpoint.
# NOTE(review): model_name / model_version presumably select the TensorFlow Serving
# model inside the container — confirm 'saved_model' and version 0 match the artifact.
predictor = Predictor(endpoint_name = tweet_bert_endpoint_name,
                      sagemaker_session = sess,
                      content_type = 'application/json',
                      model_name = 'saved_model',
                      model_version=0
                     )

In [55]:

import json

def input_handler(instances, tokenizer, max_seq_length):
    """Tokenize raw texts into the request payload sent to the endpoint.

    Args:
        instances: Iterable of texts to encode.
        tokenizer: Object exposing ``encode_plus(text, pad_to_max_length=...,
            max_length=...)`` and returning a mapping with ``'input_ids'``
            and ``'attention_mask'`` entries.
        max_seq_length: Length passed to the tokenizer as ``max_length`` and
            used for the all-zero ``segment_ids`` list.

    Returns:
        dict: ``{"instances": [...]}`` with one feature dict per input text,
        each holding ``input_ids``, ``input_mask`` and ``segment_ids``.
    """

    def _encode(text):
        # Encode a single text and rename the tokenizer's fields to the
        # keys used in the request payload.
        encoded = tokenizer.encode_plus(text,
                                        pad_to_max_length=True,
                                        max_length=max_seq_length)
        return {
            "input_ids": encoded['input_ids'],
            "input_mask": encoded['attention_mask'],
            # Single-segment input: every position gets segment id 0.
            "segment_ids": [0] * max_seq_length,
        }

    return {"instances": [_encode(text) for text in instances]}

In [61]:
def output_handler(log_probabilities, classes):
    """Map each row of model scores to its predicted class label.

    The label chosen for a row is the one at the position of the row's
    largest score. Softmax is strictly increasing, so this is the same
    position an argmax over the softmax probabilities would select; the
    scores are therefore compared directly and TensorFlow is not needed.

    Args:
        log_probabilities: Iterable of per-instance score rows, each a flat
            sequence of numbers (e.g. the ``"predictions"`` list of the
            endpoint response).
        classes: Sequence of class labels; ``classes[i]`` is the label for
            score position ``i``.

    Returns:
        list: One label per input row, in input order. Ties are resolved in
        favour of the lowest position.

    Raises:
        ValueError: If a row contains no scores.
        IndexError: If the winning position has no entry in ``classes``.
    """
    predicted_classes = []

    for row in log_probabilities:
        scores = list(row)
        if not scores:
            raise ValueError("cannot pick a class from an empty score row")
        # max() returns the first maximal element, i.e. the lowest index on ties.
        best_idx = max(range(len(scores)), key=scores.__getitem__)
        predicted_classes.append(classes[best_idx])

    return predicted_classes

In [57]:
from transformers import DistilBertTokenizer

# Pretrained DistilBERT tokenizer used to turn raw text into model inputs.
transformer_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# Length passed to the tokenizer as max_length (padding is requested up to it).
max_seq_length = 128
# Sample text(s) to classify.
reviews = ["This is great!"]

# Build the request payload that is later sent to the endpoint.
transformed_input = input_handler(reviews,transformer_tokenizer,max_seq_length )

print(transformed_input)


{'instances': [{'input_ids': [101, 2023, 2003, 2307, 999, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'input_mask': [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'segment_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [64]:
# Send the encoded payload to the endpoint and pull out the per-instance scores.
response_json = predictor.predict(transformed_input)
log_probabilities = response_json["predictions"]
print("log_probabilities: ", log_probabilities)

# Class labels, indexed by position in each score row.
classes = list(range(10))

# Last expression of the cell: the predicted label for each input text.
output_handler(log_probabilities, classes)

log_probabilities:  [[-1.28576183, -1.34811199, -1.31929684, -0.918471575, 7.13428, -1.5600096, -2.20798755, -1.78113461, -2.33376241, -2.01758361]]
[[-1.28576183, -1.34811199, -1.31929684, -0.918471575, 7.13428, -1.5600096, -2.20798755, -1.78113461, -2.33376241, -2.01758361]]


[4]