In [166]:
import json

import boto3
import pandas as pd
import sagemaker

# SageMaker session, its default S3 bucket, and the execution role used by later cells.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# One boto3 session supplies both the region name and the low-level SageMaker client.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name='sagemaker', region_name=region)

In [167]:
# Restore `training_job_name`, previously persisted with the %store magic.
%store -r training_job_name

In [168]:
# Show the restored job name; it is used below to build the model artifact's S3 path.
print(training_job_name)

tensorflow-training-2020-04-30-03-09-12-331


In [173]:
from sagemaker.tensorflow.serving import Model

# Following this example:
#    https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_tensorflow_serving.rst#making-predictions-against-a-sagemaker-endpoint
    
# For network isolation mode:
#    If you are working in a network-isolation situation or if you don't 
#    want to install dependencies at runtime every time your endpoint 
#    starts or a batch transform job runs, you might want to put pre-downloaded 
#    dependencies under a lib directory and add this directory as a dependency. The container 
#    adds the modules to the Python path. Note that if both lib and requirements.txt are
#    present in the model archive, the requirements.txt is ignored:

# If you change SAGEMAKER_TFS_DEFAULT_MODEL_NAME to something other than 'saved_model', you may see the dreaded ping error in the logs.
# Environment for the serving container.
# Change SAGEMAKER_TFS_DEFAULT_MODEL_NAME when using multi-model, but watch out
# for the dreaded ping/ error if the model name doesn't exist.
env = dict(SAGEMAKER_TFS_DEFAULT_MODEL_NAME='saved_model')

# The model archive is read from the training job's output prefix in the default bucket.
model_data_s3_uri = 's3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name)

model = Model(
    entry_point='inference.py',
    source_dir='src_inference',
    model_data=model_data_s3_uri,
    role=role,
    framework_version='2.1.0',
    env=env,
)

In [174]:
# Confirm which Model class was instantiated.
print(type(model))

<class 'sagemaker.tensorflow.serving.Model'>


In [175]:
# Deploy the model on a single ml.c5.xlarge instance; the returned predictor's
# `endpoint` attribute is used further down.
predictor = model.deploy(initial_instance_count=1, 
                         instance_type='ml.c5.xlarge')

-------------!

# Copy the training model (not useful here)

In [178]:
# !aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz ./model.tar.gz

In [179]:
# !tar -xvzf ./model.tar.gz

# Copy the Inference Model (useful)

In [180]:
# Copy model.tar.gz from the prefix named after the endpoint in the default bucket.
# NOTE(review): the recorded run of this cell failed with a 404 (HeadObject) for this key -
# confirm where the inference model archive is actually stored before relying on this path.
!aws s3 cp s3://$bucket/$predictor.endpoint/model.tar.gz ./model.tar.gz

fatal error: An error occurred (404) when calling the HeadObject operation: Key "tensorflow-inference-2020-04-30-20-05-22-385/model.tar.gz" does not exist


In [181]:
# Unpack the downloaded archive into the working directory (requires the copy above to succeed).
!tar -xvzf ./model.tar.gz

tar (child): ./model.tar.gz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


In [182]:
# Inspect the SavedModel under ./tensorflow/saved_model/0/ with saved_model_cli.
!saved_model_cli show --all --dir ./tensorflow/saved_model/0/

2020-04-30 20:11:55.961610: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libnvinfer.so.6'; dlerror: libnvinfer.so.6: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/extras/CUPTI/lib64:/usr/local/cuda-10.0/lib:/usr/local/cuda-10.0/efa/lib:/opt/amazon/efa/lib:/opt/amazon/efa/lib64:/usr/lib64/openmpi/lib/:/usr/local/lib:/usr/lib:/usr/local/mpi/lib:/lib/:/usr/lib64/openmpi/lib/:/usr/local/lib:/usr/lib:/usr/local/mpi/lib:/lib/:/usr/lib64/openmpi/lib/:/usr/local/lib:/usr/lib:/usr/local/mpi/lib:/lib/:
2020-04-30 20:11:55.961692: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libnvinfer_plugin.so.6'; dlerror: libnvinfer_plugin.so.6: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-10.0/lib64:/usr/local/cuda-10.0/extras/CUPTI/lib64:/usr/local/cuda-10.0/lib:/usr/local/cuda-10.0/ef

In [334]:
from transformers import DistilBertTokenizer

# Load the pretrained 'distilbert-base-uncased' tokenizer used to build inference requests.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [387]:
class RequestHandler(object):
    """Serializer that turns raw text into a TensorFlow Serving JSON request body.

    Each text is tokenized, wrapped in "[CLS]" ... "[SEP]", truncated so the
    whole sequence fits in ``max_seq_length`` positions, and zero-padded up to
    exactly ``max_seq_length``.
    """

    def __init__(self, tokenizer, max_seq_length):
        """Store the tokenizer and the fixed sequence length.

        Args:
            tokenizer: object providing ``tokenize(text)`` and
                ``convert_tokens_to_ids(tokens)``.
            max_seq_length: number of positions in every emitted feature list.
        """
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length

    def __call__(self, data):
        """Serialize ``data`` into a ``{"instances": [...]}`` JSON string.

        Args:
            data: iterable of text strings; a single string is treated as one
                instance rather than being iterated character by character.

        Returns:
            JSON string whose "instances" list holds, per input text, a dict
            with "input_ids", "input_mask" and "segment_ids", each of length
            ``max_seq_length``.

        Raises:
            AssertionError: if the features cannot fit in ``max_seq_length``
                (for example when ``max_seq_length`` is smaller than 2).
        """
        if isinstance(data, str):
            data = [data]

        # Account for [CLS] and [SEP] with "- 2"
        max_tokens = self.max_seq_length - 2

        transformed_instances = []
        for text in data:
            text_tokens = self.tokenizer.tokenize(text)
            if len(text_tokens) > max_tokens:
                text_tokens = text_tokens[0:max_tokens]

            tokens = ["[CLS]"] + list(text_tokens) + ["[SEP]"]

            # Use the handler's own tokenizer, not whatever global happens to exist.
            input_ids = list(self.tokenizer.convert_tokens_to_ids(tokens))
            input_mask = [1] * len(input_ids)
            # Single-sentence input: every position belongs to segment 0.
            segment_ids = [0] * len(input_ids)

            # Zero-pad up to the sequence length.
            pad_length = max(self.max_seq_length - len(input_ids), 0)
            input_ids += [0] * pad_length
            input_mask += [0] * pad_length
            segment_ids += [0] * pad_length

            assert len(input_ids) == self.max_seq_length
            assert len(input_mask) == self.max_seq_length
            assert len(segment_ids) == self.max_seq_length

            transformed_instances.append({"input_ids": input_ids,
                                          "input_mask": input_mask,
                                          "segment_ids": segment_ids})

        transformed_data = {"instances": transformed_instances}

        return json.dumps(transformed_data)

In [462]:
class ResponseHandler(object):
    """Deserializer that maps a TensorFlow Serving response to class labels."""

    def __init__(self, classes):
        """Store the labels.

        Args:
            classes: sequence of labels, indexed by the position of the
                highest score in each prediction.
        """
        self.classes = classes

    def __call__(self, response, accept_header):
        """Decode the response body and pick the best class per prediction.

        Args:
            response: object whose ``read()`` returns the UTF-8 encoded JSON
                body; the body must contain a "predictions" list with one
                list of scores per instance.
            accept_header: unused; kept for the deserializer call signature.

        Returns:
            JSON string holding the list of predicted labels, one per
            prediction, in the same order.
        """
        response_body = response.read().decode('utf-8')

        response_json = json.loads(response_body)

        log_probabilities = response_json["predictions"]

        predicted_classes = []

        for log_probability in log_probabilities:
            # Softmax is strictly increasing, so the arg-max of the raw scores
            # is the arg-max of the softmax; no TensorFlow call is needed.
            predicted_class_idx = max(range(len(log_probability)),
                                      key=log_probability.__getitem__)
            predicted_classes.append(self.classes[predicted_class_idx])

        return json.dumps(predicted_classes)

In [463]:
from sagemaker.tensorflow.serving import Predictor

# Handlers that convert raw text to a serving request and the reply to class labels.
request_handler = RequestHandler(tokenizer=tokenizer,
                                 max_seq_length=128)

response_handler = ResponseHandler(classes=[1, 2, 3, 4, 5])

# Name of the endpoint to attach to.
endpoint_name = 'tensorflow-inference-2020-04-30-20-05-22-385'
#endpoint_name = 'training-pipeline-2020-04-30-03-15-22'

# Pass the `endpoint_name` defined just above; the previously referenced
# `deploy_endpoint_name` is not defined anywhere in this notebook.
predictor = Predictor(endpoint_name=endpoint_name,
                      sagemaker_session=sess,
                      serializer=request_handler,
                      deserializer=response_handler,
                      content_type='application/json',
                      model_name='saved_model',
                      model_version=0)

In [464]:
import tensorflow as tf
    
# Two sample texts to classify.
instances = ["This is great!", 
             "This is terrible."]

# The predictor applies its configured serializer and deserializer around the endpoint call.
predicted_classes = predictor.predict(instances)

print(predicted_classes)

[5, 1]


# Batch Predictions
https://github.com/awslabs/amazon-sagemaker-examples/blob/master/sagemaker_batch_transform/tensorflow_cifar-10_with_inference_script/tensorflow-serving-cifar10-python-sdk.ipynb

and 

https://aws.amazon.com/blogs/machine-learning/performing-batch-inference-with-tensorflow-serving-in-amazon-sagemaker/

In [495]:
from sagemaker.tensorflow.serving import Model

# If you change SAGEMAKER_TFS_DEFAULT_MODEL_NAME to something other than 'saved_model', you may see the dreaded ping error in the logs.
# Environment for the batch-serving container.
# Change SAGEMAKER_TFS_DEFAULT_MODEL_NAME when using multi-model, but watch out
# for the dreaded ping/ error if the model name doesn't exist.
batch_env = dict(
    SAGEMAKER_TFS_DEFAULT_MODEL_NAME='saved_model',
    SAGEMAKER_TFS_ENABLE_BATCHING='true',
    SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS='50000',
    SAGEMAKER_TFS_MAX_BATCH_SIZE='16',
)

# Same training artifact as the real-time model, paired with the batch inference script.
batch_model_data_s3_uri = 's3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name)

batch_model = Model(
    entry_point='batch_inference.py',
    source_dir='src_batch_inference',
    model_data=batch_model_data_s3_uri,
    role=role,
    framework_version='2.1.0',
    env=batch_env,
)

In [496]:
# Settings for the transformer created from the batch model.
transformer_settings = dict(
    instance_count=1,
    strategy='MultiRecord',
    instance_type='ml.m4.xlarge',  # alternative kept from the original: 'local'
    assemble_with='Line',
    max_concurrent_transforms=64,
    max_payload=1,  # This is in Megabytes (not number of records)
    env=batch_env,
)

batch_predictor = batch_model.transformer(**transformer_settings)

In [497]:
# Restore `scikit_processing_job_name`, previously persisted with the %store magic.
%store -r scikit_processing_job_name

In [498]:
# Show the restored processing job name.
print(scikit_processing_job_name)

sagemaker-scikit-learn-2020-04-28-15-47-30-110


In [499]:
# scikit_processing_job_s3_output_prefix = 'data'
# Print the processing job name whose output prefix is used as the batch input below.
print('Previous Scikit Processing Job Name: {}'.format(scikit_processing_job_name))

Previous Scikit Processing Job Name: sagemaker-scikit-learn-2020-04-28-15-47-30-110


In [500]:
# Key prefix of the processing job's 'bert-test' output.
prefix_test = '{}/output/bert-test'.format(scikit_processing_job_name)

# Full S3 URI of that prefix in the default bucket; used as the batch transform input.
test_s3_uri = 's3://{}/{}'.format(bucket, prefix_test)

In [501]:
# Show the input URI and list the objects stored under it.
print(test_s3_uri)

!aws s3 ls $test_s3_uri/

s3://sagemaker-us-east-1-835319576252/sagemaker-scikit-learn-2020-04-28-15-47-30-110/output/bert-test
2020-04-28 16:01:51    9748020 part-algo-1-amazon_reviews_us_Software_v1_00.tfrecord


In [502]:
# Run the batch transform over the test data in S3, with the input split as
# TFRecord and sent with the 'application/x-tfexample' content type.
batch_predictor.transform(data=test_s3_uri, 
                          split_type='TFRecord',
                          content_type='application/x-tfexample')

In [None]:
# Report the name of the latest transform job, then wait on it.
print('Waiting for batch prediction job: ' + batch_predictor.latest_transform_job.job_name)

batch_predictor.wait()

Waiting for batch prediction job: tensorflow-inference-2020-05-01-03-53-0-2020-05-01-03-53-09-899
........................[34mINFO:__main__:starting services[0m
[34mINFO:__main__:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "saved_model",
    base_path: "/opt/ml/model/tensorflow/saved_model",
    model_platform: "tensorflow"
  }[0m
[34m}

[0m
[34mINFO:__main__:nginx config: [0m
[34mload_module modules/ngx_http_js_module.so;
[0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr error;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/json;
  access_log /dev/stdout combined;
  js_include tensorflow-serving.js;

  upstream tfs_upstream {
    server localhost:10001;
  }

  upstream gunicorn_upstream {
    server unix:/tmp/gunicorn.sock fail_timeout=1;
  }

  ser

# Check Output Data

After the transform job has completed, download the output data from S3.

For each file "f" in the input data, we have a corresponding file "f.out" containing the predicted labels from each input row. 

We can compare the predicted labels to the true labels saved earlier.


In [None]:
# Record the transformer's output path; the download itself happens in the next cell.
batch_prediction_output_s3_uri = batch_predictor.output_path

# !mkdir -p ./batch_prediction_output

In [None]:
%%bash -s "$batch_prediction_output_s3_uri"

# The body of a %%bash cell is run by bash as-is, so Python variables are not
# visible inside it. The output URI is therefore passed on the magic line and
# arrives here as the first positional argument.
output_s3_uri="$1"

aws s3 cp --recursive "$output_s3_uri/" ./batch_prediction_output

ls ./batch_prediction_output