In [166]:
import boto3
import sagemaker
import pandas as pd

# One boto3 session supplies both the region name and the low-level
# SageMaker client used by this notebook.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name='sagemaker', region_name=region)

# SageMaker SDK session, its default S3 bucket, and the execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

In [167]:
# Reload `training_job_name` from IPython's %store persistence; it must have
# been saved beforehand with `%store training_job_name`.
%store -r training_job_name


In [168]:
# Show which training job's artifacts the cells below will deploy.
print(training_job_name)

tensorflow-training-2020-04-30-03-09-12-331


In [169]:
from sagemaker.tensorflow.serving import Model

# Following this example:
#    https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/tensorflow/deploying_tensorflow_serving.rst#making-predictions-against-a-sagemaker-endpoint
    
# For network isolation mode:
#    If you are working in a network-isolation situation or if you don't 
#    want to install dependencies at runtime every time your endpoint 
#    starts or a batch transform job runs, you might want to put pre-downloaded 
#    dependencies under a lib directory and pass this directory as a dependency. The container 
#    adds the modules to the Python path. Note that if both lib and requirements.txt are
#    present in the model archive, the requirements.txt is ignored:

# If you change SAGEMAKER_TFS_DEFAULT_MODEL_NAME to something other than 'saved_model', you may see the dreaded ping error in the logs.
# Environment passed to the serving container.  Change the default model name
# only when using multi-model; if the named model doesn't exist, watch out for
# the dreaded ping/ error.
env = {'SAGEMAKER_TFS_DEFAULT_MODEL_NAME': 'saved_model'}

# Location of the model archive, built from the default bucket and the
# training job name.
model_artifact_uri = 's3://{}/{}/output/model.tar.gz'.format(bucket, training_job_name)

model = Model(
    entry_point='inference.py',
    source_dir='src_inference',
    model_data=model_artifact_uri,
    role=role,
    framework_version="2.1.0",
    env=env,
)

In [170]:
# Sanity check: confirm which Model class was instantiated.
print(type(model))

<class 'sagemaker.tensorflow.serving.Model'>


In [None]:
# Deploy the model on a single ml.c5.xlarge instance.  The returned predictor
# is what the later cells use to send requests (`predictor.predict`).
# NOTE(review): presumably this creates a live hosted endpoint that should be
# deleted when no longer needed -- confirm cleanup happens elsewhere.
predictor = model.deploy(initial_instance_count=1, 
                         instance_type='ml.c5.xlarge')

In [None]:
# Debug aid: dump the predictor's instance attributes.
print(predictor.__dict__)

In [None]:
# Print the predictor's `endpoint` attribute (presumably the endpoint name);
# the same value is used further down as an S3 key prefix.
print(predictor.endpoint)

# Copy the training model (not useful here)

In [None]:
# Download the training job's model archive from S3 to ./model.tar.gz.
# `$bucket` and `$training_job_name` are expanded from the Python namespace.
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz ./model.tar.gz

In [None]:
# Unpack the downloaded archive into the current directory (verbose listing).
!tar -xvzf ./model.tar.gz

# Copy the Inference Model (useful)

In [None]:
# Download the model.tar.gz stored under the `predictor.endpoint` prefix of the
# bucket.  The destination is the same ./model.tar.gz path used for the
# training archive, so that local file is overwritten.
# NOTE(review): assumes the deployed (repacked) model is uploaded under the
# endpoint name -- confirm against the SDK version in use.
!aws s3 cp s3://$bucket/$predictor.endpoint/model.tar.gz ./model.tar.gz

In [None]:
# Unpack the inference archive into the current directory (verbose listing).
!tar -xvzf ./model.tar.gz

In [None]:
# Inspect the extracted SavedModel (signatures, inputs, outputs) so the request
# payload keys can be checked against what the model expects.
!saved_model_cli show --all --dir ./tensorflow/saved_model/0/

In [None]:
def convert_single_example(text,
                           max_seq_length,
                           tokenizer):
  """Convert one text string into fixed-length BERT-style input features.

  The text is tokenized, truncated so that it fits together with the
  [CLS] and [SEP] markers, mapped to ids, and zero-padded on the right
  up to `max_seq_length`.

  Args:
    text: The raw input string.
    max_seq_length: Total length of each returned list, including the
      [CLS] and [SEP] positions. Must be at least 2.
    tokenizer: Object providing `tokenize(text)` and
      `convert_tokens_to_ids(tokens)`.

  Returns:
    A tuple `(input_ids, input_mask, segment_ids)` of three lists, each of
    length `max_seq_length`:
      * input_ids: token ids, padded with 0.
      * input_mask: 1 for real tokens, 0 for padding.
      * segment_ids: all zeros (single-sequence input).

  Raises:
    ValueError: If `max_seq_length` is smaller than 2, i.e. there is no
      room for the [CLS] and [SEP] markers.

  Side effects:
    Prints the number of tokens produced by the tokenizer before truncation.
  """
  # Without this guard a value below 2 would slice with a negative bound
  # (silently dropping trailing tokens) and only fail later on an assert.
  if max_seq_length < 2:
    raise ValueError(
        'max_seq_length must be at least 2 to hold [CLS] and [SEP], got {}'.format(
            max_seq_length))

  tokens_a = tokenizer.tokenize(text)
  print('Length of tokens_a {}'.format(len(tokens_a)))

  # Account for [CLS] and [SEP] with "- 2"; slicing is a no-op when the
  # token list is already short enough.
  tokens_a = tokens_a[:max_seq_length - 2]

  # The convention in BERT for single sequences is:
  #  tokens:   [CLS] the dog is hairy . [SEP]
  #  type_ids: 0     0   0   0  0     0 0
  #
  # "type_ids" (segment ids) distinguish the first from the second sequence
  # in sequence-pair tasks; for a single sequence they are all 0.
  #
  # For classification tasks, the first vector (corresponding to [CLS]) is
  # used as the "sentence vector".
  tokens = ["[CLS]"] + list(tokens_a) + ["[SEP]"]
  segment_ids = [0] * len(tokens)

  input_ids = list(tokenizer.convert_tokens_to_ids(tokens))

  # The mask has 1 for real tokens and 0 for padding tokens. Only real
  # tokens are attended to.
  input_mask = [1] * len(input_ids)

  # Zero-pad up to the sequence length (a non-positive count adds nothing).
  padding = [0] * (max_seq_length - len(input_ids))
  input_ids = input_ids + padding
  input_mask = input_mask + padding
  segment_ids = segment_ids + padding

  # Internal invariants: every feature list has exactly max_seq_length items.
  assert len(input_ids) == max_seq_length
  assert len(input_mask) == max_seq_length
  assert len(segment_ids) == max_seq_length

  return input_ids, input_mask, segment_ids

In [None]:
from transformers import DistilBertTokenizer

# Load the pretrained 'distilbert-base-uncased' tokenizer; it is passed to
# convert_single_example() in the prediction cells below.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [None]:
# Turn the sample review into fixed-length (128) model inputs.
input_ids, input_mask, segment_ids = convert_single_example(
    text="""I loved it!  I will recommend this to everyone.""",
    max_seq_length=128,
    tokenizer=tokenizer)

# Request body: a list with one feature dict per example under "instances".
instances = [
    {
        "input_ids": input_ids,
        "input_mask": input_mask,
        "segment_ids": segment_ids,
    },
]
data = {"instances": instances}

import numpy as np
import tensorflow as tf

# Call the endpoint and keep the prediction for the single instance sent.
response = predictor.predict(data)
log_probabilities = response['predictions'][0]
print('Log Probabilities: {}'.format(log_probabilities))

# Normalise the scores and pick the highest-scoring position.
softmax = tf.nn.softmax(log_probabilities)
print('Softmax: {}'.format(softmax))

predicted_class_idx = tf.argmax(softmax, axis=-1, output_type=tf.int32)
print('Predicted Class Idx: {}'.format(predicted_class_idx))

# Position i of the model output corresponds to label classes[i]
# (presumably review star ratings 1-5 -- confirm against training).
classes = list(range(1, 6))

predicted_class = classes[predicted_class_idx]
print('Predicted Class: {}'.format(predicted_class))

In [None]:
# Turn the sample review into fixed-length (128) model inputs.
input_ids, input_mask, segment_ids = convert_single_example(
    text="""Really bad.  I hope they don't make this anymore.""",
    max_seq_length=128,
    tokenizer=tokenizer)

# Request body: a list with one feature dict per example under "instances".
instances = [
    {
        "input_ids": input_ids,
        "input_mask": input_mask,
        "segment_ids": segment_ids,
    },
]
data = {"instances": instances}

import numpy as np
import tensorflow as tf

# Call the endpoint and keep the prediction for the single instance sent.
response = predictor.predict(data)
log_probabilities = response['predictions'][0]
print('Log Probabilities: {}'.format(log_probabilities))

# Normalise the scores and pick the highest-scoring position.
softmax = tf.nn.softmax(log_probabilities)
print('Softmax: {}'.format(softmax))

predicted_class_idx = tf.argmax(softmax, axis=-1, output_type=tf.int32)
print('Predicted Class Idx: {}'.format(predicted_class_idx))

# Position i of the model output corresponds to label classes[i]
# (presumably review star ratings 1-5 -- confirm against training).
classes = list(range(1, 6))

predicted_class = classes[predicted_class_idx]
print('Predicted Class: {}'.format(predicted_class))