In [2]:
!pip install --upgrade pip
!pip install -q sagemaker-experiments

Collecting pip
  Using cached pip-22.0.4-py3-none-any.whl (2.1 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.2
    Uninstalling pip-21.1.2:
      Successfully uninstalled pip-21.1.2
Successfully installed pip-22.0.4
[0m

In [4]:
import sagemaker
import boto3
import json

session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = session.default_bucket()
region = session.boto_region_name
prefix = 'sagemaker-batch-transform'

In [5]:
import tensorflow as tf

In [6]:
import numpy as np
import tensorflow as tf
import os

In [7]:
from tensorflow.keras.preprocessing import sequence


In [8]:
from tensorflow.python.keras.datasets import imdb

In [9]:
max_features = 20000
maxlen = 400

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

25000 train sequences
25000 test sequences


In [10]:
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

x_train shape: (25000,)
x_test shape: (25000, 400)


In [11]:
csv_test_dir_prefix = 'imdb_data/test'
csv_test_filename = 'test.csv'
csv_test_dir = os.path.join(os.getcwd(), csv_test_dir_prefix)
os.makedirs(csv_test_dir, exist_ok=True)

np.savetxt(os.path.join(csv_test_dir, csv_test_filename), 
           np.array(x_test, dtype=np.int32), fmt='%d', delimiter=",")

test_data_s3prefix = f'{prefix}/data/csv_test'
test_data_s3 = session.upload_data(path=csv_test_dir, 
                                key_prefix=test_data_s3prefix)
print(test_data_s3)

s3://sagemaker-us-east-1-104877823522/sagemaker-batch-transform/data/csv_test


In [12]:
from sagemaker.tensorflow import TensorFlow

training_job_name = 'imdb-tf-2022-04-16-18-40-44'
estimator = TensorFlow.attach(training_job_name)


2022-04-16 18:58:22 Starting - Preparing the instances for training
2022-04-16 18:58:22 Downloading - Downloading input data
2022-04-16 18:58:22 Training - Training image download completed. Training in progress.
2022-04-16 18:58:22 Uploading - Uploading generated training model
2022-04-16 18:58:22 Completed - Training job completed


In [13]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from botocore.exceptions import ClientError
from time import strftime, gmtime

experiment_name = 'imdb-test7-sentiment-analysis'
exp_datetime = strftime('%Y-%m-%d-%H-%M-%S', gmtime())

job_name = f'imdb-tf-bt-test7-{exp_datetime}'

exp_trial = Trial.load(trial_name = training_job_name)

experiment_config = {
    'ExperimentName':experiment_name,
    'TrialName':exp_trial.trial_name,
    'TrialComponentDisplayName':'Inference-test7-BatchTransform'    
}

In [16]:

exp_datetime = strftime('%Y-%d-%m-%H-%M-%S', gmtime())

job_name = f'imdb-tf-bt-test7-{exp_datetime}'

s3_output_location = f's3://{bucket}/{prefix}/{job_name}'

transformer = estimator.transformer(instance_count=1,
                                    instance_type='ml.c5.2xlarge',
                                    max_payload=2,
                                    accept='application/jsonlines',
                                    output_path=s3_output_location,
                                    assemble_with='Line')

transformer.transform(
    test_data_s3, 
    content_type='text/csv', 
    split_type = 'Line', 
    job_name = job_name,
    experiment_config = experiment_config)

print('Waiting for transform job: ' + transformer.latest_transform_job.job_name)


INFO:sagemaker:Creating model with name: imdb-tf-2022-04-16-18-40-44-2022-04-17-04-35-11-128
INFO:sagemaker:Creating transform job with name: imdb-tf-bt-test7-2022-17-04-04-35-11


.......................[34mINFO:__main__:starting services[0m
[34mINFO:tfs_utils:using default model name: model[0m
[34mINFO:tfs_utils:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "model",
    base_path: "/opt/ml/model",
    model_platform: "tensorflow"
  }[0m
[34m}[0m
[34mINFO:__main__:using default model name: model[0m
[34mINFO:__main__:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "model",
    base_path: "/opt/ml/model",
    model_platform: "tensorflow"
  }[0m
[34m}[0m
[34mINFO:__main__:tensorflow version info:[0m
[34mTensorFlow ModelServer: 2.1.0-rc1+dev.sha.d80de10[0m
[34mTensorFlow Library: 2.1.1[0m
[34mINFO:__main__:tensorflow serving command: tensorflow_model_server --port=10000 --rest_api_port=10001 --model_config_file=/sagemaker/model-config.cfg --max_num_load_retries=0 [0m
[34mINFO:__main__:started tensorflow serving (pid: 12)[0m
[34mINFO:__main__:nginx config: [0m
[34

UnexpectedStatusException: Error for Transform job imdb-tf-bt-test7-2022-17-04-04-35-11: Failed. Reason: ClientError: See job logs for more information

In [41]:
output = transformer.output_path
output_prefix = 'imdb_data/test_output'
!mkdir -p {output_prefix}

!aws s3 cp --recursive {output} {output_prefix}
!head {output_prefix}/{csv_test_filename}.out

download: s3://sagemaker-us-east-1-104877823522/sagemaker-batch-transform/imdb-tf-bt-test7-2022-17-04-04-35-11/test.csv.out to imdb_data/test_output/test.csv.out
{    "predictions": [[3.14312785e-14], [1.0], [0.134162009], [0.999753416], [0.999974132], [0.999915], [0.991619], [1.06336381e-12], [0.997953773], [1.0], [0.999909759], [2.19002941e-05], [1.59515456e-11], [0.999829054], [1.0], [7.4119225e-23], [0.999998927], [1.0], [2.23425636e-17], [2.52036648e-06], [1.0], [1.0], [0.0446629636], [1.0], [0.999777138], [0.999966383], [8.18169144e-10], [0.87162137], [1.0], [4.21393883e-20], [1.0], [0.973645806], [1.0], [5.93713504e-16], [2.56153118e-15], [1.71378779e-11], [1.0], [0.996868193], [0.210237771], [3.88957772e-20], [1.0], [1.0], [1.64552244e-16], [0.999997497], [1.0], [0.999998689], [0.00767789735], [1.20005489e-19], [4.09502254e-09], [0.999831438], [1.21541855e-13], [0.962914526], [1.0], [1.0], [0.999999285], [0.160042971], [0.000249715842], [0.999999881], [1.88681319e-12], [8.11618

In [42]:
results = []

with open(f'{output_prefix}/{csv_test_filename}.out', 'r') as f:
    lines = f.readlines()
    for line in lines:
        print(line)
        json_output = json.load(line)
        result = [float('%.3f'%(item)) for sublist in json_output['predictions'] 
                                       for item in sublist]
        results += result
        
print(results)

{    "predictions": [[3.14312785e-14], [1.0], [0.134162009], [0.999753416], [0.999974132], [0.999915], [0.991619], [1.06336381e-12], [0.997953773], [1.0], [0.999909759], [2.19002941e-05], [1.59515456e-11], [0.999829054], [1.0], [7.4119225e-23], [0.999998927], [1.0], [2.23425636e-17], [2.52036648e-06], [1.0], [1.0], [0.0446629636], [1.0], [0.999777138], [0.999966383], [8.18169144e-10], [0.87162137], [1.0], [4.21393883e-20], [1.0], [0.973645806], [1.0], [5.93713504e-16], [2.56153118e-15], [1.71378779e-11], [1.0], [0.996868193], [0.210237771], [3.88957772e-20], [1.0], [1.0], [1.64552244e-16], [0.999997497], [1.0], [0.999998689], [0.00767789735], [1.20005489e-19], [4.09502254e-09], [0.999831438], [1.21541855e-13], [0.962914526], [1.0], [1.0], [0.999999285], [0.160042971], [0.000249715842], [0.999999881], [1.88681319e-12], [8.11618923e-15], [0.000505397329], [1.04703475e-10], [0.999699712], [6.32346675e-10], [5.42445378e-09], [1.0], [0.259758383], [0.995647], [1.0], [1.0], [0.999999881], [1

AttributeError: 'str' object has no attribute 'read'

In [43]:
def get_sentiment(score):
    return 'positive' if score > 0.5 else 'negative' 

In [44]:
import re

regex = re.compile(r'^[\?\s]+')
word_index = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [45]:
data_index=199
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
first_decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') 
                                 for i in x_test[data_index]])
regex.sub('', first_decoded_review)

"i watched this movie purely for the setting it was filmed in an old hotel that a friend owns shares of the plot was predictable the acting was ? at best the scares were all gross outs not true scares br br i don't remember much of the plot and i think that's because there wasn't much of one to remember they didn't even use the hotel to it's fullest potential the beaches are fantastic and the hotel is situated on a ? at low tide you can walk almost 1 4 mile into the bay which is actually an eerie sight first thing in the morning or late at night when the wind is howling through the cracks br br the best way to see this movie is with the remote in your hand so you can fast forward through the action and i'm using that term ? scenes and pause at the beauty of the surroundings"

In [46]:
print(f'Labeled sentiment for this review is {get_sentiment(y_test[data_index])}')
print(f'Predicted sentiment is {get_sentiment(results[data_index])}')

Labeled sentiment for this review is negative


IndexError: list index out of range