In [2]:
import os
import pandas as pd
import numpy as np
import keras
import boto3
import sagemaker
import tensorflow as tf
from keras.datasets import imdb

Using TensorFlow backend.


In [3]:
# Load the Keras IMDB dataset; load_data() yields a (train, test) pair, each of
# which is itself a (features, labels) pair.
imdb_train, imdb_test = imdb.load_data()
X_train, y_train = imdb_train
X_test, y_test = imdb_test

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [4]:
# Wrap each split in a two-column frame and keep only its first 2000 rows.
column_order = ['sentences', 'sentiment']
df_train = pd.DataFrame({'sentences': X_train, 'sentiment': y_train}, columns=column_order).iloc[0:2000]
df_test = pd.DataFrame({'sentences': X_test, 'sentiment': y_test}, columns=column_order).iloc[0:2000]

# Persist both subsets as CSV (without the index column) under ./data.
os.makedirs('./data', exist_ok=True)
for frame, csv_path in ((df_train, 'data/train.csv'), (df_test, 'data/test.csv')):
    frame.to_csv(csv_path, index=False)

# Upload the CSV files to the session's default bucket, one key prefix per split.
# NOTE(review): 'sentient' looks like a typo for 'sentiment'; left unchanged
# because the string is part of the S3 key that other code may depend on.
prefix = 'sentient_classification'
sagemaker_session = sagemaker.Session(default_bucket='my-sagemaker-data-bucket')
training_input_path = sagemaker_session.upload_data('data/train.csv', key_prefix=f'{prefix}/train')
testing_input_path = sagemaker_session.upload_data('data/test.csv', key_prefix=f'{prefix}/test')

In [5]:
tf_version = tf.__version__
role = sagemaker.get_execution_role()

from sagemaker.tensorflow import TensorFlow
import subprocess

# Default to CPU local mode; switch to 'local_gpu' only when a GPU is detected.
instance_type = 'local'#'ml.p3.8xlarge'
# Check if local GPU is present. subprocess.call raises OSError
# (FileNotFoundError) when the nvidia-smi binary is not installed, which is the
# normal situation on a CPU-only machine, so treat that as "no GPU" rather than
# letting the cell crash.
try:
    gpu_present = subprocess.call('nvidia-smi') == 0
except OSError:
    gpu_present = False
if gpu_present:
    print('GPU present')
    instance_type = 'local_gpu'

# Two-instance MPI training job driven by train_sentiment.py.
# `instance_count` is the sagemaker>=2 name for the former `train_instance_count`
# (the old keyword only works through a deprecation shim that emits a warning).
tf_estimator = TensorFlow(entry_point='train_sentiment.py',
                          role=role,
                          instance_count=2,
                          instance_type=instance_type,
                          py_version='py37',
                          framework_version='2.3.2',
                          distribution={
                              'mpi': {
                                  'enabled': True,
                                  'processes_per_host': 1
                              }
                          },
                          hyperparameters={
                              'epochs': 1,
                              'batch-size': 32
                          })

# Only the 'train' channel is supplied to the job.
tf_estimator.fit({'train': training_input_path})

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


Creating veo4szvq41-algo-1-brzc3 ... 
Creating tvploak8d5-algo-2-brzc3 ... 
Creating veo4szvq41-algo-1-brzc3 ... done
Creating tvploak8d5-algo-2-brzc3 ... done
Attaching to veo4szvq41-algo-1-brzc3, tvploak8d5-algo-2-brzc3
[36mtvploak8d5-algo-2-brzc3 |[0m 2021-12-21 05:22:26.017037: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
[36mtvploak8d5-algo-2-brzc3 |[0m 2021-12-21 05:22:26.017469: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
[33mveo4szvq41-algo-1-brzc3 |[0m 2021-12-21 05:22:26.017044: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
[33mveo4szvq41-algo-1-brzc3 |[0m 2021-12-21 05:22:26.017471: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be

In [6]:
# Deploy the trained estimator behind a single-instance endpoint and keep the
# returned predictor for inference.
# NOTE(review): training used a local instance type while this requests
# 'ml.m4.xlarge' -- confirm which target is intended.
sentiment_predictor = tf_estimator.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

update_endpoint is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


Attaching to 2jcjfb5qdd-algo-1-7f189
[36m2jcjfb5qdd-algo-1-7f189 |[0m INFO:__main__:PYTHON SERVICE: False
[36m2jcjfb5qdd-algo-1-7f189 |[0m INFO:__main__:starting services
[36m2jcjfb5qdd-algo-1-7f189 |[0m INFO:__main__:using default model name: model
[36m2jcjfb5qdd-algo-1-7f189 |[0m INFO:__main__:tensorflow serving model config: 
[36m2jcjfb5qdd-algo-1-7f189 |[0m model_config_list: {
[36m2jcjfb5qdd-algo-1-7f189 |[0m   config: {
[36m2jcjfb5qdd-algo-1-7f189 |[0m     name: 'model'
[36m2jcjfb5qdd-algo-1-7f189 |[0m     base_path: '/opt/ml/model'
[36m2jcjfb5qdd-algo-1-7f189 |[0m     model_platform: 'tensorflow'
[36m2jcjfb5qdd-algo-1-7f189 |[0m     model_version_policy: {
[36m2jcjfb5qdd-algo-1-7f189 |[0m       specific: {
[36m2jcjfb5qdd-algo-1-7f189 |[0m         versions: 1
[36m2jcjfb5qdd-algo-1-7f189 |[0m       }
[36m2jcjfb5qdd-algo-1-7f189 |[0m     }
[36m2jcjfb5qdd-algo-1-7f189 |[0m   }
[36m2jcjfb5qdd-algo-1-7f189 |[0m }
[36m2jcjfb5qdd-algo-1-7f189 |[0m 
[36

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Largest token id in the test subset (a generator avoids building a throwaway list).
max_word_index = max(max(sentence) for sentence in df_test['sentences'])

# Fixed padded length for the inputs sent to the endpoint. The longest-test-review
# computation that used to precede this line was overwritten by this constant
# before it was ever read, so it has been removed as dead code.
# NOTE(review): presumably 1038 is the sequence length the model was trained
# with -- confirm against train_sentiment.py.
max_sentence_length = 1038

# Pad every review at the end ('post') up to max_sentence_length.
X_test_padded = pad_sequences(df_test['sentences'], maxlen=max_sentence_length, padding='post')

# Score the first 50 padded reviews; the bare last expression displays the response.
predictions = sentiment_predictor.predict(X_test_padded[0:50])
predictions

{'predictions': [[0.537367404],
  [0.958817422],
  [0.998868346],
  [0.034765929],
  [0.999992073],
  [0.689996243],
  [0.1218822],
  [0.057597667],
  [0.758380413],
  [0.960324168],
  [0.045324266],
  [0.0739206672],
  [0.446586251],
  [0.699089289],
  [0.97275573],
  [0.0171760321],
  [0.983237267],
  [0.62255621],
  [0.443494409],
  [0.576273799],
  [0.961864948],
  [0.999987245],
  [0.954569936],
  [0.271190047],
  [0.380440563],
  [0.0614178181],
  [0.832692444],
  [0.582939],
  [0.440390557],
  [0.246110827],
  [0.999573],
  [0.0354868472],
  [0.986569762],
  [0.0166225731],
  [0.897319198],
  [0.0101800561],
  [0.98902142],
  [0.99781388],
  [0.078352809],
  [0.010755986],
  [0.998691797],
  [0.997918665],
  [0.085237056],
  [0.497526854],
  [0.998014092],
  [0.0895275772],
  [0.0918046534],
  [0.34136498],
  [0.32637316],
  [0.298807502]]}

[36m2jcjfb5qdd-algo-1-7f189 |[0m 172.18.0.1 - - [21/Dec/2021:05:24:12 +0000] "POST /invocations HTTP/1.1" 200 769 "-" "python-urllib3/1.26.7"
