In [1]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session plus the default S3 bucket; `bucket` is used further down
# to build the S3 path of the trained model artifact.
sess   = sagemaker.Session()
bucket = sess.default_bucket()
# Execution role of this notebook (not used by the cells visible here).
role = sagemaker.get_execution_role()

# Create one boto3 session and reuse it for both the region lookup and the
# SageMaker client, rather than constructing two identical sessions.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name='sagemaker', region_name=region)

# Install TensorFlow and Transformers

In [2]:
# Show the packages currently installed, before the installs below change them.
!pip list

Package                            Version   
---------------------------------- ----------
absl-py                            0.9.0     
alabaster                          0.7.10    
anaconda-client                    1.6.14    
anaconda-project                   0.8.2     
asn1crypto                         0.24.0    
astor                              0.8.1     
astroid                            1.6.3     
astropy                            3.0.2     
attrs                              18.1.0    
Automat                            0.3.0     
autovizwidget                      0.15.0    
awscli                             1.18.39   
Babel                              2.5.3     
backcall                           0.1.0     
backports.shutil-get-terminal-size 1.0.0     
bcrypt                             3.1.7     
beautifulsoup4                     4.6.0     
bitarray                           0.8.1     
bkcharts                           0.2       
blaze        

In [3]:
!pip install -q pip --upgrade
!pip install -q wrapt --upgrade --ignore-installed
!pip install -q tensorflow==2.1.0 --upgrade --ignore-installed
!pip install -q transformers==2.8.0

ERROR: awscli 1.18.39 has requirement rsa<=3.5.0,>=3.1.2, but you'll have rsa 4.0 which is incompatible.


# Retrieve the Training Job Name

In [4]:
# Restore `training_job_name` from IPython's %store database. It must have been
# saved earlier with `%store training_job_name`; otherwise the name stays undefined.
%store -r training_job_name

In [5]:
# Show which training job's artifacts the following cells will download.
job_name_message = 'Previous training_job_name: {}'.format(training_job_name)
print(job_name_message)

Previous training_job_name: tensorflow-training-2020-04-25-18-34-53-740


# Download the Trained Model

In [6]:
# Local directory (relative to the notebook) that receives the downloaded
# model archive and its extracted contents.
models_dir = './models'

In [7]:
# Download the training job's model archive (output/model.tar.gz) from S3 into
# models_dir. IPython expands $bucket, $training_job_name and $models_dir from
# the notebook's Python variables before the shell runs the command.
!aws s3 cp s3://$bucket/$training_job_name/output/model.tar.gz $models_dir/model.tar.gz

download: s3://sagemaker-us-west-2-086401037028/tensorflow-training-2020-04-25-18-34-53-740/output/model.tar.gz to models/model.tar.gz


In [8]:
import tarfile
import pickle as pkl

#!ls -al ./models

# Unpack the downloaded archive into models_dir. The `with` block guarantees the
# archive file is closed even if extraction raises part-way through.
# NOTE(review): extractall() trusts the member paths stored in the archive; that
# is acceptable for an artifact produced by our own training job, but this cell
# should not be pointed at untrusted tarballs.
with tarfile.open('{}/model.tar.gz'.format(models_dir)) as tar:
    tar.extractall(path=models_dir)

In [9]:
# List models_dir to confirm the archive was extracted next to model.tar.gz.
!ls -al $models_dir

total 908908
drwxrwxr-x  4 ec2-user ec2-user      4096 Apr 25 19:37 .
drwxrwxr-x 10 ec2-user ec2-user      4096 Apr 25 19:37 ..
-rw-rw-r--  1 ec2-user ec2-user 930698988 Apr 25 18:45 model.tar.gz
drwxr-xr-x  3 ec2-user ec2-user      4096 Apr 25 18:38 tensorflow
drwxr-xr-x  3 ec2-user ec2-user      4096 Apr 25 18:38 transformers


# Load the Model

In [10]:
# Directory inside the extracted archive that holds the fine-tuned model files.
# NOTE: the value ends with '/', so paths built as `$transformer_model_dir/...`
# contain a doubled slash.
transformer_model_dir = '{}/transformers/fine-tuned/'.format(models_dir)

# List the directory to confirm the model files are present.
!ls -al $transformer_model_dir

total 261692
drwxr-xr-x 2 ec2-user ec2-user      4096 Apr 25 18:43 .
drwxr-xr-x 3 ec2-user ec2-user      4096 Apr 25 18:38 ..
-rw-r--r-- 1 ec2-user ec2-user      1358 Apr 25 18:43 config.json
-rw-r--r-- 1 ec2-user ec2-user 267959068 Apr 25 18:43 tf_model.h5


In [11]:
cat $transformer_model_dir/config.json

{
  "_num_labels": 5,
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "bad_words_ids": null,
  "bos_token_id": null,
  "decoder_start_token_id": null,
  "dim": 768,
  "do_sample": false,
  "dropout": 0.1,
  "early_stopping": false,
  "eos_token_id": null,
  "finetuning_task": null,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "length_penalty": 1.0,
  "max_length": 20,
  "max_position_embeddings": 512,
  "min_length": 0,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "no_repeat_ngram_size": 0,
  "num_beams": 1,
  "num_return_sequences": 1,
  "output_attentions": false,
  "output

In [12]:
import os
from transformers import TFDistilBertForSequenceClassification

# The classifier has five output classes (indices 0-4); each is reported as
# class index + 1, i.e. the labels 1-5.
# NOTE(review): presumably these are 1-5 star ratings — confirm against the training data.
class_id_to_label = {class_id: class_id + 1 for class_id in range(5)}
label_to_class_id = {label: class_id for class_id, label in class_id_to_label.items()}

# Load the fine-tuned weights and config from the extracted model directory,
# replacing the generic LABEL_n names stored in config.json with the maps above.
loaded_model = TFDistilBertForSequenceClassification.from_pretrained(
    transformer_model_dir,
    id2label=class_id_to_label,
    label2id=label_to_class_id,
)

In [13]:
from transformers import DistilBertTokenizer

# The fine-tuned directory listed above contains only config.json and
# tf_model.h5, so the tokenizer is loaded from the base
# 'distilbert-base-uncased' checkpoint rather than from that directory.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Device index passed to the inference pipeline; echoed so the choice is
# visible in the notebook output.
inference_device = -1 # CPU: -1, GPU: 0
print('inference_device {}'.format(inference_device))

inference_device -1


In [14]:
from transformers import TextClassificationPipeline

# Wrap the fine-tuned model and its tokenizer in a text-classification pipeline
# running on TensorFlow ('tf'). A device of -1 means CPU; >= 0 selects a GPU.
inference_pipeline = TextClassificationPipeline(
    model=loaded_model,
    tokenizer=tokenizer,
    framework='tf',
    device=inference_device,
)

In [15]:
# Clearly positive review: classify it and print the text next to the prediction.
review = "I loved it!  I will recommend this to everyone."
prediction = inference_pipeline(review)
print(review, prediction)

I loved it!  I will recommend this to everyone. [{'label': 5, 'score': 0.91903657}]


In [16]:
# Clearly negative review: classify it and print the text next to the prediction.
review = "Really bad.  I hope they don't make this anymore."
prediction = inference_pipeline(review)
print(review, prediction)

Really bad.  I hope they don't make this anymore. [{'label': 1, 'score': 0.7303662}]


In [17]:
# Hedged, mildly positive review: classify it and print the text next to the prediction.
review = "I am not sure but it is good."
prediction = inference_pipeline(review)
print(review, prediction)

I am not sure but it is good. [{'label': 4, 'score': 0.43670082}]
