In [None]:
!pip install transformers==4.2.0
!pip install torch==1.5.1 torchvision==0.6.1
!pip install --upgrade sagemaker

In [1]:
"""Launch SageMaker training job from local."""
from datetime import datetime
from sagemaker.pytorch.estimator import PyTorch
import os
import boto3
import json
import pandas as pd
import sagemaker
import torch
# Show which torch version this kernel actually imports. The install cell pins
# torch==1.5.1 and the estimator below requests framework_version='1.5.0', so
# this is a quick check that the local environment is the expected one.
print(torch.__version__)

1.5.1


In [3]:
#!unzip data.zip

In [2]:

# Define AWS sessions.
sagemaker_session = sagemaker.Session()
# Role ARN with SageMaker and S3 access. The SAGEMAKER_ROLE_ARN environment
# variable takes precedence so the notebook can be run from another AWS
# account without editing this cell; the default is the original role.
role = os.environ.get(
    "SAGEMAKER_ROLE_ARN",
    "arn:aws:iam::046704982951:role/service-role/AmazonSageMaker-ExecutionRole-20210622T142702",
)
# Define S3 variables for data and model storage.
bucket = "sagemaker-dan-rasmussen"
model_prefix = "sagemaker/amazon_review_classifier/train"
train_file = "small_book_reviews.json"
output_path = f"s3://{bucket}/{model_prefix}/model"
code_path = f"s3://{bucket}/{model_prefix}/src"
# Upload data to S3 where it can be accessed by SageMaker.
# upload_data() returns the S3 URI of the uploaded file
# (s3://{bucket}/{key_prefix}/{file name}), so input_path is taken from the
# call itself and cannot drift from the location that was actually written.
input_path = sagemaker_session.upload_data(
    path=f"./data/{train_file}",
    bucket=bucket,
    key_prefix=f"{model_prefix}/input_data",
)
# Define hyperparameters which get passed as command-line args to model.py.
hyperparameters = {
    "input_path": input_path,  # Where our model will read the training data.
    "model_name": "distilbert-base-uncased",  # https://huggingface.co/distilbert-base-uncased
    "train_batch_size": 32,
    "valid_batch_size": 128,
    "epochs": 2,
    "learning_rate": 5e-4,
    "weight_decay": .01,
    "max_sequence_length": 128,
    "max_data_rows": 1000,
}


In [4]:
# Build the SageMaker PyTorch estimator, then launch the training job.
pytorch_estimator = PyTorch(
    # What to run: ./src is the source directory and model.py the script in it.
    source_dir="./src",
    entry_point="model.py",
    # Passed as command-line args to entry_point.
    hyperparameters=hyperparameters,
    # Runtime: PyTorch version and Python version.
    framework_version="1.5.0",
    py_version="py3",
    # Hardware: a single instance with GPUs ('ml.p2.xlarge' was the earlier choice).
    instance_count=1,
    instance_type="ml.g4dn.xlarge",
    # S3 locations: where source_dir gets stored and where model outputs go.
    code_location=code_path,
    output_path=output_path,
    # Role with SageMaker access, and the session to submit through.
    role=role,
    sagemaker_session=sagemaker_session,
)
# A timestamp suffix keeps the job name different from one launch to the next.
run_stamp = datetime.now().strftime('%Y%m%d%H%M%S')
pytorch_estimator.fit(inputs=None, job_name=f"amazon-review-model-{run_stamp}")

2021-06-24 20:29:07 Starting - Starting the training job...
2021-06-24 20:29:09 Starting - Launching requested ML instancesProfilerReport-1624566546: InProgress
......
2021-06-24 20:30:36 Starting - Preparing the instances for training.........
2021-06-24 20:31:59 Downloading - Downloading input data
2021-06-24 20:31:59 Training - Downloading the training image........[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-06-24 20:33:17,000 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-06-24 20:33:17,033 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-06-24 20:33:23,256 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-06-24 20:33:23,530 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/opt/conda/bin/python3

In [5]:
# Host the trained estimator on a SageMaker endpoint (one ml.m5.xlarge
# instance); deploy() hands back the Predictor used in the cells below.
predictor = pytorch_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
)

-------------!

In [30]:
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer

# Exchange JSON with the endpoint in both directions.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

# Sample review used as the single element of the request batch.
text = "A Must!!! I have found this book to be very helpful in my practice and defer to the wisdom within quite often.  I think Fagbemijo has put together a wonderful piece of work that is not only based in tradition, but allows for adaptation in terms of bringing things back into balance.  While there are other books out there that provide an odu verse or two, &#34;An Exploration...&#34; gives us multiple to choose from that may fit the client's problem more explicitly.  Moreover, we aren't presented with prescriptions for obscure African herbs and offerings of 20,000 cowries. Who has time to figure out the conversion rate on that?  I initially thought the absence of remedies from the book was a bit odd, but in truth it allows for the diviner and their Ifa to determine what might fit best the problem in this day and age.  What an inspired move.  A must have.  I highly recommend!"

# Request headers, spelled out explicitly and forwarded via initial_args.
json_headers = {'ContentType': 'application/json', 'Accept': 'application/json'}
response = predictor.predict([text], initial_args=json_headers)
response

[[[-0.9475367665290833, 1.2128119468688965]]]

In [31]:
# Cleaning up resources: ask SageMaker to delete the endpoint behind `predictor`.
# NOTE(review): a later cell calls invoke_endpoint through boto3. If that cell
# targets this same endpoint it has to run before this one — confirm the
# intended cell order before using "Run All".
predictor.delete_endpoint()

In [22]:
import boto3
import json
from IPython.display import display

# Call the endpoint through the low-level boto3 runtime client instead of the
# SageMaker Predictor wrapper.
runtime = boto3.client('sagemaker-runtime')

text = "A Must!!! I have found this book to be very helpful in my practice and defer to the wisdom within quite often.  I think Fagbemijo has put together a wonderful piece of work that is not only based in tradition, but allows for adaptation in terms of bringing things back into balance.  While there are other books out there that provide an odu verse or two, &#34;An Exploration...&#34; gives us multiple to choose from that may fit the client's problem more explicitly.  Moreover, we aren't presented with prescriptions for obscure African herbs and offerings of 20,000 cowries. Who has time to figure out the conversion rate on that?  I initially thought the absence of remedies from the book was a bit odd, but in truth it allows for the diviner and their Ifa to determine what might fit best the problem in this day and age.  What an inspired move.  A must have.  I highly recommend!"
# The request payload is a JSON list of review strings.
body = json.dumps([text])
display(body)

# Target the endpoint created by deploy() in this session rather than a name
# hard-coded from an earlier run, which stops existing once that endpoint is
# deleted.
# NOTE: the endpoint must still be live, so run this cell before the clean-up
# cell that calls predictor.delete_endpoint().
response = runtime.invoke_endpoint(
    EndpointName=predictor.endpoint_name,
    ContentType='application/json',
    Accept='application/json',
    Body=body
)
display(response)
# response['Body'] is a stream object; read it and decode the bytes as UTF-8
# before parsing the JSON document.
result = json.loads(response['Body'].read().decode('utf-8'))
result

'["A Must!!! I have found this book to be very helpful in my practice and defer to the wisdom within quite often.  I think Fagbemijo has put together a wonderful piece of work that is not only based in tradition, but allows for adaptation in terms of bringing things back into balance.  While there are other books out there that provide an odu verse or two, &#34;An Exploration...&#34; gives us multiple to choose from that may fit the client\'s problem more explicitly.  Moreover, we aren\'t presented with prescriptions for obscure African herbs and offerings of 20,000 cowries. Who has time to figure out the conversion rate on that?  I initially thought the absence of remedies from the book was a bit odd, but in truth it allows for the diviner and their Ifa to determine what might fit best the problem in this day and age.  What an inspired move.  A must have.  I highly recommend!"]'

{'ResponseMetadata': {'RequestId': '27cb190f-f518-4299-a791-7822587a457a',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '27cb190f-f518-4299-a791-7822587a457a',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Thu, 24 Jun 2021 21:16:24 GMT',
   'content-type': 'application/json',
   'content-length': '45'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x7f68882f4cc0>}

[[[-1.0770901441574097, 1.2103785276412964]]]

# Tests

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (here: src/model.py) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
from src import model
from transformers import AutoModelForSequenceClassification
import json

ModuleNotFoundError: No module named 'transformers'

In [3]:
#model.get_tokenizer("distilbert-base-uncased")

In [4]:
#model.encode_sequences(["hello world", "hello world"])

In [5]:
# Instantiate a two-label DistilBERT classifier from the pretrained checkpoint
# with torchscript=True, then pass it to the project's save_model helper to be
# saved into the current directory.
dummy_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    torchscript=True,
    num_labels=2,
)
model.save_model(dummy_model, ".")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Save model...


  input_tensor.shape[chunk_dim] == tensor_shape for input_tensor in input_tensors


Model path: ./model.pth


In [6]:
# Load a model from the current directory through the script's model_fn hook,
# replacing the in-memory dummy_model with the loaded one.
# NOTE(review): presumably this reads the ./model.pth written by save_model in
# the previous cell — confirm against src/model.py.
dummy_model = model.model_fn('.')

model_fn


In [7]:
# Build a UTF-8 encoded JSON request body holding the same sentence twice and
# pass it to the script's input_fn together with its content type.
data = bytes(json.dumps(['hello world'] * 2), 'utf-8')
input_data = model.input_fn(data, 'application/json')

input_fn
request_content_type: application/json
b'["hello world", "hello world"]'
['hello world', 'hello world']


In [8]:
# Run the inputs prepared by input_fn through the reloaded model with the
# script's predict_fn; the bare call is the last expression, so its return
# value is displayed as the cell output.
model.predict_fn(input_data, dummy_model)

predict_fn


(tensor([[0.0137, 0.1074],
         [0.0137, 0.1074]]),)

1.4.0
