# Deploying A BERT-based Classifier

This model analyzes customer feedback and classifies the messages into positive (1), neutral (0), and negative (-1) sentiments.

In [1]:
!pip install --disable-pip-version-check -q sagemaker==2.35.0
!conda install -q -y pytorch==1.6.0 -c pytorch
!pip install --disable-pip-version-check -q transformers==3.5.1

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [2]:
import boto3
import sagemaker
import pandas as pd
import botocore

config = botocore.config.Config(user_agent_extra='dlai-pds/c3/w1')

sm = boto3.client(service_name='sagemaker', 
                  config=config)

sess = sagemaker.Session(sagemaker_client=sm)

bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name

<a name='c3w1-1.'></a>
# Configure dataset and Hyperparameter Tuning Job (HTP)

<a name='c3w1-1.1.'></a>
Set up the paths and copy the data to the S3 bucket:

In [3]:
processed_train_data_s3_uri = 's3://{}/transformed/data/sentiment-train/'.format(bucket)
processed_validation_data_s3_uri = 's3://{}/transformed/data/sentiment-validation/'.format(bucket)
processed_test_data_s3_uri = 's3://{}/transformed/data/sentiment-test/'.format(bucket)

Upload the data to the S3 bucket:

In [4]:
!aws s3 cp --recursive ./data/sentiment-train $processed_train_data_s3_uri
!aws s3 cp --recursive ./data/sentiment-validation $processed_validation_data_s3_uri
!aws s3 cp --recursive ./data/sentiment-test $processed_test_data_s3_uri

upload: data/sentiment-train/part-algo-1-womens_clothing_ecommerce_reviews.tsv to s3://sagemaker-us-east-1-593402094095/transformed/data/sentiment-train/part-algo-1-womens_clothing_ecommerce_reviews.tsv
upload: data/sentiment-validation/part-algo-1-womens_clothing_ecommerce_reviews.tsv to s3://sagemaker-us-east-1-593402094095/transformed/data/sentiment-validation/part-algo-1-womens_clothing_ecommerce_reviews.tsv
upload: data/sentiment-test/part-algo-1-womens_clothing_ecommerce_reviews.tsv to s3://sagemaker-us-east-1-593402094095/transformed/data/sentiment-test/part-algo-1-womens_clothing_ecommerce_reviews.tsv


Check the existence of those files in the S3 bucket:

In [5]:
!aws s3 ls --recursive $processed_train_data_s3_uri

2021-09-16 08:57:37    4894416 transformed/data/sentiment-train/part-algo-1-womens_clothing_ecommerce_reviews.tsv


In [6]:
!aws s3 ls --recursive $processed_validation_data_s3_uri

2021-09-16 08:57:37     276522 transformed/data/sentiment-validation/part-algo-1-womens_clothing_ecommerce_reviews.tsv


In [7]:
!aws s3 ls --recursive $processed_test_data_s3_uri

2021-09-16 08:57:38     273414 transformed/data/sentiment-test/part-algo-1-womens_clothing_ecommerce_reviews.tsv


<a name='c3w1-ex-1'></a>

Set up a dictionary of the input training and validation data channels, wrapping the corresponding S3 locations in a "TrainingInput" object.

In [8]:
from sagemaker.inputs import TrainingInput

data_channels = {
    'train': processed_train_data_s3_uri,
    'validation': processed_validation_data_s3_uri
}

<a name='c3w1-1.2.'></a>
### Configure Hyperparameter Tuning Job

In [9]:
max_seq_length=128 
freeze_bert_layer=False 
epochs=3
train_steps_per_epoch=50
validation_batch_size=64
validation_steps_per_epoch=50
seed=42

train_instance_count=1
train_instance_type='ml.c5.9xlarge'
train_volume_size=256
input_mode='File'
run_validation=True

Set up the dictionary for some of these to be be passed into the PyTorch estimator and tuner in the hyperparameters argument:

In [10]:
hyperparameters_static={
    'freeze_bert_layer': freeze_bert_layer,
    'max_seq_length': max_seq_length,
    'epochs': epochs,
    'train_steps_per_epoch': train_steps_per_epoch,
    'validation_batch_size': validation_batch_size,
    'validation_steps_per_epoch': validation_steps_per_epoch,
    'seed': seed,
    'run_validation': run_validation
}

Configure hyperparameter ranges to explore in the Tuning Job. 

In [11]:
from sagemaker.tuner import IntegerParameter
from sagemaker.tuner import ContinuousParameter
from sagemaker.tuner import CategoricalParameter
                                                
hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.00001, 0.00005, scaling_type='Linear'), # specifying continuous variable type, the tuning job will explore the range of values
    'train_batch_size': CategoricalParameter([128, 256]), # specifying categorical variable type, the tuning job will explore only listed values
}

<a name='c3w1-1.3.'></a>
### Set up evaluation metrics

In [12]:
metric_definitions = [
     {'Name': 'validation:loss', 'Regex': 'val_loss: ([0-9.]+)'},
     {'Name': 'validation:accuracy', 'Regex': 'val_acc: ([0-9.]+)'},
]

<a name='c3w1-2.1.'></a>
### Set up the RoBERTa and PyTorch script to run on SageMaker

In [13]:
from sagemaker.pytorch import PyTorch as PyTorchEstimator
# Rename the PyTorch estimator for clarity purposes downstream

estimator = PyTorchEstimator(
    entry_point='train.py',
    source_dir='src',
    role=role,
    instance_count=train_instance_count,
    instance_type=train_instance_type,
    volume_size=train_volume_size,
    py_version='py3',
    framework_version='1.6.0',
    hyperparameters=hyperparameters_static,
    metric_definitions=metric_definitions,
    input_mode=input_mode,
)

<a name='c3w1-ex-2'></a>
### Set up the Hyperparameter Tuner.

In [14]:
from sagemaker.tuner import HyperparameterTuner

tuner = HyperparameterTuner(
    estimator=estimator,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy='Random',
    objective_type='Maximize',
    objective_metric_name='validation:accuracy',
    max_jobs=2, 
    max_parallel_jobs=2, 
    early_stopping_type='Auto'
)

<a name='c3w1-ex-3'></a>
### Launch the SageMaker Hyper-Parameter Tuning (HPT) Job


In [15]:
tuner.fit(
    inputs=data_channels,
    include_cls_metadata=False,
    wait=False
)

<a name='c3w1-2.3.'></a>
### Check Tuning Job status

In [16]:
tuning_job_name = tuner.latest_tuning_job.job_name
print(tuning_job_name)

pytorch-training-210916-0857


In [17]:
from IPython.core.display import display, HTML
    
display(HTML('<b>Review <a target="blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/hyper-tuning-jobs/{}">Hyper-Parameter Tuning Job</a></b>'.format(region, tuning_job_name)))

In [18]:
%%time

tuner.wait()

..................................................................................................................................................................................................................................................................................................................................................................!
CPU times: user 1.38 s, sys: 226 ms, total: 1.6 s
Wall time: 29min 48s


In [19]:
import time

time.sleep(10) # slight delay to allow the analytics to be calculated

df_results = tuner.analytics().dataframe()
df_results.shape

(2, 8)

In [20]:
df_results.sort_values('FinalObjectiveValue', ascending=0)

Unnamed: 0,learning_rate,train_batch_size,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,2e-05,"""128""",pytorch-training-210916-0857-002-fd13714c,Completed,70.699997,2021-09-16 09:00:24+00:00,2021-09-16 09:25:10+00:00,1486.0
1,1.5e-05,"""128""",pytorch-training-210916-0857-001-fa4b6e1a,Stopped,37.110001,2021-09-16 09:01:06+00:00,2021-09-16 09:10:10+00:00,544.0


In [21]:
from IPython.core.display import display, HTML
    
display(HTML('<b>Review Training Jobs of the <a target="blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/hyper-tuning-jobs/{}">Hyper-Parameter Tuning Job</a></b>'.format(region, tuning_job_name)))

<a name='c3w1-3.'></a>
# Evaluate the results

<a name='c3w1-ex-4'></a>
### Show the best candidate

In [22]:
df_results.sort_values(
    'FinalObjectiveValue',
    ascending=0).head(1)

Unnamed: 0,learning_rate,train_batch_size,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,2e-05,"""128""",pytorch-training-210916-0857-002-fd13714c,Completed,70.699997,2021-09-16 09:00:24+00:00,2021-09-16 09:25:10+00:00,1486.0


<a name='c3w1-3.2.'></a>
### Evaluate  best candidate

In [23]:
best_candidate = df_results.sort_values('FinalObjectiveValue', ascending=0).iloc[0]

best_candidate_training_job_name = best_candidate['TrainingJobName']
print('Best candidate Training Job name: {}'.format(best_candidate_training_job_name))

Best candidate Training Job name: pytorch-training-210916-0857-002-fd13714c


<a name='c3w1-ex-5'></a>
### Show accuracy result for the best candidate

In [24]:
best_candidate_accuracy = best_candidate['FinalObjectiveValue']

print('Best candidate accuracy result: {}'.format(best_candidate_accuracy))

Best candidate accuracy result: 70.69999694824219


In [25]:
best_candidate_description = sm.describe_training_job(TrainingJobName=best_candidate_training_job_name)

best_candidate_training_job_name2 = best_candidate_description['TrainingJobName']

print('Training Job name: {}'.format(best_candidate_training_job_name2))

Training Job name: pytorch-training-210916-0857-002-fd13714c


<a name='c3w1-ex-6'></a>
### Pull the Tuning Job and Training Job Amazon Resource Name (ARN) from the best candidate training job description

In [26]:
print(best_candidate_description.keys())

dict_keys(['TrainingJobName', 'TrainingJobArn', 'TuningJobArn', 'ModelArtifacts', 'TrainingJobStatus', 'SecondaryStatus', 'HyperParameters', 'AlgorithmSpecification', 'RoleArn', 'InputDataConfig', 'OutputDataConfig', 'ResourceConfig', 'StoppingCondition', 'CreationTime', 'TrainingStartTime', 'TrainingEndTime', 'LastModifiedTime', 'SecondaryStatusTransitions', 'FinalMetricDataList', 'EnableNetworkIsolation', 'EnableInterContainerTrafficEncryption', 'EnableManagedSpotTraining', 'TrainingTimeInSeconds', 'BillableTimeInSeconds', 'ProfilingStatus', 'ResponseMetadata'])


In [27]:
best_candidate_tuning_job_arn = best_candidate_description['TuningJobArn'] 
best_candidate_training_job_arn = best_candidate_description['TrainingJobArn'] 
print('Best candidate Tuning Job ARN: {}'.format(best_candidate_tuning_job_arn))
print('Best candidate Training Job ARN: {}'.format(best_candidate_training_job_arn))

Best candidate Tuning Job ARN: arn:aws:sagemaker:us-east-1:593402094095:hyper-parameter-tuning-job/pytorch-training-210916-0857
Best candidate Training Job ARN: arn:aws:sagemaker:us-east-1:593402094095:training-job/pytorch-training-210916-0857-002-fd13714c


Pull the path of the best candidate model in the S3 bucket to prep for later-- setting up the Processing Job for the evaluation

In [28]:
model_tar_s3_uri = sm.describe_training_job(TrainingJobName=best_candidate_training_job_name)['ModelArtifacts']['S3ModelArtifacts']
print(model_tar_s3_uri)

s3://sagemaker-us-east-1-593402094095/pytorch-training-210916-0857-002-fd13714c/output/model.tar.gz


Model evaluation you using a scikit-learn-based Processing Job

In [29]:
from sagemaker.sklearn.processing import SKLearnProcessor

processing_instance_type = "ml.c5.2xlarge"
processing_instance_count = 1

processor = SKLearnProcessor(
    framework_version="0.23-1",
    role=role,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    max_runtime_in_seconds=7200,
)

Launch the Processing Job, passing the defined above parameters, custom script, path and the S3 bucket location of the test data.

In [30]:
from sagemaker.processing import ProcessingInput, ProcessingOutput

processor.run(
    code="src/evaluate_model_metrics.py",
    inputs=[
        ProcessingInput(  
            input_name="model-tar-s3-uri",                        
            source=model_tar_s3_uri,                               
            destination="/opt/ml/processing/input/model/"
        ),
        ProcessingInput(
            input_name="evaluation-data-s3-uri",
            source=processed_test_data_s3_uri,                                    
            destination="/opt/ml/processing/input/data/",
        ),
    ],
    outputs=[
        ProcessingOutput(s3_upload_mode="EndOfJob", output_name="metrics", source="/opt/ml/processing/output/metrics"),
    ],
    arguments=["--max-seq-length", str(max_seq_length)],
    logs=True,
    wait=False,
)


Job Name:  sagemaker-scikit-learn-2021-09-16-09-27-40-683
Inputs:  [{'InputName': 'model-tar-s3-uri', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-593402094095/pytorch-training-210916-0857-002-fd13714c/output/model.tar.gz', 'LocalPath': '/opt/ml/processing/input/model/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'evaluation-data-s3-uri', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-593402094095/transformed/data/sentiment-test/', 'LocalPath': '/opt/ml/processing/input/data/', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'code', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-593402094095/sagemaker-scikit-learn-2021-09-16-09-27-40-683/input/code/evaluate_model_metrics.py', 'LocalPath': '/opt/ml/processing/input/code', 'S3DataType': 'S

In [31]:
scikit_processing_job_name = processor.jobs[-1].describe()["ProcessingJobName"]

print('Processing Job name: {}'.format(scikit_processing_job_name))

Processing Job name: sagemaker-scikit-learn-2021-09-16-09-27-40-683


<a name='c3w1-ex-7'></a>
### Pull the Processing Job status from the Processing Job description

In [32]:
print(processor.jobs[-1].describe().keys())

dict_keys(['ProcessingInputs', 'ProcessingOutputConfig', 'ProcessingJobName', 'ProcessingResources', 'StoppingCondition', 'AppSpecification', 'RoleArn', 'ProcessingJobArn', 'ProcessingJobStatus', 'LastModifiedTime', 'CreationTime', 'ResponseMetadata'])


In [33]:
scikit_processing_job_status = processor.jobs[-1].describe()['ProcessingJobStatus'] 
print('Processing job status: {}'.format(scikit_processing_job_status))

Processing job status: InProgress


Review the created Processing Job in the AWS console.

In [34]:
from IPython.core.display import display, HTML

display(
    HTML(
        '<b>Review <a target="blank" href="https://console.aws.amazon.com/sagemaker/home?region={}#/processing-jobs/{}">Processing Job</a></b>'.format(
            region, scikit_processing_job_name
        )
    )
)

In [35]:
from IPython.core.display import display, HTML

display(
    HTML(
        '<b>Review <a target="blank" href="https://console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=/aws/sagemaker/ProcessingJobs;prefix={};streamFilter=typeLogStreamPrefix">CloudWatch Logs</a> after about 5 minutes</b>'.format(
            region, scikit_processing_job_name
        )
    )
)

In [36]:
from IPython.core.display import display, HTML

display(
    HTML(
        '<b>Review <a target="blank" href="https://s3.console.aws.amazon.com/s3/buckets/{}/{}/?region={}&tab=overview">S3 output data</a> after the Processing Job has completed</b>'.format(
            bucket, scikit_processing_job_name, region
        )
    )
)

Monitor the Processing Job:

In [37]:
from pprint import pprint

running_processor = sagemaker.processing.ProcessingJob.from_processing_name(
    processing_job_name=scikit_processing_job_name, sagemaker_session=sess
)

processing_job_description = running_processor.describe()

pprint(processing_job_description)

{'AppSpecification': {'ContainerArguments': ['--max-seq-length', '128'],
                      'ContainerEntrypoint': ['python3',
                                              '/opt/ml/processing/input/code/evaluate_model_metrics.py'],
                      'ImageUri': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3'},
 'CreationTime': datetime.datetime(2021, 9, 16, 9, 27, 41, 193000, tzinfo=tzlocal()),
 'LastModifiedTime': datetime.datetime(2021, 9, 16, 9, 27, 41, 610000, tzinfo=tzlocal()),
 'ProcessingInputs': [{'AppManaged': False,
                       'InputName': 'model-tar-s3-uri',
                       'S3Input': {'LocalPath': '/opt/ml/processing/input/model/',
                                   'S3CompressionType': 'None',
                                   'S3DataDistributionType': 'FullyReplicated',
                                   'S3DataType': 'S3Prefix',
                                   'S3InputMode': 'File',
                     

In [38]:
%%time

running_processor.wait(logs=False)

....................................................................................................!CPU times: user 379 ms, sys: 62.3 ms, total: 441 ms
Wall time: 8min 22s


<a name='c3w1-3.3.'></a>
### Inspect the processed output data

In [39]:
processing_job_description = running_processor.describe()

output_config = processing_job_description["ProcessingOutputConfig"]
for output in output_config["Outputs"]:
    if output["OutputName"] == "metrics":
        processed_metrics_s3_uri = output["S3Output"]["S3Uri"]

print(processed_metrics_s3_uri)

s3://sagemaker-us-east-1-593402094095/sagemaker-scikit-learn-2021-09-16-09-27-40-683/output/metrics


List the content of the folder:

In [40]:
!aws s3 ls $processed_metrics_s3_uri/

2021-09-16 09:35:57      21158 confusion_matrix.png
2021-09-16 09:35:57         56 evaluation.json


Pulled the test accuracy from the "evaluation.json" file.

In [41]:
import json
from pprint import pprint

metrics_json = sagemaker.s3.S3Downloader.read_file("{}/evaluation.json".format(
    processed_metrics_s3_uri
))

print('Test accuracy: {}'.format(json.loads(metrics_json)))

Test accuracy: {'metrics': {'accuracy': {'value': 0.7249190938511327}}}


Copy image with the confusion matrix generated during the model evaluation into the folder "generated".

In [42]:
!aws s3 cp $processed_metrics_s3_uri/confusion_matrix.png ./generated/

import time
time.sleep(10) # Slight delay for our notebook to recognize the newly-downloaded file

download: s3://sagemaker-us-east-1-593402094095/sagemaker-scikit-learn-2021-09-16-09-27-40-683/output/metrics/confusion_matrix.png to generated/confusion_matrix.png


Show and review the confusion matrix.

In [43]:
%%html

<img src='./generated/confusion_matrix.png'>

In [44]:
!aws s3 cp ./C3_W1_Assignment.ipynb s3://$bucket/C3_W1_Assignment_Learner.ipynb

upload: ./C3_W1_Assignment.ipynb to s3://sagemaker-us-east-1-593402094095/C3_W1_Assignment_Learner.ipynb
