In [1]:
!pip install -q sagemaker sagemaker[local]

[33mYou are using pip version 19.0.3, however version 20.2b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import sagemaker
import boto3

sagemaker_session = sagemaker.Session()

bucket = sagemaker_session.default_bucket()
prefix = 'sagemaker/hunkim-kogpt2'

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='hunkimSagemaker')['Role']['Arn']
    
print(role)

arn:aws:iam::294038372338:role/hunkimSagemaker


In [3]:
%%bash
mkdir data
touch data/README.md

mkdir: data: File exists


In [4]:
inputs = sagemaker_session.upload_data(path='data', bucket=bucket, key_prefix=prefix)
print('input spec (in this case, just an S3 path): {}'.format(inputs))

input spec (in this case, just an S3 path): s3://sagemaker-us-west-2-294038372338/sagemaker/hunkim-kogpt2


## Dummy train
It just store the models and voc data

In [5]:
from sagemaker.mxnet import MXNet

m = MXNet('train.py',
          source_dir='KoGPT2',
          role=role,
          train_instance_count=1,
          train_instance_type='ml.c4.xlarge',
          #train_instance_type='local',
          framework_version='1.6.0',
          py_version='py3',
          # distributions={'parameter_server': {'enabled': True}},
          # available hyperparameters: emsize, nhid, nlayers, lr, clip, epochs, batch_size,        
          hyperparameters={'batch_size': 8,
                           'epochs': 2,
                           'lr': 0.01})

In [6]:
m.fit(inputs)

██████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[██████████████████████████████████................]#015[███████████████████████████████████...............]#015[███████████████████████████████████...............]#015[███████████████████████████████████...............]#015[███████████████████████████████████...............]#015[███████████████████████████████████...............]#015[████████

In [7]:
from sagemaker.mxnet.model import MXNetModel

training_job_name = m.latest_training_job.name
desc = sagemaker_session.sagemaker_client.describe_training_job(TrainingJobName=training_job_name)
trained_model_location = desc['ModelArtifacts']['S3ModelArtifacts']
print(trained_model_location)
#s3://sagemaker-us-west-2-294038372338/mxnet-training-2020-06-07-02-55-29-667/output/model.tar.gz

mm = MXNetModel(model_data=trained_model_location,
          role=role,
          framework_version='1.6.0',
          entry_point='infer.py',
          source_dir='KoGPT2',
          py_version='py3',
          model_server_workers=2)

s3://sagemaker-us-west-2-294038372338/mxnet-training-2020-06-09-11-28-44-000/output/model.tar.gz


In [8]:
predictor = mm.deploy(initial_instance_count=1, 
                    instance_type='local')
                    #instance_type='ml.c5.xlarge')

-------------!

In [9]:
endpoint_name = predictor.endpoint
print(endpoint_name)

mxnet-inference-2020-06-09-11-37-31-656


In [10]:
import sagemaker
from sagemaker.mxnet.model import MXNetPredictor

def infer(input_sentence='아기 공룡 둘리는 희동이와', debug=False):
  predictor = MXNetPredictor(endpoint_name)

  pred_latency_sum = 0
  pred_count_sum = 0
  pred_cnt = 0

  for i in range(20):
    pred_out = predictor.predict(input_sentence)
    if i == 0:
      continue
    
    predicted_sentence= pred_out[0]
    predict_count = pred_out[1]
    predict_latency = pred_out[2]
  
    pred_latency_sum += predict_latency
    pred_count_sum =+ predict_count
    pred_cnt += 1


  avg_latency = pred_latency_sum / pred_cnt
  avg_latency_per_inf = avg_latency / pred_count_sum

  if debug:
    print('Input sentence: {}'.format(input_sentence))
    print('Predicted sentence: {}'.format(predicted_sentence))
    print('Average number of inferenced token: {:.2f}'.format(pred_count_sum))
    print('Average inference latency for a sentence completion: {:.2f}'.format(avg_latency))
    print('Average inference latency per a token: {:.2f}\n'.format(avg_latency_per_inf))
  
  return predicted_sentence

In [11]:
%%time
infer()

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from model with message "{
  "code": 500,
  "type": "InternalServerException",
  "message": "Worker died."
}
". See https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logEventViewer:group=/aws/sagemaker/Endpoints/mxnet-inference-2020-06-09-11-37-31-656 in account 294038372338 for more information.

## Cleanup

After you have finished with this example, remember to delete the prediction endpoint to release the instance(s) associated with it.

In [12]:
predictor.delete_endpoint()