In [1]:
!pip install "sagemaker>=2.48.0" "transformers==4.6.1" "datasets[s3]==1.6.2" --upgrade

Collecting sagemaker>=2.48.0
  Downloading sagemaker-2.63.2.tar.gz (447 kB)
[K     |████████████████████████████████| 447 kB 4.7 MB/s eta 0:00:01
[?25hCollecting transformers==4.6.1
  Downloading transformers-4.6.1-py3-none-any.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 62.9 MB/s eta 0:00:01
[?25hCollecting datasets[s3]==1.6.2
  Downloading datasets-1.6.2-py3-none-any.whl (221 kB)
[K     |████████████████████████████████| 221 kB 84.9 MB/s eta 0:00:01
Collecting huggingface-hub==0.0.8
  Downloading huggingface_hub-0.0.8-py3-none-any.whl (34 kB)
Collecting botocore==1.19.52
  Downloading botocore-1.19.52-py2.py3-none-any.whl (7.2 MB)
[K     |████████████████████████████████| 7.2 MB 30.4 MB/s eta 0:00:01
[?25hCollecting boto3==1.16.43
  Downloading boto3-1.16.43-py2.py3-none-any.whl (130 kB)
[K     |████████████████████████████████| 130 kB 80.3 MB/s eta 0:00:01
[?25hCollecting s3transfer<0.4.0,>=0.3.0
  Downloading s3transfer-0.3.7-py2.py3-none-any.whl (73 kB)


In [2]:
import sagemaker.huggingface
import sagemaker

sess = sagemaker.Session()
role = sagemaker.get_execution_role()

print(f"IAM role arn used for running training: {role}")
print(f"S3 bucket used for storing artifacts: {sess.default_bucket()}")

IAM role arn used for running training: arn:aws:iam::847380964353:role/spot-bot-SpotSageMakerExecutionRole-917OYJPI7O18
S3 bucket used for storing artifacts: sagemaker-us-east-2-847380964353


In [5]:
git_config = {'repo': 'https://github.com/huggingface/transformers.git','branch': 'v4.6.1'} # v4.6.1 is referring to the `transformers_version` you use in the estimator.

In [6]:
hyperparameters={'per_device_train_batch_size':1,
                 'per_device_eval_batch_size': 1,
                 'model_name_or_path': 'google/mt5-base',
                 'train_file':'/opt/ml/input/data/train/train.csv',
                 'validation_file':'/opt/ml/input/data/test/test.csv',
                 'test_file':'/opt/ml/input/data/test/test.csv',
                 'do_train': True,
                 'do_predict': True,
                 'do_eval': True,
                 'text_column':'article',
                 'summary_column':'summarization',
                 'save_total_limit':3,
                 'num_train_epochs': 1,
                 'predict_with_generate': True,
                 'output_dir': '/opt/ml/model',
                 'num_train_epochs': 1,
                 'learning_rate': 5e-5,
                 'seed': 7,
                 'fp16': True,
                 'source_prefix': "summarize: ",
                 'eval_steps': 1000,
                 }

# configuration for running training on smdistributed Data Parallel
distribution = {'smdistributed':{'dataparallel':{ 'enabled': True }}}

In [8]:
from sagemaker.huggingface import HuggingFace

# create the Estimator
huggingface_estimator = HuggingFace(
      entry_point='run_summarization.py', # script
      source_dir='./examples/pytorch/summarization', # relative path to example
      git_config=git_config,
      instance_type='ml.p3.16xlarge',
      instance_count=1,
      volume_size=500,
      transformers_version='4.6',
      pytorch_version='1.7',
      py_version='py36',
      role=role,
      hyperparameters = hyperparameters,
      distribution = distribution
)


In [None]:
huggingface_estimator.fit({'train':'s3://datalab2021/hk01/train_data/SUMMARY.hk01meta/train.csv','test':'s3://datalab2021/hk01/train_data/SUMMARY.hk01meta/test.csv'})

2021-10-20 07:48:10 Starting - Starting the training job...
2021-10-20 07:48:33 Starting - Launching requested ML instancesProfilerReport-1634716085: InProgress
.........
2021-10-20 07:49:54 Starting - Preparing the instances for training......
2021-10-20 07:51:09 Downloading - Downloading input data...
2021-10-20 07:51:34 Training - Downloading the training image................[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-10-20 07:54:17,828 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-10-20 07:54:17,906 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-10-20 07:54:20,939 sagemaker_pytorch_container.training INFO     Invoking SMDataParallel[0m
[34m2021-10-20 07:54:20,940 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-10-20 07:54:21,364 

In [5]:
from sagemaker.huggingface.model import HuggingFaceModel

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data="s3://sagemaker-us-west-2-847380964353/huggingface-pytorch-training-2021-10-14-09-10-28-789/output/model.tar.gz",  # path to your trained sagemaker model
   role=role, # iam role with permissions to create an Endpoint
   transformers_version="4.6", # transformers version used
   pytorch_version="1.7", # pytorch version used
   py_version='py36', # python version used
)
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type="ml.p3.2xlarge"
)

# example request, you always need to define "inputs"
data = {
   "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
}

# request
predictor.predict(data)


--------------------

KeyboardInterrupt: 