In [1]:
!pip install "sagemaker>=2.48.0" "transformers==4.6.1" "datasets[s3]==1.6.2" --upgrade

Collecting sagemaker>=2.48.0
  Downloading sagemaker-2.68.0.tar.gz (452 kB)
[K     |████████████████████████████████| 452 kB 7.3 MB/s eta 0:00:01
[?25hCollecting transformers==4.6.1
  Downloading transformers-4.6.1-py3-none-any.whl (2.2 MB)
[K     |████████████████████████████████| 2.2 MB 49.2 MB/s eta 0:00:01
[?25hCollecting datasets[s3]==1.6.2
  Downloading datasets-1.6.2-py3-none-any.whl (221 kB)
[K     |████████████████████████████████| 221 kB 74.0 MB/s eta 0:00:01
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 21.1 MB/s eta 0:00:01
Collecting filelock
  Downloading filelock-3.3.2-py3-none-any.whl (9.7 kB)
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 36.6 MB/s eta 0:00:01
Collecting huggingface-hub==0.0.8
  Downlo

In [4]:
import sagemaker.huggingface
import sagemaker

# Session and execution role are reused by the upload, training and deploy cells.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# Report which role and default bucket this notebook will use.
print(
    f"IAM role arn used for running training: {role}",
    f"S3 bucket used for storing artifacts: {sess.default_bucket()}",
    sep="\n",
)

IAM role arn used for running training: arn:aws:iam::847380964353:role/spot-bot-SpotSageMakerExecutionRole-917OYJPI7O18
S3 bucket used for storing artifacts: sagemaker-us-west-2-847380964353


## train-classification

In [2]:
## prepare data
import pandas as pd
import os

# Number of leading rows written to train.csv; every remaining row goes to test.csv.
TRAIN_ROWS = 180000

data = pd.read_csv('../all_saved_train.csv', encoding='latin-1')
# Rename the two columns; "label" is the target column, "v2" holds the other field.
data.columns=["label","v2"]

# Fail fast: with TRAIN_ROWS rows or fewer the test split would be empty and the
# problem would only surface later, after the files have been uploaded.
if len(data) <= TRAIN_ROWS:
    raise ValueError(
        f"Expected more than {TRAIN_ROWS} rows to build a non-empty test split, "
        f"got {len(data)}"
    )

# use csv file to test
# NOTE(review): the split is positional (no shuffling); confirm the source file
# is not ordered by label.
data.iloc[:TRAIN_ROWS].to_csv('./train.csv',index=False,encoding='utf-8')
data.iloc[TRAIN_ROWS:].to_csv('./test.csv',index=False,encoding='utf-8')

In [5]:
import boto3
prefix='hp-datalab'

bucket = sess.default_bucket()

# Upload ./train.csv and ./test.csv to <prefix>/<split>/<split>.csv in the bucket.
# The keys are built with "/" explicitly: S3 keys are not filesystem paths, so
# os.path.join (OS-dependent separator) is the wrong tool, and the f-string form
# matches how the s3:// input URIs are built later in the notebook.
s3_bucket = boto3.Session().resource("s3").Bucket(bucket)
for split in ("train", "test"):
    s3_bucket.Object(f"{prefix}/{split}/{split}.csv").upload_file(f"./{split}.csv")

In [6]:
# s3:// URIs of the uploaded CSVs: s3://<default bucket>/<prefix>/<split>/<split>.csv
training_input_path, test_input_path = (
    f's3://{sess.default_bucket()}/{prefix}/{split}/{split}.csv'
    for split in ('train', 'test')
)

In [7]:
# Where the training script is fetched from.
# v4.6.1 is referring to the `transformers_version` you use in the estimator.
git_config = dict(
    repo='https://github.com/huggingface/transformers.git',
    branch='v4.6.1',
)

In [16]:
# Hyperparameters handed to the training entry point.
# The original dict listed 'num_train_epochs' twice (3, then 1). Python keeps
# only the last duplicate key, so the value actually used was 1; the dead
# entry is removed here so the dict states what really runs.
# NOTE(review): the estimator's base_job_name is 'roberta-large-epoch3' - confirm
# whether 1 or 3 epochs is intended and align the two.
hyperparameters={'per_device_train_batch_size':4,
                 'per_device_eval_batch_size': 4,
                 'model_name_or_path': 'roberta-large',
                 # file locations under /opt/ml/input/data/<channel>/; the same
                 # file is used for both validation and test
                 'train_file':'/opt/ml/input/data/train/train.csv',
                 'validation_file':'/opt/ml/input/data/test/test.csv',
                 'test_file':'/opt/ml/input/data/test/test.csv',
                 'do_train': True,
                 'do_predict': True,
                 'do_eval': True,
                 'save_total_limit':3,
                 'output_dir': '/opt/ml/model',
                 'num_train_epochs': 1,
                 'learning_rate': 5e-5,
                 'seed': 7,
                 'fp16': False,
                 # NOTE(review): presumably only takes effect together with
                 # evaluation_strategy='steps', which is not set here - confirm.
                 'eval_steps': 1000,
                 }


In [23]:
from sagemaker.huggingface import HuggingFace

# Describe the training job: what to run, in which container, on which hardware.
huggingface_estimator = HuggingFace(
    # --- code: run_glue.py from the text-classification examples of the git repo
    git_config=git_config,
    source_dir='./examples/pytorch/text-classification',
    entry_point='run_glue.py',
    hyperparameters=hyperparameters,
    # --- container versions
    transformers_version='4.6',
    pytorch_version='1.7',
    py_version='py36',
    # --- infrastructure and naming
    role=role,
    instance_type='ml.p2.8xlarge',
    instance_count=1,
    volume_size=500,
    base_job_name='roberta-large-epoch3',
)


In [None]:
# Launch the training job. The channel names ('train', 'test') match the
# /opt/ml/input/data/<channel>/ paths referenced in `hyperparameters`.
# The URIs come from training_input_path / test_input_path (built from the
# session's default bucket and `prefix`) instead of hard-coded, account-specific
# s3:// strings, so the notebook also runs in other accounts and regions.
huggingface_estimator.fit({'train': training_input_path, 'test': test_input_path})


2021-11-09 06:27:49 Starting - Starting the training job...
2021-11-09 06:28:12 Starting - Launching requested ML instancesProfilerReport-1636439263: InProgress
.........
2021-11-09 06:29:35 Starting - Preparing the instances for training.........
2021-11-09 06:31:13 Downloading - Downloading input data
2021-11-09 06:31:13 Training - Downloading the training image.................[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-11-09 06:34:03,029 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-11-09 06:34:03,108 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-11-09 06:34:04,565 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-11-09 06:34:05,016 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/opt/conda

## deploy

In [None]:
from sagemaker.huggingface import HuggingFaceModel
import sagemaker

# IAM role with permissions to create an Endpoint
role = sagemaker.get_execution_role()

# Wrap a trained model archive so it can be deployed.
# NOTE(review): model_data is a hard-coded artifact from an
# 'xlm-roberta-base-epoch1' job, not the roberta-large job configured in the
# training section - confirm this is the model you intend to serve.
huggingface_model = HuggingFaceModel(
    role=role,
    model_data="s3://sagemaker-us-west-2-847380964353/xlm-roberta-base-epoch1-2021-11-08-08-17-03-658/output/model.tar.gz",
    # versions of the DLC: python, pytorch, transformers
    py_version="py36",
    pytorch_version="1.7",
    transformers_version="4.6",
)

In [None]:
# Deploy the model to one ml.g4dn.xlarge instance and keep the returned
# predictor for sending requests.
predictor = huggingface_model.deploy(instance_type="ml.g4dn.xlarge", initial_instance_count=1)

In [None]:
# example request, you always need to define "inputs"
# Named `payload` rather than `data` so it does not overwrite the training
# DataFrame that the prepare-data cell binds to `data`.
payload = {
   "inputs": "The new Hugging Face SageMaker DLC makes it super easy to deploy models in production. I love it!"
}

# request
predictor.predict(payload)