In [1]:
import sagemaker

# Temporary session, needed only to look up the default bucket name below.
sess = sagemaker.Session()

# Bucket used for uploading data, models and logs.
# SageMaker creates this bucket automatically if it does not exist yet.
sagemaker_session_bucket = None
if sess is not None and sagemaker_session_bucket is None:
    # No bucket name was supplied, so fall back to the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()

# IAM role handed to the estimator later in the notebook.
role = sagemaker.get_execution_role()

# Rebuild the session so that it is tied to the bucket chosen above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved configuration for a quick sanity check.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::869312567674:role/service-role/AmazonSageMaker-ExecutionRole-20210820T145758
sagemaker bucket: sagemaker-us-east-1-869312567674
sagemaker session region: us-east-1


In [2]:
from sagemaker.huggingface import HuggingFace

# Hyperparameters that are passed into the training job.
hyperparameters = {
    'epochs': 1,
    'train_batch_size': 32,
    'model_name': 'distilbert-base-uncased',
}

# Estimator describing the training job: which script to run, on what
# hardware, and with which framework versions.
huggingface_estimator = HuggingFace(
    # training script and the directory it is taken from
    entry_point='train.py',
    source_dir='./',
    # compute resources and permissions
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    # framework / runtime versions
    transformers_version='4.6',
    pytorch_version='1.7',
    py_version='py36',
    # values defined in the dict above
    hyperparameters=hyperparameters,
)


In [3]:
# Key prefix inside the session's default bucket under which the input data is looked up.
# NOTE(review): hard-coded to one timestamped folder — presumably the output of an
# earlier job; confirm and update it whenever the data is regenerated.
s3_prefix = 'huggingface-2021-09-01-21-12-16-895/output'
# Full S3 URIs of the two datasets; these are the values handed to fit() as 'train' and 'test'.
training_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/train_data'
test_input_path = f's3://{sess.default_bucket()}/{s3_prefix}/test_data'


In [4]:
# Display the resolved training-data URI so it can be checked before the job is launched.
training_input_path

's3://sagemaker-us-east-1-869312567674/huggingface-2021-09-01-21-12-16-895/output/train_data'

In [5]:
# Start the training job, feeding it the uploaded datasets as its
# 'train' and 'test' inputs.
huggingface_estimator.fit(
    inputs={
        'train': training_input_path,
        'test': test_input_path,
    },
)

2021-09-01 21:33:11 Starting - Starting the training job...
2021-09-01 21:33:32 Starting - Launching requested ML instancesProfilerReport-1630531323: InProgress
.........
2021-09-01 21:34:54 Starting - Preparing the instances for training.........
2021-09-01 21:36:38 Downloading - Downloading input data......
2021-09-01 21:37:34 Training - Downloading the training image..............[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-09-01 21:39:54,616 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-09-01 21:39:54,639 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-09-01 21:39:57,667 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m

2021-09-01 21:39:55 Training - Training image download completed. Training in progress.[34m2021-09-01 21:40:36,071 sagemaker-t

In [6]:
# Deploy the trained model behind a single-instance endpoint and keep the
# returned predictor for the inference cells.
# NOTE(review): no endpoint teardown is visible in this part of the notebook —
# confirm that the endpoint is deleted once the experiments are finished.
predictor = huggingface_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)


---------------!

In [7]:
# Sample 1: an enthusiastic review (the saved output reports LABEL_1, score ~0.997).
classification_input = {
    "inputs": "I love using the new device. It was awesome! ",
}

predictor.predict(classification_input)

[{'label': 'LABEL_1', 'score': 0.9965830445289612}]

In [8]:
# Sample 2: a critical review (the saved output reports LABEL_0, score ~0.996).
classification_input = {
    "inputs": "I hate using the new device. It was hard to use! ",
}

predictor.predict(classification_input)

[{'label': 'LABEL_0', 'score': 0.9957138299942017}]

In [9]:
# Sample 3: a lukewarm review (the saved output reports LABEL_1 with a lower score, ~0.76).
classification_input = {
    "inputs": "This device is just so so. ",
}

predictor.predict(classification_input)

[{'label': 'LABEL_1', 'score': 0.7585465312004089}]