In [None]:
# Install the notebook's client-side dependencies into the kernel's environment.
# transformers and datasets are pinned; sagemaker only has a lower bound and
# ipywidgets is unpinned.
%pip install "sagemaker>=2.48.0" "transformers==4.12.3" "datasets[s3]==1.18.3" "ipywidgets" --upgrade

In [2]:
import sagemaker
import botocore
import ipywidgets

from datasets import load_dataset
from datasets.filesystems import S3FileSystem
from sagemaker.huggingface import HuggingFace
from sagemaker.huggingface import HuggingFaceModel
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

In [None]:
# Resolve the S3 bucket, execution role and SageMaker session used below.
sagemaker_session_bucket = None  # leave as None to fall back to the session's default bucket
sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    # No bucket was chosen explicitly, so ask the session for its default one.
    sagemaker_session_bucket = sess.default_bucket()

role = sagemaker.get_execution_role()
s3_prefix = 'Chapter05/NLP'

# Re-create the session so that it is bound to the resolved bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved settings for the reader.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

In [4]:
# Notebook-wide settings: which Hub dataset to load, how many classes the
# classifier head gets, and which pretrained checkpoint/tokenizer to use.
dataset_name = "emotion"
num_labels = 6
tokenizer_name = "bert-base-uncased"

In [None]:
# Fetch the tokenizer for the configured checkpoint and the dataset to explore.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
dataset = load_dataset(dataset_name)

def tokenize(batch):
    """Tokenize the ``text`` field of a batch with the notebook's tokenizer.

    Padding and truncation are both enabled. Returns whatever the tokenizer
    call returns for the batch.
    """
    texts = batch['text']
    return tokenizer(texts, truncation=True, padding=True)

# Apply `tokenize` in batched mode; batch_size=None is passed through unchanged.
dataset_encoded = dataset.map(
    tokenize,
    batch_size=None,
    batched=True,
)


In [None]:
# Load the pretrained checkpoint with a sequence-classification head that has
# `num_labels` outputs.
model = AutoModelForSequenceClassification.from_pretrained(
    tokenizer_name,
    num_labels=num_labels,
)

# Format the listed columns as torch tensors.
tensor_columns = ["input_ids", "attention_mask", "label"]
dataset_encoded.set_format("torch", columns=tensor_columns)

# Last expression of the cell: show the feature schema of the training split.
dataset_encoded["train"].features

In [26]:
# Command-line arguments that SageMaker forwards to run_glue.py in the
# training container.
hyperparameters = {
    'model_name_or_path': tokenizer_name,
    # Fine-tune on the dataset this notebook loads and tokenizes. The previous
    # `task_name: 'mnli'` trained on GLUE MNLI instead, which does not match
    # the dataset explored above.
    'dataset_name': dataset_name,
    # run_glue.py only trains/evaluates when asked to; without `do_train` the
    # job finishes without fitting or saving a model.
    'do_train': True,
    'do_eval': True,
    # Write the model to the directory SageMaker packages as the job's artifact.
    'output_dir': '/opt/ml/model',
}

In [27]:
# Git source for the training entry point: the transformers repository,
# checked out at the v4.17.0 ref.
git_config = dict(
    repo='https://github.com/huggingface/transformers.git',
    branch='v4.17.0',
)

In [24]:
# Training-job definition: runs run_glue.py from the transformers repository
# (see `git_config`) on a single ml.p3.2xlarge instance.
#
# `train_dataset=` / `eval_dataset=` were removed: they are not HuggingFace
# estimator parameters, so the tokenized splits never reached the training
# job. run_glue.py loads its own data according to `hyperparameters`.
huggingface_estimator = HuggingFace(
    entry_point='run_glue.py',
    source_dir='./examples/pytorch/text-classification',
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    git_config=git_config,
    # Framework versions that select the training container image.
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
    hyperparameters=hyperparameters,
)

In [None]:
# Start the training job defined by `huggingface_estimator`. No input channels
# are passed to fit().
huggingface_estimator.fit()

In [28]:
# Environment variables for the inference container: the Hub model ID to load
# and the pipeline task to serve.
hub = dict(
    HF_MODEL_ID='bert-base-uncased',
    HF_TASK='text-classification',
)

In [29]:
# Hosting-side model definition. No model artifact is passed here; the
# container is configured only through the `hub` environment variables.
# NOTE(review): this does not reference `huggingface_estimator`'s training
# output; confirm that serving the Hub checkpoint is what is intended.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
)

In [None]:
# Deploy the model to an endpoint backed by one ml.m5.xlarge instance and keep
# the returned predictor for the requests below.
predictor = huggingface_model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

In [None]:
# Send a sample sentence to the endpoint; the response is the cell's output.
predictor.predict(
    dict(inputs="OMG I need this ticket done TODAY!!!111! Can you set up a new VM for me to run Minecraft!?")
)

In [None]:
# Second sample request, this time with a short sentence.
predictor.predict(
    dict(inputs="I was not expecting that")
)

In [34]:
# Clean up: delete the endpoint behind `predictor` now that the sample
# predictions are done.
predictor.delete_endpoint()