## Setup

In [1]:
# Quietly install torch, boto3 and the SageMaker SDK into the user site-packages (--user).
# Restart the kernel afterwards so the newly installed versions are the ones imported.
# NOTE(review): versions are unpinned; the recorded run below reports sagemaker 2.72.1.
%pip -q install torch boto3 sagemaker --user

Note: you may need to restart the kernel to use updated packages.


In [2]:
import sagemaker

# Print the installed SageMaker SDK version.
print(sagemaker.__version__)

# SageMaker session; used below to upload the model archive to S3.
sess = sagemaker.Session()
# Execution role; passed to create_model as ExecutionRoleArn further down.
role = sagemaker.get_execution_role()

2.72.1


## Upload data to S3

In [3]:
!wget https://sagemaker-us-east-1-809378912851.s3.amazonaws.com/huggingface/model.tar.gz

--2022-01-10 15:15:36--  https://sagemaker-us-east-1-809378912851.s3.amazonaws.com/huggingface/model.tar.gz
Resolving sagemaker-us-east-1-809378912851.s3.amazonaws.com (sagemaker-us-east-1-809378912851.s3.amazonaws.com)... 52.216.153.116
Connecting to sagemaker-us-east-1-809378912851.s3.amazonaws.com (sagemaker-us-east-1-809378912851.s3.amazonaws.com)|52.216.153.116|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 955270513 (911M) [binary/octet-stream]
Saving to: ‘model.tar.gz’


2022-01-10 15:16:03 (33.6 MB/s) - ‘model.tar.gz’ saved [955270513/955270513]



In [4]:
# Local archive fetched by the wget cell, and the S3 key prefix it is stored under.
model_archive = 'model.tar.gz'
prefix = 'huggingface'

# upload_data returns the S3 URI of the uploaded object; create_model consumes it later.
model_data_url = sess.upload_data(path=model_archive, key_prefix=prefix)
model_data_url

's3://sagemaker-us-east-1-809378912851/huggingface/model.tar.gz'

In [5]:
# Version identifiers fed into the container image lookup further down.
transformers_version = '4.12.3'
pytorch_version = '1.9.1'
py_version = 'py38'

### Use boto3 to deploy with serverless inference

Reference: [Amazon SageMaker Serverless Inference documentation](https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html)

In [6]:
import boto3

# SageMaker client: creates and deletes the model, endpoint config and endpoint below.
sm = boto3.client('sagemaker')
# SageMaker runtime client: sends inference requests via invoke_endpoint.
sm_rt = boto3.client('sagemaker-runtime')

In [7]:
from time import gmtime, strftime

def name_with_timestamp(name):
    """Return ``name`` followed by ``-`` and the current UTC time as ``YYYY-MM-DD-HH-MM-SS``."""
    stamp = strftime('%Y-%m-%d-%H-%M-%S', gmtime())
    return f'{name}-{stamp}'

In [8]:
# One timestamped name per SageMaker resource: model, endpoint config, endpoint.
huggingface_model_name = name_with_timestamp('huggingface-serverless')
huggingface_epc_name = name_with_timestamp('huggingface-serverless-epc')
huggingface_endpoint_name = name_with_timestamp('huggingface-serverless-ep')

### Create model

In [9]:
# Region of the default boto3 session; it is passed to the image lookup below.
region = boto3.session.Session().region_name

# Look up the Hugging Face inference container image for the versions chosen earlier.
image_uri = sagemaker.image_uris.retrieve(
    framework='huggingface',
    region=region,
    version=transformers_version,
    base_framework_version=f'pytorch{pytorch_version}',
    py_version=py_version,
    image_scope='inference',
    instance_type='ml.m5.large',   # No GPU support on serverless inference
)

image_uri

'763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.9.1-transformers4.12.3-cpu-py38-ubuntu20.04'

In [10]:
# Single container definition: the inference image plus the model archive uploaded to S3.
container_definition = {
    'Image': image_uri,
    'Mode': 'SingleModel',
    'ModelDataUrl': model_data_url,
}

create_model_response = sm.create_model(
    ModelName=huggingface_model_name,
    ExecutionRoleArn=role,
    Containers=[container_definition],
)

create_model_response["ModelArn"]

'arn:aws:sagemaker:us-east-1:809378912851:model/huggingface-serverless-2022-01-10-15-16-16'

### Create endpoint configuration

In [11]:
# Serverless settings for the variant (memory size in MB and maximum concurrency).
serverless_config = {
    'MemorySizeInMB': 6144,
    'MaxConcurrency': 8,
}

# The only production variant; it serves the model created in the previous step.
production_variant = {
    'VariantName': 'single-variant',
    'ModelName': huggingface_model_name,
    'ServerlessConfig': serverless_config,
}

endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName=huggingface_epc_name,
    ProductionVariants=[production_variant],
)

endpoint_config_response['EndpointConfigArn']

'arn:aws:sagemaker:us-east-1:809378912851:endpoint-config/huggingface-serverless-epc-2022-01-10-15-16-16'

### Create endpoint

In [12]:
# Create the endpoint from the endpoint config defined above.
endpoint_request = {
    'EndpointName': huggingface_endpoint_name,
    'EndpointConfigName': huggingface_epc_name,
}
create_endpoint_response = sm.create_endpoint(**endpoint_request)

create_endpoint_response['EndpointArn']

'arn:aws:sagemaker:us-east-1:809378912851:endpoint/huggingface-serverless-ep-2022-01-10-15-16-16'

In [13]:
# Block this cell until the 'endpoint_in_service' waiter returns for our endpoint.
sm.get_waiter('endpoint_in_service').wait(EndpointName=huggingface_endpoint_name)

### Invoke endpoint

In [14]:
import boto3, threading, time, json

# Runtime client used by invoke_endpoint (same service as the sm_rt created in the earlier boto3 cell).
sm_rt = boto3.client('sagemaker-runtime')

In [15]:
# Short one-line review; this is the payload sent by the invocation cell below.
# NOTE(review): the "_16" / "_250" suffixes presumably indicate approximate input length in tokens — confirm.
test_data_16 = {'inputs': "The Phantom Menace was a waste of my life. Die, Jar Jar, die!"}

# Long multi-sentence review. The trailing backslashes are line continuations inside the
# string literal, so the value is one single line of text with no embedded newlines.
# NOTE(review): this payload is not referenced by any cell visible in this notebook.
test_data_250 = {'inputs': "Naked but not afraid, a young man roams the forest, growling in all fours. \
He behaves like a beast. To him, this is not a theatrical exercise but the true manifestation of his instincts. \
In Nathalie Biancheri's offbeat drama “Wolf,” he is one in a group of teenagers convinced their fragile human \
bodies don’t correspond with their animal identities. Their condition, described as “species dysphoria,” \
ostracizes them from society.For Jacob (George MacKay), the wolf in question, being admitted into a facility \
where those afflicted receive corrective treatment is a last frontier between fulfilling his parents’ wish for \
normalcy or running wild without remorse.Jacob steps into a pack of fellow patients and meets among several \
others, Rufus (Fionn O'Shea), who thinks of himself as a lovable German Shepherd, and love interest Wildcat \
(Lily-Rose Depp), a long house-trained resident under the thumb of a key staff member. Some of them have a \
hard time adjusting, and get “prop privileges” to wear costumes that bring them closer to their desired form. \
Despite what it entails, the setup is never played for laughs, but the opposite. Their desperation has a deep \
sadness. But for as much writer/director Biancheri pumps copious ideas into this concept, the solemn tone and \
lack of thematic focus renders the overwrought outing underwhelming. A premise like this would have been more \
effective had it been executed with the acidity of someone like director Yorgos Lanthimos, in which the premise \
could unfold as satirical commentary rather than straightforward indignation. "}

In [16]:
# Time a single end-to-end invocation of the endpoint with the short payload.
# perf_counter() is a monotonic, high-resolution clock intended for measuring intervals;
# time.time() is wall-clock time and can jump if the system clock is adjusted mid-call.
# NOTE(review): the ~14 s recorded for this first call presumably includes a serverless
# cold start — confirm by invoking a second time.
tick = time.perf_counter()
response = sm_rt.invoke_endpoint(
    EndpointName=huggingface_endpoint_name,
    Body=json.dumps(test_data_16),
    ContentType='application/json',
)
tock = time.perf_counter()

# Elapsed seconds, then the raw response body (bytes).
print(tock - tick)
print(response["Body"].read())

13.917281150817871
b'[{"label":"LABEL_0","score":0.9800038933753967}]'


### Cleanup

In [17]:
# Tear down in reverse order of creation: endpoint first, then its config, then the model.
# NOTE(review): the model.tar.gz uploaded to S3 and the local download are not removed here.
sm.delete_endpoint(EndpointName=huggingface_endpoint_name)
sm.delete_endpoint_config(EndpointConfigName=huggingface_epc_name)
# Last expression of the cell, so its response dict is what the notebook displays.
sm.delete_model(ModelName=huggingface_model_name)

{'ResponseMetadata': {'RequestId': 'd80ec44d-c5e8-4e9e-9d10-9e1a888344c6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd80ec44d-c5e8-4e9e-9d10-9e1a888344c6',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Mon, 10 Jan 2022 15:21:33 GMT'},
  'RetryAttempts': 0}}