In [2]:
!pip install -q transformers datasets

#### Example from https://huggingface.co/gpt2

In [8]:
from transformers import pipeline, set_seed

# Build a text-generation pipeline backed by the `gpt2` checkpoint.
generator = pipeline("text-generation", model="gpt2")

# Seed the random number generators before sampling.
set_seed(42)

# Ask for five continuations of the prompt; as the last expression of the
# cell, the returned list is displayed as the cell output.
generator(
    "The tallest building in Hong Kong is",
    max_length=30,
    num_return_sequences=5,
)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'The tallest building in Hong Kong is 1.25 meters (1.65 ft.) and is situated at a depth of 745 feet (460 m'},
 {'generated_text': 'The tallest building in Hong Kong is just three blocks and 17 meters tall.\n\nSome 40 buildings are being constructed, and the number is expected to'},
 {'generated_text': "The tallest building in Hong Kong is currently being renovated, designed by China's Hubei Group. It's more than eight stories tall, is at"},
 {'generated_text': 'The tallest building in Hong Kong is a skyscraper called "Lungshan Bridge" that rises from the island of Hong Kong. It is being'},
 {'generated_text': 'The tallest building in Hong Kong is the Royal Garden, but other skyscrapers have had their doors slammed into.\n\nJust last year, the'}]

### Deployment as an endpoint on SageMaker

In [9]:
from sagemaker.huggingface import HuggingFaceModel
import sagemaker

In [10]:
# Execution role of the current SageMaker session; it is passed as `role=`
# when the HuggingFaceModel is created below.
role = sagemaker.get_execution_role()

In [11]:
# Hub configuration (model catalogue: https://huggingface.co/models):
# which model id to serve and which pipeline task to serve it with.
# It is handed to the model below through its `env` argument.
hub = {
    "HF_MODEL_ID": "gpt2",
    "HF_TASK": "text-generation",
}

# Describe the Hugging Face model to SageMaker. The three version arguments
# pin the transformers / PyTorch / Python versions requested for it.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version="4.17.0",
    pytorch_version="1.10.2",
    py_version="py38",
)

In [12]:
# Deploy the model to SageMaker Inference on a single `ml.g4dn.xlarge`
# instance; the returned predictor is used below to query the endpoint.
predictor = huggingface_model.deploy(
    instance_type="ml.g4dn.xlarge",
    initial_instance_count=1,
)


----------!

In [13]:
# Request payload: the prompt goes under the "inputs" key.
text = {"inputs": "The tallest building in Hong Kong is"}

In [16]:
# Send the payload to the deployed endpoint through the SDK predictor; the
# result is displayed as the cell output.
predictor.predict(text)

[{'generated_text': "The tallest building in Hong Kong is a three-storey tower by the Chinese New Year's Day building (pictured)) and is owned by the Hong Kong family of companies. But it was recently bought from family and is managed by a couple who own"}]

In [86]:
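# Clean-up, left disabled because the Boto3 section below still calls the
# endpoint. Uncomment and run it once you are finished so the endpoint does
# not keep running.
# predictor.delete_endpoint()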

## Inference with the AWS SDK (Boto3)

In [17]:
import boto3
import json

In [18]:
# Boto3 client for the "sagemaker-runtime" service; `invoke_endpoint` is
# called on it below.
runtime = boto3.client("sagemaker-runtime")

In [22]:
# Take the endpoint name from the predictor returned by `deploy()` above
# instead of hard-coding it: the generated name embeds a creation timestamp
# (e.g. "huggingface-pytorch-inference-2023-03-16-12-06-48-331"), so a literal
# copied from one run does not match the endpoint created by the next run.
# To call an endpoint that was created elsewhere, assign its name as a string.
endpoint_name = predictor.endpoint_name

In [23]:
# Request payload for the Boto3 call: the prompt goes under the "inputs" key.
text = {"inputs": "The tallest building in Hong Kong is"}

In [24]:
# Call the endpoint through the runtime client. The payload is serialized to
# a JSON string and declared as such via the content type.
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(text),
)

# Reading the response body yields raw bytes (the JSON document returned by
# the endpoint), which are printed unparsed.
print(response["Body"].read())

b'[{"generated_text":"The tallest building in Hong Kong is at 9,700 meters but I think we\'ve hit the right place for this.\\"\\n\\nThe tall building sits about 300 meters across.\\n\\nThe tower is on the property of Jiajia Capital Holdings"}]'
