In [12]:
%%writefile service.py
import bentoml

from bentoml.io import Text, JSON
from transformers import pipeline

class PretrainedModelRunnable(bentoml.Runnable):
    """BentoML runnable that wraps a Hugging Face text-classification pipeline.

    The pipeline for the ``GroNLP/hateBERT`` checkpoint is constructed in
    ``__init__`` and reused by every call made through the runnable.
    """

    # Resource declarations read by BentoML: CPU only, multi-threading allowed.
    SUPPORTED_RESOURCES = ("cpu",)
    SUPPORTS_CPU_MULTI_THREADING = True

    def __init__(self):
        # NOTE(review): the recorded serve logs report that some checkpoint
        # weights were not used when initializing BertForSequenceClassification;
        # presumably the classification head is not fine-tuned -- confirm
        # before trusting the returned labels/scores.
        self.classifier = pipeline("text-classification", model="GroNLP/hateBERT")

    @bentoml.Runnable.method(batchable=False)
    def __call__(self, input_text):
        """Classify ``input_text`` and return whatever the pipeline produces."""
        predictions = self.classifier(input_text)
        return predictions

# Wrap the runnable in a runner so the service below can dispatch calls to it.
runner = bentoml.Runner(
    PretrainedModelRunnable,
    name="pretrained_unmasker",
)

# The service object referenced as "service.py:svc"; it owns the runner above.
svc = bentoml.Service(
    'pretrained_classification_service',
    runners=[runner],
)

@svc.api(input=Text(), output=JSON())
async def detectViolence(input_series: str) -> list:
    """Service API: classify the request text and return the runner's result.

    Args:
        input_series: Raw text taken from the request (``Text()`` input).

    Returns:
        The value produced by ``runner.async_run``, serialized as JSON.
    """
    result = await runner.async_run(input_series)
    return result

Overwriting service.py


In [16]:
# Start the development HTTP server for `svc` from service.py with --reload.
# The server runs until interrupted (CTRL+C), so this cell does not return on its own.
!bentoml serve service.py:svc --reload

2023-03-20T13:23:29-0600 [INFO] [cli] Prometheus metrics for HTTP BentoServer from "service.py:svc" can be accessed at http://localhost:3000/metrics.
2023-03-20T13:23:30-0600 [INFO] [cli] Starting development HTTP BentoServer from "service.py:svc" listening on http://0.0.0.0:3000 (Press CTRL+C to quit)
2023-03-20 13:23:30 circus[50230] [INFO] Loading the plugin...
2023-03-20 13:23:30 circus[50230] [INFO] Endpoint: 'tcp://127.0.0.1:60393'
2023-03-20 13:23:30 circus[50230] [INFO] Pub/sub: 'tcp://127.0.0.1:60394'
2023-03-20T13:23:30-0600 [INFO] [observer] Watching directories: ['/Users/li/OMSA/FullStackDL/BentoML/huggingface_deployment', '/Users/li/bentoml/models']
Some weights of the model checkpoint at GroNLP/hateBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.

In [14]:
%%writefile bentofile.yaml
# Build configuration for the Bento produced by `bentoml build`.
# Entry point: the `svc` object defined in service.py.
service: "service.py:svc"
# No labels are set (the key is left empty).
labels:
# Files from the build context to include: every Python file.
include:
- "*.py"
# Python packages to install for the Bento.
python:
  packages:
  - transformers
  - torch

Overwriting bentofile.yaml


In [None]:
# Build the Bento from the current directory (uses the bentofile.yaml written above).
!bentoml build

Building BentoML service "pretrained_classification_service:2qgvurwfjojwv6wa" from build context "/Users/li/OMSA/FullStackDL/BentoML/huggingface_deployment".
Locking PyPI package versions.

██████╗░███████╗███╗░░██╗████████╗░█████╗░███╗░░░███╗██╗░░░░░
██╔══██╗██╔════╝████╗░██║╚══██╔══╝██╔══██╗████╗░████║██║░░░░░
██████╦╝█████╗░░██╔██╗██║░░░██║░░░██║░░██║██╔████╔██║██║░░░░░
██╔══██╗██╔══╝░░██║╚████║░░░██║░░░██║░░██║██║╚██╔╝██║██║░░░░░
██████╦╝███████╗██║░╚███║░░░██║░░░╚█████╔╝██║░╚═╝░██║███████╗
╚═════╝░╚══════╝╚═╝░░╚══╝░░░╚═╝░░░░╚════╝░╚═╝░░░░░╚═╝╚══════╝

Successfully built Bento(tag="pretrained_classification_service:2qgvurwfjojwv6wa").


In [1]:
# Serve the latest built Bento with the production HTTP server.
# The server runs until interrupted (CTRL+C), so this cell does not return on its own.
!bentoml serve pretrained_classification_service:latest --production

2023-03-25T18:49:18-0600 [INFO] [cli] Environ for worker 0: set CPU thread count to 10
2023-03-25T18:49:18-0600 [INFO] [cli] Prometheus metrics for HTTP BentoServer from "pretrained_classification_service:latest" can be accessed at http://localhost:3000/metrics.
2023-03-25T18:49:19-0600 [INFO] [cli] Starting production HTTP BentoServer from "pretrained_classification_service:latest" listening on http://0.0.0.0:3000 (Press CTRL+C to quit)
Some weights of the model checkpoint at GroNLP/hateBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing 

In [None]:
# Imported here so the cell runs on a fresh kernel; these names are used below
# but were not imported by any earlier cell.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

ckpt = "GroNLP/hateBERT"
NUM_LABELS = 2

# Load the checkpoint with a sequence-classification head sized for NUM_LABELS.
# (The original line had an unbalanced opening parenthesis, which is a SyntaxError.)
model = AutoModelForSequenceClassification.from_pretrained(ckpt, num_labels=NUM_LABELS)
tokenizer = AutoTokenizer.from_pretrained(ckpt)


def tokenize(batch):
    """Tokenize the ``"text"`` entry of ``batch`` with padding and truncation enabled.

    Args:
        batch: Mapping with a ``"text"`` key holding the text(s) to tokenize.

    Returns:
        The tokenizer's output for ``batch["text"]``.
    """
    return tokenizer(batch["text"], padding=True, truncation=True)


In [None]:
# Fine-tune bert-base-cased as a 5-class classifier on a small slice of
# yelp_review_full, then save the resulting pipeline to the BentoML model store.
import bentoml
from datasets import load_dataset
from transformers import (
    AutoModelForMaskedLM,  # kept: other cells may rely on this name being imported
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    pipeline,
)

dataset = load_dataset("yelp_review_full")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def tokenize_function(examples):
    """Tokenize the ``"text"`` field of a batch, padded/truncated to max length."""
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Fixed-seed 1,000-example subsets of the train and test splits.
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

# A sequence-classification head is required here: the dataset provides one
# class label per example and num_labels=5 is requested. The original cell
# loaded AutoModelForMaskedLM, whose loss expects per-token labels, so
# Trainer.train() could not compute a loss from these labels.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)

training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
)

trainer.train()

# The pipeline task must match the model head, hence "text-classification"
# rather than the original 'fill-mask'.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

# NOTE(review): the store tag "unmasker" is left unchanged so anything already
# loading that tag keeps resolving; consider renaming it once callers are known.
bentoml.transformers.save_model(name="unmasker", pipeline=classifier)



In [4]:
# Make an API request against the API Gateway endpoint and print the raw body.
import requests
import json

url = 'https://xb7e2wbuwj.execute-api.us-east-2.amazonaws.com/'  # Endpoint from AWS API Gateway
# NOTE(review): this payload is a numeric feature vector, while the service
# defined in service.py takes plain text; the recorded output for this cell is
# an "Internal Server Error" -- confirm the payload the Lambda expects.
data = {"data": "[5.1, 3.5, 1.4, 0.2]"}  # Lambda function payload

# A timeout keeps a stalled endpoint from blocking the kernel indefinitely;
# requests waits forever when none is given.
REQUEST_TIMEOUT_SECONDS = 30
response = requests.post(url=url, data=json.dumps(data), timeout=REQUEST_TIMEOUT_SECONDS)
print(response.text)

{"message":"Internal Server Error"}


In [6]:
#374806654920.dkr.ecr.us-east-2.amazonaws.com/pretrained_classification:2qgvurwfjojwv6wa
#https://runtime.sagemaker.us-east-2.amazonaws.com/endpoints/quickstart-endpoint/invocations
import boto3

# Endpoint to call. Endpoint names are unique per AWS Region within an account.
endpoint_name = 'pretrained-classification-endpoint'

# Low-level client for the Amazon SageMaker Runtime API.
sagemaker_runtime = boto3.client("sagemaker-runtime", region_name="us-east-2")

# Request body sent to the endpoint, as UTF-8 bytes. Replace with your own data.
payload = '{"text": "This is great!"}'.encode('utf-8')

# Ask the hosted model for an inference on the payload.
response = sagemaker_runtime.invoke_endpoint(EndpointName=endpoint_name, Body=payload)

# Optional: read the streamed response body and decode it for display.
print(response['Body'].read().decode('utf-8'))

[{"label":"LABEL_0","score":0.676551342010498}]


In [None]:
# Base URL of the deployed HTTP API (appears to be an AWS API Gateway host in us-east-2).
# NOTE(review): this host differs from the `url` used in the earlier request cell -- confirm which is current.
API_URL = "https://hc3gvhv6ha.execute-api.us-east-2.amazonaws.com"
