# Step 1: Prepare the Hugging Face Model

In [None]:
pip install transformers torch sagemaker

In [None]:
import os
import re

# Hugging Face Hub ID of the model to package and deploy.
model_name = "dslim/bert-large-NER"

# The sanitized name is reused as the tarball file name and as the SageMaker
# endpoint name. Hub IDs can contain "/", "_" or ".", so every character that
# is not a letter, digit or hyphen is mapped to "-", and hyphens at either end
# are trimmed. For IDs whose only special character is "/", the result is the
# same as a plain "/" -> "-" replacement.
sanitized_model_name = re.sub(r"[^A-Za-z0-9-]", "-", model_name).strip("-")

# Export both names so the %%bash cells see the same values as the Python cells.
os.environ["MODEL_NAME"] = model_name
os.environ["SANITIZED_MODEL_NAME"] = sanitized_model_name

print(sanitized_model_name)

In [None]:
import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
    AutoTokenizer,
)

# Note: AutoModelForSeq2SeqLM and torch are imported but not used in this cell.

# Fetch the tokenizer and the token-classification model named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Write both into the local "model" directory: model first, then tokenizer.
# This directory is what the packaging cell archives.
for artifact in (model, tokenizer):
    artifact.save_pretrained("model")

In [None]:
%%bash
# Stop at the first failing command (and on an unset variable), so a missing
# inference.py cannot silently produce a tarball without the inference code.
set -euo pipefail

# Place the custom inference script under code/ inside the model directory.
mkdir -p model/code
cp inference.py model/code/

# Archive the *contents* of model/ (not the directory itself) into the tarball.
tar -czvf "${SANITIZED_MODEL_NAME}.tar.gz" -C model .

In [None]:
%%bash
# Optional sanity check: list the archive entries to confirm the folder
# hierarchy inside the tarball is the one that was intended.
tar -t -z -v -f ${SANITIZED_MODEL_NAME}.tar.gz

In [None]:
%%bash
# Abort on the first failure so the local artifacts are removed only after the
# upload has actually succeeded; otherwise a failed upload would be followed
# by deleting the only local copy.
set -euo pipefail

# Upload the packaged model to the S3 bucket.
aws s3 cp "${SANITIZED_MODEL_NAME}.tar.gz" s3://sagemaker.demo.klaudsol.com/

# Upload succeeded: remove the local tarball and the exported model directory.
rm -- "${SANITIZED_MODEL_NAME}.tar.gz"
rm -rf model

In [None]:
# IAM execution role ARN handed to SageMaker (account and role IDs are masked here).
role = "arn:aws:iam::XXXXXXXXXXXX:role/service-role/AmazonSageMaker-ExecutionRole-XXXXXXXXTXXXXXX"

# S3 location of the uploaded tarball, derived from the sanitized model name.
model_uri = f"s3://sagemaker.demo.klaudsol.com/{sanitized_model_name}.tar.gz"

# Show both values, one per line, for a quick visual check.
print(role, model_uri, sep="\n")

In [None]:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

# A SageMaker session is created here; it is not passed to the model below.
sess = sagemaker.Session()

# Version settings forwarded to HuggingFaceModel.
container_versions = {
    "transformers_version": "4.26",
    "pytorch_version": "1.13",
    "py_version": "py39",
}

# Model definition: the S3 tarball plus the execution role and version settings.
hf_model = HuggingFaceModel(model_data=model_uri, role=role, **container_versions)
print(hf_model)

# Step 2: Deploy with Serverless Inference

In [None]:
from sagemaker.serverless import ServerlessInferenceConfig

# Serverless sizing: memory in MB and the cap on concurrent requests.
serverless_config = ServerlessInferenceConfig(memory_size_in_mb=6144, max_concurrency=1)

# Arguments for the deployment. The endpoint is named after the sanitized model name.
# NOTE(review): initial_instance_count=0 is kept as-is; the SDK docs suggest it is
# not needed when a serverless config is supplied - confirm before removing.
deploy_kwargs = dict(
    initial_instance_count=0,
    serverless_inference_config=serverless_config,
    endpoint_name=sanitized_model_name,
)

# Create the serverless endpoint and keep the predictor for the cells below.
predictor = hf_model.deploy(**deploy_kwargs)



In [None]:
%%bash
# Optional: print the endpoint's current status.
# The EndpointStatus field is extracted from the describe-endpoint JSON with jq.
aws sagemaker describe-endpoint --endpoint-name ${SANITIZED_MODEL_NAME} \
  | jq '.EndpointStatus'

In [None]:

# Sample sentence sent to the endpoint under the "inputs" key.
data = dict(
    inputs="Hello, I am Pedro Penduko. You can call me Pedro. I live in Manila. I am a member of Data Engineering Pilipinas."
)

# Send the request through the SDK predictor returned by deploy() and show the result.
response = predictor.predict(data)
print(response)

# Step 3: Create an app that accesses the endpoint

In [None]:
import boto3
import json

# Low-level runtime client: calls the endpoint without the SageMaker SDK predictor.
runtime_client = boto3.client("sagemaker-runtime")

# Request body, serialized to JSON before sending.
test_data = {
    "inputs": "Hello, I am Pedro Penduko. You can call me Pedro. I live in Manila. I am a member of Data Engineering Pilipinas."
}
payload = json.dumps(test_data)

response = runtime_client.invoke_endpoint(
    EndpointName=sanitized_model_name,
    ContentType="application/json",
    Body=payload,
)

# The body is a stream: read it fully and decode the bytes to text.
body_text = response["Body"].read().decode()
print("Response:", body_text)

# Optional: Cleanup Endpoint

In [None]:
# Tear down the endpoint created by deploy(). The endpoint configuration is
# removed with it; this spells out the SDK's default argument explicitly.
predictor.delete_endpoint(delete_endpoint_config=True)

# Optional: Cleanup files from Notebook

In [None]:
%%bash
# Remove local build artifacts. Both commands use -f so the cell succeeds even
# when the files are already gone (the upload cell deletes them after a
# successful upload); a plain `rm *.tar.gz` exits non-zero in that case and
# makes the cell fail.
rm -rf model
rm -f -- *.tar.gz