In [107]:
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [108]:
# Create the SageMaker session once and reuse it for the execution-role
# lookup, so get_execution_role() does not have to build a second,
# throwaway Session of its own.
sess = sagemaker.Session()
role = sagemaker.get_execution_role(sagemaker_session=sess)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [5]:
# Name of the S3 bucket to use for this notebook. Leave it as None to fall
# back to the default bucket of the SageMaker session.
sagemaker_session_bucket = None

# No bucket was given explicitly: take the session's default bucket instead.
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

In [109]:
# Rebuild the session so that it is bound to the bucket selected above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the effective configuration (role, bucket, region) for a quick sanity check.
print(f"sagemaker role arn: {role}")
bucket_name = sess.default_bucket()
print(f"sagemaker bucket: {bucket_name}")
region_name = sess.boto_region_name
print(f"sagemaker session region: {region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::369147666653:role/service-role/AmazonSageMaker-ExecutionRole-20231121T150534
sagemaker bucket: sagemaker-us-east-1-369147666653
sagemaker session region: us-east-1


In [149]:
# create a code directory where you will be storing all the 
!mkdir code

In [150]:
%%writefile code/inference.py

import torch.nn.functional as F
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from torch import cuda


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

def model_fn(model_dir):
    """Load the T5 model and its tokenizer from the model directory.

    Args:
        model_dir: Directory containing the saved model files (config,
            weights and the tokenizer files packaged alongside them).

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been moved to ``device``.
    """
    model = T5ForConditionalGeneration.from_pretrained(model_dir)
    model = model.to(device)
    # Use the tokenizer files shipped with the model instead of downloading
    # "t5-base" from the Hub: this keeps the tokenizer in sync with the saved
    # weights and avoids needing network access when the endpoint starts.
    tokenizer = T5Tokenizer.from_pretrained(model_dir)
    return model, tokenizer

def predict_fn(data, model_and_tokenizer):
    """Summarize the text found under the ``"inputs"`` key of the request.

    Args:
        data: Request payload (a dict). ``data["inputs"]`` is either a single
            string or a non-empty list of strings. The key is popped from the
            dict, as in the original implementation.
        model_and_tokenizer: The ``(model, tokenizer)`` tuple returned by
            ``model_fn``.

    Returns:
        A JSON-serializable dict ``{"summary": ...}``. The value is a single
        string when ``inputs`` was a string, and a list of strings (one per
        input, in order) when ``inputs`` was a list.

    Raises:
        ValueError: If ``inputs`` is neither a string nor a non-empty list of
            strings.
    """
    # Unpack the model and tokenizer
    model, tokenizer = model_and_tokenizer

    sentences = data.pop("inputs", data)
    is_single = isinstance(sentences, str)
    if is_single:
        batch = [sentences]
    elif (
        isinstance(sentences, (list, tuple))
        and len(sentences) > 0
        and all(isinstance(item, str) for item in sentences)
    ):
        batch = list(sentences)
    else:
        raise ValueError('"inputs" must be a string or a non-empty list of strings')

    # Tokenize as a batch. Calling the tokenizer (rather than ``encode``) pads
    # the sentences to a common length and returns the matching attention
    # mask; ``encode`` would treat a list as already-split tokens.
    encoded = tokenizer(batch, return_tensors="pt", max_length=512, truncation=True, padding=True)
    # Follow the device the model was placed on rather than a module global.
    target_device = model.device
    input_ids = encoded["input_ids"].to(target_device)
    attention_mask = encoded["attention_mask"].to(target_device)

    # Generate the summaries with beam search; no gradients are needed.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=50,
            num_beams=4,
            early_stopping=True,
        )

    summaries = tokenizer.batch_decode(output_ids, skip_special_tokens=True)

    # Return a dictionary, which will be JSON serializable
    return {"summary": summaries[0] if is_single else summaries}

Writing code/inference.py


In [151]:
# This cell assumes that you already have a hugging face model uploaded in a model.zip file
! aws s3 cp s3://sagemaker-studio-369147666653-pfmu8u7ny0k/model_files.zip /root/model_files.zip
! unzip /root/model_files.zip

download: s3://sagemaker-studio-369147666653-pfmu8u7ny0k/model_files.zip to ./model_files.zip
Archive:  /root/model_files.zip
  inflating: model_files/config.json  
  inflating: model_files/generation_config.json  
  inflating: model_files/pytorch_model.bin  
  inflating: model_files/special_tokens_map.json  
  inflating: model_files/spiece.model  
  inflating: model_files/tokenizer_config.json  


In [152]:
# Local folder name of the extracted model; also used as the S3 key prefix.
model_id = "model_files"

# Destination of the packaged model archive inside the session's default bucket.
bucket_name = sess.default_bucket()
s3_location = f"s3://{bucket_name}/custom_inference/{model_id}/model.tar.gz"

In [153]:
# moving the code inside the model folder to make sure to follow the convention for the model folder for Sagemaker
!cp -r code/ $model_id/code/

In [154]:
# compressing the model folder that contains model and inference code
%cd $model_id
!tar zcvf model.tar.gz *

/root/model_files
code/
code/inference.py
config.json
generation_config.json
pytorch_model.bin
special_tokens_map.json
spiece.model
tokenizer_config.json


In [155]:
# moving the model file to s3 location from where the huggingface class will create the deployment and endpoint.
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-1-369147666653/custom_inference/model_files/model.tar.gz


In [156]:
# Deploy the packaged model to a real-time SageMaker endpoint.

from sagemaker.huggingface.model import HuggingFaceModel


# Describe the model: archive location, IAM role and serving container versions.
huggingface_model = HuggingFaceModel(
    model_data=s3_location,       # S3 path to the model archive (model + inference script)
    role=role,                    # IAM role with permissions to create an endpoint
    transformers_version="4.26",  # transformers version used
    pytorch_version="1.13",       # PyTorch version used
    py_version="py39",            # Python version used
    sagemaker_session=sess,       # reuse the notebook's session instead of creating a new default one
)

# Deploy the endpoint
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.2xlarge",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
-----!

In [157]:
# Send a sample request to the deployed endpoint.
# The payload is a dict whose "inputs" key holds the text to summarize; this
# is the key the custom predict_fn in code/inference.py reads.
data = {
  "inputs": "The Sacramento Kings won 48 games a year ago, but they proved early in the offseason that they weren't satisfied with a first-round playoff exit. They were surprisingly linked to Bradley Beal on the trade market, and according to The Athletic's Shams Charania, that was no accident. They are going to be involved in every star player,that hits the trade market, Charania said, listing Pascal Siakam, OG Anunoby and Zach LaVine as players they will consider. Fast forward to today, and one month into the season, the Kings are 8-4 and playing better than their record would indicate. They are 6-1 with De'Aaron Fox in the lineup, and in the minutes he has played, they are outscoring opponents by 11.7 points per 100 possessions. Their offense remains among the NBAs best, but their defense has risen up to roughly league-average. If the Kings arent already championship contenders, theyre not far away. For a variety of reasons, the Kings are fairly well-situated to make a big trade. Sacramento operated below the cap this offseason, and the trickle down effect of that decision is that the Kings have the NBA's ninth-lowest payroll. They can afford to add another significant salary for the next few years, especially since core youngster Keegan Murray still has three years left on his rookie deal. They also have strong depth at even their weakest positions. Kevin Huerter and Malik Monk, for example, have relatively similar skill sets, so if one were traded, the loss wouldn't be quite as impactful. The Kings still owe the Hawks one first-round pick for the Huerter trade, but otherwise, they still have the capacity to send out three first-round picks and three first-round swaps if need be. As most of the core roster is young, picks aren't quite as important in the short-term as they are for other aspiring contenders. If the right deal presents itself, there is no reason the Kings cant make it.",
}

# Invoke the endpoint and show the response returned by the endpoint.
res = predictor.predict(data)
print(res)

{'summary': "The Kings are 8-4 and playing better than their record would indicate. For a variety of reasons, the Kings are fairly well-situated to make a big trade. The Kings have the NBA's ninth-"}


In [158]:
# Display the name of the deployed endpoint so it can be noted down and
# used again later.

predictor.endpoint_name

'huggingface-pytorch-inference-2023-11-24-02-16-49-573'

In [148]:
# Delete the model and the endpoint to avoid ongoing costs.
# NOTE(review): the model is deleted before the endpoint; presumably
# delete_model() still needs the endpoint's configuration to locate the
# model, so keep this order — TODO confirm against the SDK docs.
predictor.delete_model()
predictor.delete_endpoint()