In [1]:
!pip install --upgrade boto3
!pip install --upgrade sagemaker

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting boto3
  Downloading boto3-1.26.143-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
Collecting botocore<1.30.0,>=1.29.143
  Downloading botocore-1.29.143-py3-none-any.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m118.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: botocore, boto3
  Attempting uninstall: botocore
    Found existing installation: botocore 1.29.139
    Uninstalling botocore-1.29.139:
      Successfully uninstalled botocore-1.29.139
  Attempting uninstall: boto3
    Found existing installation: boto3 1.26.139
    Uninstalling boto3-1.26.139:
      Successfully uninstalled boto3-1.26.139
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. Th

In [15]:
# For notebook instances (Amazon Linux)
!sudo yum update -y
!sudo yum install amazon-linux-extras
!sudo amazon-linux-extras install epel -y
!sudo yum update -y
!sudo yum install git-lfs git -y

Loaded plugins: dkms-build-requires, extras_suggestions, langpacks, priorities,
              : update-motd, versionlock
amzn2-core                                               | 3.7 kB     00:00     
https://download.docker.com/linux/centos/2/x86_64/stable/repodata/repomd.xml: [Errno 14] HTTPS Error 404 - Not Found
Trying other mirror.
github_git-lfs/x86_64/signature                          |  819 B     00:00     
github_git-lfs/x86_64/signature                          |  951 B     00:00 !!! 
github_git-lfs-source/signature                          |  819 B     00:00     
github_git-lfs-source/signature                          |  951 B     00:00 !!! 
neuron                                                   | 2.9 kB     00:00     
61 packages excluded due to repository priority protections
No packages marked for update
Detected operating system as amzn/2.
Checking for curl...
Detected curl...
Downloading repository file: https://packagecloud.io/install/repositories/github/git-lfs/c

In [3]:
import sagemaker
import boto3
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it not exists
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::513489159680:role/NotebookStack-SmartSearchNotebookRole6F6BB12B-19HH1K0QXL4HN
sagemaker bucket: sagemaker-us-east-2-513489159680
sagemaker session region: us-east-2


In [4]:
!mkdir code

In [5]:
%%writefile code/inference.py

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Helper: Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


def model_fn(model_dir):
  # Load model from HuggingFace Hub
  tokenizer = AutoTokenizer.from_pretrained(model_dir)
  model = AutoModel.from_pretrained(model_dir)
  return model, tokenizer

def predict_fn(data, model_and_tokenizer):
    # destruct model and tokenizer
    model, tokenizer = model_and_tokenizer

    # Tokenize sentences
    sentences = data.pop("inputs", data)
    encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

    # Compute token embeddings
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Perform pooling
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings
    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    # return dictonary, which will be json serializable
    return {"vectors": sentence_embeddings.tolist()}


Writing code/inference.py


In [6]:
repository = "sentence-transformers/all-MiniLM-L6-v2"
model_id=repository.split("/")[-1]
s3_location=f"s3://{sess.default_bucket()}/custom_inference/{model_id}/model.tar.gz"

In [26]:
!git lfs install
!git clone https://huggingface.co/$repository

Updated git hooks.
Git LFS initialized.
Cloning into 'all-MiniLM-L6-v2'...
remote: Enumerating objects: 46, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 46 (delta 10), reused 21 (delta 10), pack-reused 25[K
Unpacking objects: 100% (46/46), 314.94 KiB | 10.16 MiB/s, done.
Filtering content: 100% (3/3), 260.15 MiB | 166.49 MiB/s, done.


In [8]:
!cp -r code/ $model_id/code/

In [27]:
%cd $model_id
!tar zcvf model.tar.gz *

/home/ec2-user/SageMaker/isearchjupyter/Embbeding Model/all-MiniLM-L6-v2/all-MiniLM-L6-v2
1_Pooling/
1_Pooling/config.json
config.json
config_sentence_transformers.json
data_config.json
modules.json
pytorch_model.bin
README.md
rust_model.ot
sentence_bert_config.json
special_tokens_map.json
tf_model.h5
tokenizer_config.json
tokenizer.json
train_script.py
vocab.txt


In [28]:
!aws s3 cp model.tar.gz $s3_location

upload: ./model.tar.gz to s3://sagemaker-us-east-2-513489159680/custom_inference/all-MiniLM-L6-v2/model.tar.gz


In [32]:
import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel

role = sagemaker.get_execution_role()

hub = {
	'HF_MODEL_ID':'sentence-transformers/all-MiniLM-L6-v2',
	'HF_TASK':'feature-extraction'
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
   model_data=s3_location,       # path to your model and script
   role=role,                    # iam role with permissions to create an Endpoint
   transformers_version="4.12",  # transformers version used
   pytorch_version="1.9",        # pytorch version used
   py_version='py38',            # python version used
   env=hub,
)

endpoint_name = 'pytorch-inference-all-minilm-l6-v2'
instance_type = 'ml.g4dn.2xlarge'
instance_count = 1

# deploy the endpoint endpoint
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
predictor = huggingface_model.deploy(
    endpoint_name = endpoint_name,
    instance_type = instance_type, 
    initial_instance_count = instance_count,
    serializer = JSONSerializer(),
    deserializer = JSONDeserializer()
)


----------!

In [33]:
data = {
  "inputs": "the mesmerizing performances of the leads keep the film grounded and keep the audience riveted .",
}

res = predictor.predict(data=data)
print(res)

[[[0.0402759350836277, -0.18730594217777252, 0.20511218905448914, -0.08549898117780685, 0.08048872649669647, 0.3330766558647156, -0.18683570623397827, -0.020703544840216637, 0.10167103260755539, 0.02183460257947445, -0.23397241532802582, 0.09859272092580795, 0.06610071659088135, 0.1326000839471817, -0.26474419236183167, -0.21779508888721466, 0.13376353681087494, -0.025470247492194176, 0.11203114688396454, 0.03479009121656418, 0.043245118111371994, -0.21407835185527802, -0.0149691766127944, -0.027196193113923073, -0.1920822262763977, -0.42878660559654236, 0.10259508341550827, 0.16004501283168793, 0.08512520790100098, -0.9987814426422119, -0.07192987203598022, -0.3467758595943451, -0.06367125362157822, 0.05760439112782478, -0.18493948876857758, 0.1340024471282959, -0.11645074933767319, -0.008078264072537422, -0.30667421221733093, 0.11143828183412552, 0.28458890318870544, 0.09542825818061829, 0.24503828585147858, 0.30562371015548706, 0.1984367072582245, -0.39372655749320984, -0.0560110658