# 1. Package the fine-tuned model output as a TAR file

In [2]:
# Build model.tar.gz in the notebook's working directory from the contents of the
# fine-tuned model folder. Running tar from inside that folder with `*` stores the
# files at the archive root (no leading directory prefix), as the listing shows.
# Note: the shell glob `*` does not match dotfiles, so hidden files are not archived.
!cd ./fine_tuned_sentence-transformers-distilbert-base-nli-stsb-mean-tokens/ && tar czvf ../model.tar.gz *

1_Pooling/
1_Pooling/config.json
README.md
config.json
config_sentence_transformers.json
eval/
eval/similarity_evaluation_pqa-valucation_results.csv
model.safetensors
modules.json
sentence_bert_config.json
similarity_evaluation_pqa-test_results.csv
special_tokens_map.json
tokenizer.json
tokenizer_config.json
vocab.txt


# 2. Upload the model output to S3

In [3]:
import sagemaker

# Execution role of this notebook; reused when the model is created for deployment.
role = sagemaker.get_execution_role()

# Session wrapper used for the S3 upload.
sagemaker_session = sagemaker.Session()

# Upload the packaged model. No bucket or key prefix is passed, so the SDK
# defaults apply; the call returns the S3 URI of the uploaded archive.
inputs = sagemaker_session.upload_data(path='model.tar.gz')

# Show the resulting S3 URI as the cell output.
inputs

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


's3://sagemaker-us-east-1-854256022130/data/model.tar.gz'

# 3. Deploy the model as a SageMaker endpoint

In [4]:
from sagemaker import get_execution_role
from sagemaker.predictor import Predictor
from sagemaker.pytorch import PyTorch, PyTorchModel
from sagemaker.serializers import StringSerializer

class StringPredictor(Predictor):
    """Predictor that sends request payloads to the endpoint as plain text.

    In SageMaker Python SDK v2 the ``content_type`` constructor argument is a
    no-op (the SDK only logs a warning), so the request content type has to be
    configured through a serializer instead. ``StringSerializer`` encodes a
    ``str`` payload and labels the request as ``text/plain``.

    The response deserializer is left at the SDK default, so ``predict``
    still returns the raw response body.

    Args:
        endpoint_name: Name of the SageMaker endpoint to invoke.
        sagemaker_session: Session used for the invocation; when ``None`` the
            SDK creates a default session.
    """

    def __init__(self, endpoint_name, sagemaker_session=None):
        super().__init__(
            endpoint_name,
            sagemaker_session=sagemaker_session,
            # Replaces the ignored ``content_type='text/plain'`` keyword.
            serializer=StringSerializer(content_type='text/plain'),
        )


In [5]:
import time

# Describe the model: the uploaded archive plus the inference script found in
# ./endpoint_code, served with the PyTorch 1.13.1 / Python 3.9 container.
pytorch_model = PyTorchModel(
    model_data=inputs,
    role=role,
    entry_point='inference.py',
    source_dir='./endpoint_code',
    py_version='py39',
    framework_version='1.13.1',
    predictor_cls=StringPredictor,
)

# Create the endpoint on a single instance. The Unix-timestamp suffix gives
# each run of this cell a distinct endpoint name.
predictor = pytorch_model.deploy(
    instance_type='ml.m5d.large',
    initial_instance_count=1,
    endpoint_name=f'semantic-search-model-{int(time.time())}',
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
------!

content_type is a no-op in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


# 4. Test the endpoint by calling it

In [8]:
import json

# Sample query text to embed.
original_payload = 'Does this work with xbox?'

# Invoke the deployed endpoint with the raw text.
features = predictor.predict(original_payload)

# The response body is JSON; decode it into a Python list of floats.
vector_data = json.loads(features)

# Display the embedding vector as the cell output.
vector_data

[-0.04391804337501526,
 0.007933881133794785,
 0.08752340078353882,
 0.5915279388427734,
 0.2645290493965149,
 -0.4721102714538574,
 1.120548963546753,
 0.48026007413864136,
 0.9890450835227966,
 -0.24132195115089417,
 -0.2677488327026367,
 0.48333364725112915,
 -0.24363267421722412,
 -0.36993247270584106,
 0.3525908589363098,
 -0.8197868466377258,
 -0.9145488739013672,
 0.43198564648628235,
 -0.14956851303577423,
 -0.2140531986951828,
 -0.4034903645515442,
 0.1760721653699875,
 -0.43686628341674805,
 0.7702944874763489,
 -0.42714276909828186,
 0.6551374197006226,
 0.9220091700553894,
 -0.40303096175193787,
 -0.022558607161045074,
 0.6489596366882324,
 -0.029975101351737976,
 0.4277136027812958,
 -0.609686553478241,
 0.6964616775512695,
 0.3097139894962311,
 -0.08486735075712204,
 0.24560806155204773,
 1.3777480125427246,
 1.0644712448120117,
 -0.9459939002990723,
 0.22072111070156097,
 -0.15356670320034027,
 1.0034153461456299,
 0.31822213530540466,
 -0.0955590084195137,
 0.2248880565

# Optional: Delete the endpoint if needed

In [None]:
import boto3

# Low-level SageMaker API client.
sagemaker_client = boto3.client(service_name='sagemaker')

# Tear down the endpoint. Replace the placeholder with the name of the
# endpoint to remove before running this cell.
sagemaker_client.delete_endpoint(EndpointName='<endpoint-name>')