**** Purpose: Deployment to endpoint

In [18]:
#Step0 Directory setups:
#!mkdir -p code
#!mv inference.py code/
#!rm -r bert
!rm -rf code/.ipynb_checkpoints


In [14]:
from transformers import DistilBertTokenizer, DistilBertModel

DistilBertTokenizer.from_pretrained("distilbert-base-uncased").save_pretrained("./bert")
DistilBertModel.from_pretrained("distilbert-base-uncased").save_pretrained("./bert")


In [17]:
#Step 1: Create new tar.gz with code/ included
!tar -czvf model.tar.gz logreg_model.joblib label_encoder.joblib bert code/


logreg_model.joblib
label_encoder.joblib
bert/
bert/special_tokens_map.json
bert/vocab.txt
bert/tokenizer_config.json
bert/model.safetensors
bert/config.json
code/
code/inference.py


In [27]:
!tar -tzvf model.tar.gz

-rw-rw-r-- ec2-user/ec2-user 31647 2025-07-13 03:29 logreg_model.joblib
-rw-rw-r-- ec2-user/ec2-user   707 2025-07-13 03:21 label_encoder.joblib
drwxrwxr-x ec2-user/ec2-user     0 2025-07-13 17:38 bert/
-rw-rw-r-- ec2-user/ec2-user   125 2025-07-13 17:38 bert/special_tokens_map.json
-rw-rw-r-- ec2-user/ec2-user 231508 2025-07-13 17:38 bert/vocab.txt
-rw-rw-r-- ec2-user/ec2-user   1278 2025-07-13 17:38 bert/tokenizer_config.json
-rw-rw-r-- ec2-user/ec2-user 265462608 2025-07-13 17:38 bert/model.safetensors
-rw-rw-r-- ec2-user/ec2-user       500 2025-07-13 17:38 bert/config.json
drwxrwxr-x ec2-user/ec2-user         0 2025-07-13 17:40 code/
-rw-rw-r-- ec2-user/ec2-user      1658 2025-07-13 17:39 code/inference.py


In [20]:
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import get_execution_role
import sagemaker

In [21]:
role = get_execution_role()
sagemaker_session = sagemaker.Session()

In [22]:
#STEP 2: Upload tar to S3
import boto3

s3 = boto3.client('s3')
s3.upload_file(
    Filename='model.tar.gz',
    Bucket='complaint-classifier-jp2025',
    Key='models/model.tar.gz'
)


In [28]:
#Step 3: Deploy using Model()
from sagemaker.model import Model

image_uri = '763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-cpu-py39-ubuntu20.04'

model = Model(
    image_uri=image_uri,
    model_data="s3://complaint-classifier-jp2025/models/model.tar.gz",  # must contain joblib, bert/, code/
    role=role,
    entry_point="inference.py",
    source_dir="code",
    sagemaker_session=sagemaker_session,
    env={
        "SAGEMAKER_PROGRAM": "inference.py"
    }
)


predictor = model.deploy(
    instance_type='ml.m5.large',
    initial_instance_count=1,
    endpoint_name='complaint-logreg-bert-endpoint'
)


------!

In [29]:
print(predictor)

None


In [32]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer

predictor = Predictor(
    endpoint_name="complaint-logreg-bert-endpoint",
    serializer=JSONSerializer()
)

payload = {"text": "I was charged extra interest after my loan was closed."}
response = predictor.predict(payload)
print(response)


b'{"label": "mortgages_and_loans", "confidence": 0.9758066608688755}'


In [33]:
#predictor.delete_endpoint()