In [None]:
# --------------------------------------------
# SageMaker Fine-Tuned Summarization Model Deployment (Final)
# --------------------------------------------
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, HuggingFacePredictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
import boto3

In [None]:
# Deploys a fine-tuned Hugging Face summarization model to a real-time
# SageMaker endpoint, then sends one sample request to it and prints the reply.

# 1️⃣ Execution role (must have SageMaker permissions)
role = sagemaker.get_execution_role()  # Use full IAM ARN if running outside Studio

# Single source of truth for the endpoint name: the deploy step and the
# predictor step below must target the same endpoint, so neither hard-codes it.
ENDPOINT_NAME = "live-finetune-endpoint-v2"

# 2️⃣ Define your fine-tuned HuggingFace model
model = HuggingFaceModel(
    model_data="s3://llm-model-artifacts-kchitresh/models/huggingface-pytorch-training-2026-01-20-04-04-05-613/output/model.tar.gz",
    role=role,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env={
        "HF_TASK": "summarization",  # Must set for model to work
    },
)

# 3️⃣ Deploy the model as an endpoint
#    - GPU instance recommended for LLMs (ml.g5.xlarge)
#    - NOTE: `update_endpoint=True` is intentionally NOT passed. SageMaker
#      Python SDK v2 removed that argument from deploy(); supplying it only
#      emits a warning and has no effect. deploy() always tries to create a
#      new endpoint, so if ENDPOINT_NAME already exists, delete it first or
#      call `update_endpoint(...)` on a predictor attached to the existing
#      endpoint instead of re-running this cell.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",  # GPU instance to avoid memory issues
    endpoint_name=ENDPOINT_NAME,
)

# 4️⃣ Attach a predictor to the endpoint by name with explicit JSON
#    serializer/deserializer. deploy() above already returns a
#    HuggingFacePredictor (JSON in/out by default, per the SDK docs); this
#    step is the pattern to reuse when connecting to the endpoint from a
#    fresh session without redeploying.
predictor = HuggingFacePredictor(
    endpoint_name=ENDPOINT_NAME,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

# 5️⃣ Example inference payload
payload = {
    "inputs": "Summarize this text: The quick brown fox jumps over the lazy dog.",
    "parameters": {
        "max_new_tokens": 256,  # Optional generation settings
    },
}

# 6️⃣ Invoke the endpoint
result = predictor.predict(payload)

# 7️⃣ Print the summarization result
print("Summarization Result:", result)