In [1]:
!pip install -U sagemaker

Collecting sagemaker
  Downloading sagemaker-3.3.1-py3-none-any.whl.metadata (20 kB)
Collecting sagemaker-core<3.0.0,>=2.3.1 (from sagemaker)
  Downloading sagemaker_core-2.3.1-py3-none-any.whl.metadata (5.5 kB)
Collecting sagemaker-train<2.0.0,>=1.3.1 (from sagemaker)
  Downloading sagemaker_train-1.3.1-py3-none-any.whl.metadata (7.8 kB)
Collecting sagemaker-serve<2.0.0,>=1.3.1 (from sagemaker)
  Downloading sagemaker_serve-1.3.1-py3-none-any.whl.metadata (1.6 kB)
Collecting sagemaker-mlops<2.0.0,>=1.3.1 (from sagemaker)
  Downloading sagemaker_mlops-1.3.1-py3-none-any.whl.metadata (5.7 kB)
Collecting rich<14.0.0,>=13.0.0 (from sagemaker-core<3.0.0,>=2.3.1->sagemaker)
  Using cached rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting protobuf<5.0,>=3.12 (from sagemaker-core<3.0.0,>=2.3.1->sagemaker)
  Using cached protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting torch>=1.9.0 (from sagemaker-core<3.0.0,>=2.3.1->sagemaker)
  Using cached torch-2.6.

In [2]:
import sagemaker

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
from sagemaker.huggingface import HuggingFace

In [4]:
role = sagemaker.get_execution_role()

In [5]:
role

'arn:aws:iam::516124113879:role/SageMakerLLMRole'

In [6]:
hyperparameters = {
    "model_id": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    "epochs": 2,
    "per_device_train_batch_size": 2,
    "lr": 2e-5
}

In [7]:
estimator = HuggingFace(
    entry_point="train.py",
    source_dir="./scripts",
    role=role,
    transformers_version="4.36",
    pytorch_version="2.1",
    py_version="py310",
    instance_type="ml.g5.xlarge",
    instance_count=1,
    output_path="s3://llm-model-artifacts-kchitresh/models/",
    hyperparameters=hyperparameters
)

## Run only for the training

In [9]:
estimator.fit({
    "train": "s3://llm-finetuning-dataset-kchitresh/dataset/"
})

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: huggingface-pytorch-training-2026-01-20-04-04-05-613


2026-01-20 04:04:07 Starting - Starting the training job
2026-01-20 04:04:07 Pending - Training job waiting for capacity...
2026-01-20 04:04:28 Pending - Preparing the instances for training...
2026-01-20 04:04:54 Downloading - Downloading input data...
2026-01-20 04:05:19 Downloading - Downloading the training image........................
2026-01-20 04:09:37 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2026-01-20 04:09:48,691 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2026-01-20 04:09:48,710 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2026-01-20 04:09:48,721 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.

In [17]:
# estimator.latest_training_job.model_data
estimator.model_data

's3://llm-model-artifacts-kchitresh/models/huggingface-pytorch-training-2026-01-20-04-04-05-613/output/model.tar.gz'

In [10]:
# Use this code to check all the accessible services inside your AWS Sagemaker

# from sagemaker import image_uris

# image_uris.retrieve(
#     framework="huggingface",
#     region="ap-south-1",   # change your region
#     version="4.37.0",
#     image_scope="inference"
# )

In [11]:
 # instance_type="ml.g5.xlarge",

In [12]:
# model = HuggingFaceModel(
#     model_data="s3://bucket/model.tar.gz",
#     role=role,
#     entry_point="inference.py",
#     source_dir="inference",
#     transformers_version="4.36",
#     pytorch_version="2.1",
#     py_version="py310"
# )

In [8]:
# --------------------------------------------
# SageMaker Fine-Tuned Summarization Model Deployment (Final)
# --------------------------------------------
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, HuggingFacePredictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
import boto3

# 1️⃣ Execution role (must have SageMaker permissions)
role = sagemaker.get_execution_role()  # Use full IAM ARN if running outside Studio

# 2️⃣ Define your fine-tuned HuggingFace model
model = HuggingFaceModel(
    model_data="s3://llm-model-artifacts-kchitresh/models/huggingface-pytorch-training-2026-01-20-04-04-05-613/output/model.tar.gz",
    role=role,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env={
        'HF_TASK': 'summarization'   # Must set for model to work
    }
)

# 3️⃣ Deploy the model as an endpoint
#    - GPU instance recommended for LLMs (ml.g5.xlarge)
#    - update_endpoint=True allows redeploying safely
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",           # GPU instance to avoid memory issues
    endpoint_name="live-finetune-endpoint-v2",  # valid endpoint name
    update_endpoint=True
)

# 4️⃣ Wrap the endpoint with JSON serializer/deserializer for easy payload handling
predictor = HuggingFacePredictor(
    endpoint_name="live-finetune-endpoint-v2",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
)

# 5️⃣ Example inference payload
payload = {
    "inputs": "Summarize this text: The quick brown fox jumps over the lazy dog.",
    "parameters": {
        "max_new_tokens": 256  # Optional generation settings
    }
}

# 6️⃣ Invoke the endpoint
result = predictor.predict(payload)

# 7️⃣ Print the summarization result
print("Summarization Result:", result)


----------!Summarization Result: [{'summary_text': 'Summarize this text: The quick brown fox jumps over the lazy dog.\nThe quick brown fox jumps over the lazy dog.\nThe quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps