In [1]:
!pip install sagemaker -U

Collecting sagemaker
  Downloading sagemaker-2.237.0-py3-none-any.whl.metadata (16 kB)
Collecting boto3<2.0,>=1.35.75 (from sagemaker)
  Downloading boto3-1.35.76-py3-none-any.whl.metadata (6.7 kB)
Collecting omegaconf<2.3,>=2.2 (from sagemaker)
  Downloading omegaconf-2.2.3-py3-none-any.whl.metadata (3.9 kB)
Collecting sagemaker-core<2.0.0,>=1.0.17 (from sagemaker)
  Downloading sagemaker_core-1.0.17-py3-none-any.whl.metadata (4.9 kB)
Collecting botocore<1.36.0,>=1.35.76 (from boto3<2.0,>=1.35.75->sagemaker)
  Downloading botocore-1.35.76-py3-none-any.whl.metadata (5.7 kB)
Collecting pydantic<3.0.0,>=2.0.0 (from sagemaker-core<2.0.0,>=1.0.17->sagemaker)
  Downloading pydantic-2.10.3-py3-none-any.whl.metadata (172 kB)
Collecting platformdirs (from sagemaker)
  Downloading platformdirs-4.3.6-py3-none-any.whl.metadata (11 kB)
Collecting mock<5.0,>4.0 (from sagemaker-core<2.0.0,>=1.0.17->sagemaker)
  Downloading mock-4.0.3-py3-none-any.whl.metadata (2.8 kB)
Collecting annotated-types>=0.6

In [3]:
import sagemaker
import boto3
# Start from a default session so a fallback bucket can be looked up.
sess = sagemaker.Session()

# Bucket SageMaker uses for uploaded data, models and logs.
# Leave it as None to use the session's default bucket; SageMaker creates
# that bucket automatically if it does not exist yet.
sagemaker_session_bucket = None
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the IAM role ARN used for the SageMaker resources created below.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # No execution role could be resolved from the environment:
    # look the role up by its name through IAM instead.
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_description['Role']['Arn']

# Rebuild the session so it is bound to the bucket selected above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::615299768742:role/service-role/AmazonSageMaker-ExecutionRole-20241207T181438
sagemaker session region: us-east-1


In [7]:
from sagemaker.huggingface.model import HuggingFaceModel

# Which Hugging Face Hub model to serve (browse https://huggingface.co/models)
hub = dict(
    HF_MODEL_ID='distilbert-base-uncased-distilled-squad',  # model_id as listed on hf.co/models
    HF_TASK='question-answering',                           # NLP task used for predictions
)

# Describe the model container: Hub configuration, IAM role and framework versions.
huggingface_model = HuggingFaceModel(
    role=role,                    # IAM role with permissions to create an endpoint
    env=hub,                      # environment telling the container what to load from the Hub
    py_version='py39',            # Python version used
    pytorch_version="1.13",       # PyTorch version used
    transformers_version="4.26",  # Transformers version used
)

# Stand up a SageMaker Inference endpoint backed by a single ml.m5.xlarge instance.
predictor = huggingface_model.deploy(
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

# Example request payload; the top-level "inputs" key is always required.
data = {
    "inputs": {
        "question": "What is used for inference?",
        "context": "My Name is Philipp and I live in Nuremberg. This model is used with sagemaker for inference.",
    },
}

# Send the request; as the cell's last expression the response is displayed.
predictor.predict(data)

-----!

{'score': 0.9987204670906067, 'start': 68, 'end': 77, 'answer': 'sagemaker'}

In [8]:
# Second question-answering payload: a question about the Amazon rainforest
# together with the passage the answer has to be extracted from.
rainforest_question = "How many indeginious territories exist in amazon rainforest?"

# Adjacent string literals are concatenated into one single-line context string.
rainforest_context = (
    "The Amazon rainforest,also called Amazon jungle or Amazonia, is a moist "
    "broadleaf tropical rainforest in the Amazon biome that covers most of the "
    "Amazon basin of South America. This basin encompasses 7,000,000 km2 "
    "(2,700,000 sq mi), of which 6,000,000 km2 (2,300,000 sq mi) are covered "
    "by the rainforest. This region includes territory belonging to nine "
    "nations and 3,344 indigenous territories."
)

data = {
    "inputs": {
        "question": rainforest_question,
        "context": rainforest_context,
    },
}

In [9]:
# Send the payload currently held in `data` to the endpoint behind `predictor`;
# as the last expression of the cell, the returned response is displayed.
predictor.predict(data)

{'score': 0.9673440456390381, 'start': 366, 'end': 371, 'answer': '3,344'}