In [1]:
!pip install sagemaker ipywidgets --upgrade --quiet


In [None]:
import sagemaker, boto3, json
from sagemaker.session import Session

# A single SageMaker session is created and used to look up the ARN of the
# caller's identity; that ARN is later passed to the Model as its role.
sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()

# Region configured on a default boto3 session.
aws_region = boto3.Session().region_name

# `sess` is kept so that any code referring to it keeps working, but it is an
# alias of the session above instead of a second, separately built Session.
sess = sagemaker_session

In [3]:
# Default model id (used as the pre-selected dropdown entry) and its version string.
# If you switch to the open-llama model, set model_version to "2.*" instead.
model_id = "huggingface-textgeneration-gpt2"
model_version = "1.*"

In [4]:
import ipywidgets as widgets
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models
from sagemaker.jumpstart.filters import And

# List every JumpStart model id that matches both conditions:
# task is text generation AND framework is HuggingFace.
filter_value = And("task == textgeneration", "framework == huggingface")
text_generation_models = list_jumpstart_models(filter=filter_value)

# Dropdown over the retrieved model ids, pre-selected to the current `model_id`.
model_dropdown = widgets.Dropdown(
    options=text_generation_models,
    value=model_id,
    description="Select a model",
    style=dict(description_width="initial"),
    layout=dict(width="max-content"),
)


In [None]:
# Render the model-selection dropdown in the cell output.
# NOTE(review): nothing visible in this notebook reads model_dropdown.value;
# the following cell assigns model_id by hand - confirm the picker is still wanted.
display(model_dropdown)

In [109]:
# Model to deploy. Alternative id kept for reference: 'huggingface-llm-falcon-7b-bf16'
model_id = "huggingface-textgeneration-open-llama"
model_version = "2.0"

# Pass any other HF_MODEL_ID from -
# https://huggingface.co/models?pipeline_tag=text-classification&sort=downloads
# NOTE(review): the link filters on text-classification while HF_TASK below is
# "text-generation" - the filter looks like a copy-paste leftover; confirm.
HF_MODEL_ID = "xlnet-base-cased"

# Environment passed to the Model. It is populated only when model_id is the
# generic "huggingface-textgeneration-models" id and stays empty otherwise.
hub = (
    {"HF_MODEL_ID": HF_MODEL_ID, "HF_TASK": "text-generation"}
    if model_id == "huggingface-textgeneration-models"
    else {}
)

In [None]:
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base


# Endpoint name generated from the selected model id via name_from_base.
endpoint_name = name_from_base(f"jumpstart-example-{model_id}")

# Instance type used both to resolve the container image and to host the endpoint.
inference_instance_type = "ml.r5.4xlarge"

# Inference container image uri for the selected model id / version.
deploy_image_uri = image_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    image_scope="inference",
    instance_type=inference_instance_type,
    framework=None,  # automatically inferred from model_id
    region=None,
)

# Model uri for the selected model id / version, in inference scope.
model_uri = model_uris.retrieve(
    model_scope="inference",
    model_id=model_id,
    model_version=model_version,
)


# Build the SageMaker Model from the image uri, model uri, role and the
# `hub` environment assembled earlier. The model shares the endpoint's name.
model = Model(
    name=endpoint_name,
    role=aws_role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    env=hub,
    predictor_cls=Predictor,
)

# Deploy on a single instance. Predictor is passed so that the returned
# object can be used to run inference through the sagemaker API.
model_predictor = model.deploy(
    endpoint_name=endpoint_name,
    instance_type=inference_instance_type,
    initial_instance_count=1,
    predictor_cls=Predictor,
    tags=[{"Key": "auto-maintain", "Value": "true"}],
)



In [None]:
def query(model_predictor, text):
    """Send `text` to the predictor and return its raw response.

    The text is UTF-8 encoded and handed to ``model_predictor.predict``
    together with a dict requesting ``application/x-text`` as content type
    and ``application/json`` as the accepted reply format. Whatever
    ``predict`` returns is passed back unchanged.
    """
    request_args = {
        "ContentType": "application/x-text",
        "Accept": "application/json",
    }
    return model_predictor.predict(text.encode("utf-8"), request_args)


def parse_response(query_response):
    """Decode the JSON `query_response` and return its "generated_text" entry."""
    return json.loads(query_response)["generated_text"]

In [None]:
# Escape sequences wrapped around the generated text when printing.
newline = "\n"
bold = "\033[1m"
unbold = "\033[0m"

# Prompt sent to the deployed model.
text = "Which movie won the"

query_response = query(model_predictor, text)
generated_text = parse_response(query_response)

print(
    f"Input text: {text}{newline}"
    f"Generated text: {bold}{generated_text}{unbold}{newline}"
)

In [None]:
# 6. Clean up the endpoint
# Delete the SageMaker model and then the endpoint behind `model_predictor`.

# NOTE(review): the order is kept as written (model first, endpoint second);
# presumably delete_model needs the endpoint to still exist - confirm against
# the SageMaker Predictor documentation before reordering.
model_predictor.delete_model()
model_predictor.delete_endpoint()