In [None]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Deploy mistralai/Mistral-7B-Instruct-v0.2 behind a SageMaker endpoint using the
# "huggingface-neuronx" LLM container image, then send one test prompt to it.

# Resolve the IAM role for the endpoint. If get_execution_role() raises
# ValueError, fall back to looking the role ARN up in IAM by name.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

# Hub Model configuration. https://huggingface.co/models
# These key/value pairs are passed to the container as environment variables.
hub = {
    "HF_MODEL_ID": "mistralai/Mistral-7B-Instruct-v0.2",
    "HF_NUM_CORES": "2",
    "HF_BATCH_SIZE": "4",
    "HF_SEQUENCE_LENGTH": "4096",
    "HF_AUTO_CAST_TYPE": "bf16",
    "MAX_BATCH_SIZE": "4",
    "MAX_INPUT_LENGTH": "3686",
    "MAX_TOTAL_TOKENS": "4096",
    "HF_TOKEN": "<REPLACE WITH YOUR TOKEN>",
}

# Fail fast before any AWS resources are created. The previous check compared the
# token against "" only, so the untouched placeholder slipped through unnoticed.
assert hub["HF_TOKEN"] and not hub["HF_TOKEN"].startswith("<REPLACE"), (
    "Please replace '<REPLACE WITH YOUR TOKEN>' with your Hugging Face Hub API token"
)


# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface-neuronx", version="0.0.21"),
    env=hub,
    role=role,
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.inf2.xlarge",
    # NOTE(review): raised from 100 s. The container has to download and load a
    # 7B-parameter model before it can pass the health check, so 100 s is very
    # likely too short. Tune as needed.
    container_startup_health_check_timeout=1800,
    volume_size=512,
)

# send request (kept as the cell's last expression so the response is displayed)
predictor.predict(
    {
        "inputs": "What is is the capital of France?",
        "parameters": {
            "do_sample": True,
            "max_new_tokens": 128,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.95,
        }
    }
)

In [27]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Deploy mistralai/Mistral-7B-Instruct-v0.2 with the "huggingface" LLM container
# image and send a single prompt to the resulting endpoint.

# Pick the execution role; when get_execution_role() raises ValueError the role
# ARN is fetched from IAM by its name instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="sagemaker_execution_role")
    role = role_description["Role"]["Arn"]

# Environment for the container (model catalogue: https://huggingface.co/models).
# Both token entries are sent as empty strings; no token check is performed here.
hub = dict(
    HF_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.2",
    SM_NUM_GPUS=json.dumps(1),
    HF_TASK="question-answering",
    HF_API_TOKEN="",
    HUGGING_FACE_HUB_TOKEN="",
)

# Build the model object from the container image URI, environment and role.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="1.4.2")
huggingface_model = HuggingFaceModel(image_uri=llm_image_uri, env=hub, role=role)

# Create the endpoint: one ml.g5.xlarge instance, 960 s startup health-check budget.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.xlarge",
    container_startup_health_check_timeout=960,
)

# Query the endpoint; the call stays last so the cell displays its result.
question = {"inputs": "what is the name of tallest mountain"}
predictor.predict(question)

-----------------!

[{'generated_text': "what is the name of tallest mountain in the world? i think mt everest, isn't it?\n\nYes, you are correct. Mount Everest is the tallest mountain in the world. It is located in the Mahalangur mountain range in the Himalayas, on the border between Nepal and Tibet. Everest has a height of approximately 8,848.86 meters (29,031.7 feet) above sea level, although some sources may give slightly"}]

In [3]:
!pip install -U -q sagemaker

In [6]:
# HTTP endpoint that the request cells in this notebook POST their prompts to.
url = "https://3rtu1qv1te.execute-api.us-east-1.amazonaws.com/prod"

In [28]:
import requests

# POST one prompt to `url` as a JSON body and print the raw response text.
myobj = json.dumps({"inputs": "tell me story of boy and goldfish"})
x = requests.post(
    url,
    data=myobj,
    # NOTE(review): x-api-key is blank here — presumably redacted; supply the real key.
    headers={'content-type': 'application/json', 'x-api-key': ''},
    # requests waits indefinitely unless a timeout is given; cap it so the cell
    # cannot hang forever on an unresponsive endpoint.
    timeout=60,
)
print(x.text)

[{"generated_text": "tell me story of boy and goldfish\n\nA long time ago, in a small village nestled between the mountains, there lived a kind and gentle boy named Hiroshi. He lived with his grandmother in a humble cottage by the edge of a sparkling clear pond. The pond was home to many creatures, but none were more cherished than an iridescent golden goldfish named Mariko.\n\nMariko was unlike any other goldfish in the pond. Her scales shimmered in the"}]


In [43]:
# POST a prompt wrapped in <|prompter|> / <|assistant|> tags to the endpoint.
# The response is kept in `x` for the parsing cell that follows.
url = 'https://3rtu1qv1te.execute-api.us-east-1.amazonaws.com/prod'
myobj = json.dumps({"inputs": """<|prompter|>What are some cool ideas to do in the summer?<|endoftext|><|assistant|>"""})
x = requests.post(
    url,
    data=myobj,
    # NOTE(review): x-api-key is blank here — presumably redacted; supply the real key.
    headers={'content-type': 'application/json', 'x-api-key': ''},
    # requests waits indefinitely unless a timeout is given; cap it so the cell
    # cannot hang forever on an unresponsive endpoint.
    timeout=60,
)

In [52]:
# Extract the assistant's reply from the JSON response held in `x`.
# The response is indexed as a list whose first item has a 'generated_text' key.
data = json.loads(x.text)
generated_text = data[0]['generated_text']

# Take the segment that follows the first '<|endoftext|>' marker. If the marker
# is missing, fall back to the whole text instead of raising IndexError.
segments = generated_text.split('<|endoftext|>')
assistant_answer = segments[1] if len(segments) > 1 else segments[0]
assistant_answer = assistant_answer.strip()

# Drop the leading '<|assistant|>' tag only when it is actually present, so an
# untagged reply does not lose its first 13 characters.
assistant_tag = '<|assistant|>'
if assistant_answer.startswith(assistant_tag):
    assistant_answer = assistant_answer[len(assistant_tag):]
print(assistant_answer)

1. Beach Day: Spend a day at the beach, soaking up the sun, swimming, building sandcastles, and enjoying the cool water. Don't forget the sunscreen!

2. Picnic in the Park: Pack a basket with your favorite foods, a blanket, and maybe a frisbee or a book, and head to a local park for a relaxing day in the sun.

3. BBQ: Invite friends and family over for a
