# Amazon Bedrock - API Gateway invocation

### Setup Environment

We are going to invoke Amazon API Gateway through `requests`

In [None]:
%pip install -q Pillow
%pip install -q requests

In [None]:
import requests

### Setting up the API URL

In [None]:
import os

# Connection settings used by every request in this notebook.
# Each value is taken from an environment variable when it is set, so real
# keys do not have to be saved inside the notebook; otherwise the original
# placeholder is kept and must be replaced by hand.
# A trailing "/" is removed from the URL because later cells append
# "/invoke_model" and "/list_foundation_models" to it.
api_url = os.environ.get("BEDROCK_API_URL", "<API_URL>").rstrip("/")
api_key = os.environ.get("BEDROCK_API_KEY", "<API_KEY>")
team_id = os.environ.get("BEDROCK_TEAM_ID", "<TEAM_ID>")

### List Foundation Models

In [None]:
# Ask the gateway which foundation models are available to this team.
response = requests.get(
    f"{api_url}/list_foundation_models",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list; its first item is what gets displayed.
text = response.json()[0]

print(text)

### Amazon Titan Express

In [None]:
# Target model and the question to ask it.
model_id = "amazon.titan-text-express-v1"
prompt = "What is Amazon Bedrock?"

# Inference parameters sent alongside the prompt.
model_kwargs = dict(maxTokenCount=4096, temperature=0.2)

In [None]:
# Send the prompt and inference parameters to the gateway's invoke_model route.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Amazon Titan Express

Use Converse API

In [None]:
# Target model plus Converse-style inference parameters.
model_id = "amazon.titan-text-express-v1"
model_kwargs = dict(maxTokens=4096, temperature=0.2)

# A single user turn expressed as a list of messages.
user_message = {"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}
prompt = [user_message]

In [None]:
# Send the message list to the gateway; the "messages_api" header marks the
# request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Amazon Titan - Streaming

In [None]:
import time

In [None]:
# Model, prompt and inference parameters for the streaming example.
model_id = "amazon.titan-text-express-v1"
prompt = "What is Amazon Bedrock?"
model_kwargs = dict(
    maxTokenCount=4096,
    temperature=0.2,
)

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Amazon Titan - Streaming

Use Converse API

In [None]:
import time

In [None]:
# Model and Converse-style inference parameters for the streaming example.
model_id = "amazon.titan-text-express-v1"
model_kwargs = dict(maxTokens=4096, temperature=0.2, topP=0.9)

# Conversation made of a single user message.
prompt = [dict(role="user", content=[{"text": "What is Amazon Bedrock?"}])]

To use Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header. Because this example uses the Converse API, the `messages_api` header is also set to `true`.

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Amazon Titan Embeddings G1 - Text

In [None]:
# Embedding model and the text to embed.
model_id, prompt = "amazon.titan-embed-text-v1", "What is Amazon Bedrock?"

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
# Request an embedding; the "type" header marks this as an embeddings call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    },
    json={"inputs": prompt},
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The first item of the body carries the vector under "embedding".
text = response.json()[0]["embedding"]

print(text)

### Amazon Titan Text Embeddings v2

In [None]:
# Embedding model and the text to embed.
model_id = "amazon.titan-embed-text-v2:0"
prompt = "What is Amazon Bedrock?"

# Extra options sent with the embedding request.
parameters = dict(dimensions=1024, normalize=True)

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
# Request an embedding; the "type" header marks this as an embeddings call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    },
    json={"inputs": prompt, "parameters": parameters},
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The first item of the body carries the vector under "embedding".
text = response.json()[0]["embedding"]

print(text)

### Amazon Titan Multimodal Embeddings

In [None]:
import base64

In [None]:
# Read the sample image and keep it as a base64 string so it can be placed in
# a JSON request body.
image_path = "./images/battery_image.png"

with open(image_path, mode="rb") as image_file:
    raw_image = image_file.read()

byte_file = base64.b64encode(raw_image).decode("utf-8")

In [None]:
# Multimodal embedding model; the "prompt" is the base64-encoded image itself.
model_id, prompt = "amazon.titan-embed-image-v1", byte_file

To generate image embeddings with Bedrock boto3 `invoke_model`, include the parameter `type` as `embeddings-image` in the header

In [None]:
# Request an embedding for the image; the "type" header value
# "embeddings-image" marks the input as an image.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings-image"
    },
    json={"inputs": prompt},
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The first item of the body carries the vector under "embedding".
text = response.json()[0]["embedding"]

print(text)

### Amazon Titan Image Generator

In [None]:
import base64
import io
from PIL import Image

In [None]:
# Image generation model.
model_id = "amazon.titan-image-generator-v1"

# Generation settings, built separately and then nested under the task type.
image_generation_config = dict(
    cfgScale=8,
    seed=0,
    quality="standard",
    width=512,
    height=512,
    numberOfImages=1,
)
model_kwargs = dict(
    taskType="TEXT_IMAGE",
    imageGenerationConfig=image_generation_config,
)

# Text description of the desired image (leading/trailing newlines kept).
prompt = "\nblue backpack on a table\n"

To generate images with Bedrock boto3 `invoke_model`, include the parameter `type` as `image` in the header

In [None]:
# Request an image; the "type" header marks this as an image-generation call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"textToImageParams": {"text": prompt}, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "image"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# "images" holds base64 strings; only the first one is decoded and shown.
response_body = response.json()[0]["images"]

base_64_img_str = response_body[0]

image = Image.open(io.BytesIO(base64.b64decode(base_64_img_str)))
image

### Anthropic Claude 3 Sonnet

In [None]:
# Target model and Converse-style inference parameters.
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
model_kwargs = dict(
    maxTokens=4096,
    temperature=0.2,
    topP=0.9,
    stopSequences=["\n\nHuman:"],
)

# Conversation made of a single user message.
user_message = dict(role="user", content=[{"text": "What is Amazon Bedrock?"}])
prompt = [user_message]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header

In [None]:
# Send the message list to the gateway; the "messages_api" header marks the
# request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

If you want to add a system prompt

In [None]:
# Same model as before; the system prompt travels with the other parameters
# under the "system" key.
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
model_kwargs = dict(
    maxTokens=4096,
    temperature=0.2,
    topP=0.9,
    stopSequences=["\n\nHuman:"],
    system="Always translate the answer in Italian",
)

# Conversation made of a single user message.
prompt = [dict(role="user", content=[{"text": "What is Amazon Bedrock?"}])]

In [None]:
# Send the message list (with the system prompt inside the parameters) to the
# gateway; the "messages_api" header marks the request as using the messages
# format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 3 Sonnet - Streaming

In [None]:
import time

In [None]:
# Model and Converse-style inference parameters for the streaming example.
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

stop_sequences = ["\n\nHuman:"]
model_kwargs = dict(maxTokens=4096, temperature=0.2, topP=0.9, stopSequences=stop_sequences)

# Conversation made of a single user message.
prompt = [{"role": "user", "content": [dict(text="What is Amazon Bedrock?")]}]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 3 Sonnet - Multi-modal

In [None]:
import base64
import time

In [None]:
# Load the sample image from disk and base64-encode it into a text string.
image_path = "./images/battery_image.png"

with open(image_path, mode="rb") as image_file:
    image_bytes = image_file.read()

encoded_image = base64.b64encode(image_bytes)
byte_file = encoded_image.decode("utf-8")

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

# Converse-style inference parameters.
model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"],
}

# The image is loaded from a ".png" path, yet the format used to be hard-coded
# as "webp". Derive the declared format from the file's leading bytes so it
# matches the actual content; fall back to the original "webp" value when the
# signature is not recognised.
image_header = base64.b64decode(byte_file[:16])  # 16 base64 chars -> first 12 bytes
if image_header.startswith(b"\x89PNG\r\n\x1a\n"):
    image_format = "png"
elif image_header.startswith(b"\xff\xd8\xff"):
    image_format = "jpeg"
elif image_header.startswith((b"GIF87a", b"GIF89a")):
    image_format = "gif"
else:
    image_format = "webp"

# One user turn containing the image followed by the question about it.
prompt = [
    {
        "role": "user",
        "content": [
            {
                "image": {
                    "format": image_format,
                    "source": {
                        "bytes": byte_file
                    }
                }
            },
            {
                "text": "what is in the image?"
            },
        ]
    }
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

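To use Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header (the cell below sends a single non-streaming request; the streaming variant is shown in the next section)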

In [None]:
# Send the image-plus-text message to the gateway; the "messages_api" header
# marks the request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 3 Sonnet - Multi-modal Streaming

Use Converse API

In [None]:
import base64
import time

In [None]:
# Load the sample image and turn its bytes into a base64 text string.
image_path = "./images/battery_image.png"

with open(image_path, mode="rb") as image_file:
    file_content = image_file.read()

byte_file = base64.b64encode(file_content).decode("utf-8")

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

# Converse-style inference parameters.
model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"]
}

# The image is loaded from a ".png" path, yet the format used to be hard-coded
# as "webp". Derive the declared format from the file's leading bytes so it
# matches the actual content; fall back to the original "webp" value when the
# signature is not recognised.
image_header = base64.b64decode(byte_file[:16])  # 16 base64 chars -> first 12 bytes
if image_header.startswith(b"\x89PNG\r\n\x1a\n"):
    image_format = "png"
elif image_header.startswith(b"\xff\xd8\xff"):
    image_format = "jpeg"
elif image_header.startswith((b"GIF87a", b"GIF89a")):
    image_format = "gif"
else:
    image_format = "webp"

# One user turn containing the image followed by the question about it.
prompt = [
    {
        "role": "user",
        "content": [
            {
                "image": {
                    "format": image_format,
                    "source": {
                        "bytes": byte_file
                    }
                }
            },
            {
                "text": "what is in the image?"
            }
        ]
    }
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 2.1

In [None]:
# Target model and the question to ask it.
model_id = "anthropic.claude-v2:1"
prompt = "What is Amazon Bedrock?"

# Inference parameters sent alongside the prompt.
model_kwargs = dict(max_tokens_to_sample=4096, temperature=0.2)

In [None]:
# Send the prompt and inference parameters to the gateway's invoke_model route.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 2.1

Use Converse API

In [None]:
# Target model and Converse-style inference parameters.
model_id = "anthropic.claude-v2:1"
model_kwargs = dict(
    maxTokens=4096,
    temperature=0.2,
    topP=0.9,
    stopSequences=["\n\nHuman:"],
)

# Conversation made of a single user message.
question = {"text": "What is Amazon Bedrock?"}
prompt = [{"role": "user", "content": [question]}]

In [None]:
# Send the message list to the gateway; the "messages_api" header marks the
# request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 2.1 - Streaming

In [None]:
import time

In [None]:
# Model, prompt and inference parameters for the streaming example.
model_id = "anthropic.claude-v2:1"
prompt = "What is Amazon Bedrock?"
model_kwargs = dict(
    max_tokens_to_sample=4096,
    temperature=0.2,
)

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Anthropic Claude 2.1 - Streaming

Use Converse API

In [None]:
import time

In [None]:
# Model and Converse-style inference parameters for the streaming example.
model_id = "anthropic.claude-v2:1"

stop_sequences = ["\n\nHuman:"]
model_kwargs = dict(maxTokens=4096, temperature=0.2, topP=0.9, stopSequences=stop_sequences)

# Conversation made of a single user message.
prompt = [dict(role="user", content=[dict(text="What is Amazon Bedrock?")])]

To use Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header. Because this example uses the Converse API, the `messages_api` header is also set to `true`.

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true",
        "streaming": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Mistral Large

In [None]:
# Target model and the instruction-formatted prompt.
model_id = "mistral.mistral-large-2402-v1:0"
prompt = "<s>[INST]What is Amazon Bedrock?[/INST]"

# Inference parameters sent alongside the prompt.
model_kwargs = dict(max_tokens=4096, temperature=0.2)

In [None]:
# Send the prompt and inference parameters to the gateway's invoke_model route.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Mistral Large - Streaming

In [None]:
import time

In [None]:
# Model, prompt and inference parameters for the streaming example.
model_id = "mistral.mistral-large-2402-v1:0"
prompt = "<s>[INST]What is Amazon Bedrock?[/INST]"
model_kwargs = dict(
    max_tokens=4096,
    temperature=0.2,
)

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Mistral Large - Streaming

Use Converse API

In [None]:
import time

In [None]:
# Model and Converse-style inference parameters for the streaming example.
model_id = "mistral.mistral-large-2402-v1:0"
model_kwargs = dict(maxTokens=4096, temperature=0.2, topP=0.9)

# Conversation made of a single user message.
user_message = {"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}
prompt = [user_message]

To use Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header. Because this example uses the Converse API, the `messages_api` header is also set to `true`.

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true",
        "streaming": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### AI21 Jurassic Ultra

In [None]:
# Target model and inference parameters.
model_id = "ai21.j2-ultra"
model_kwargs = dict(maxTokens=4096, temperature=0.2)

# Question to ask (leading/trailing newlines kept).
prompt = "\nWhat is Amazon Bedrock?\n"

In [None]:
# Send the prompt and inference parameters to the gateway's invoke_model route.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### AI21 Jurassic Ultra

Use Converse API

In [None]:
# Target model and Converse-style inference parameters.
model_id = "ai21.j2-ultra"
model_kwargs = dict(maxTokens=4096, temperature=0.2, topP=0.9)

# Conversation made of a single user message.
prompt = [dict(role="user", content=[{"text": "What is Amazon Bedrock?"}])]

In [None]:
# Send the message list to the gateway; the "messages_api" header marks the
# request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Cohere Command

In [None]:
# Target model and inference parameters.
model_id = "cohere.command-text-v14"
model_kwargs = dict(max_tokens=4000, temperature=0.2)

# Question to ask (leading/trailing newlines kept).
prompt = "\nWhat is Amazon Bedrock?\n"

In [None]:
# Send the prompt and inference parameters to the gateway's invoke_model route.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Cohere Embed Multilingual

In [None]:
# Embedding model and its request parameters.
model_id = "cohere.embed-multilingual-v3"
model_kwargs = dict(input_type="search_document")

# Italian text to embed (leading/trailing newlines kept).
prompt = "\nCosa è Amazon Bedrock?\n"

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
# Request an embedding; the "type" header marks this as an embeddings call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The first item of the body carries the vector under "embedding".
text = response.json()[0]["embedding"]

print(text)

### Stability AI

In [None]:
import base64
import io
from PIL import Image

In [None]:
# Image generation model.
model_id = "stability.stable-diffusion-xl-v1"

# Generation settings sent alongside the text prompt.
model_kwargs = dict(
    cfg_scale=5,
    seed=452345,
    steps=60,
    style_preset="photographic",
    clip_guidance_preset="FAST_GREEN",
    sampler="K_DPMPP_2S_ANCESTRAL",
    width=768,
)

# Text description of the desired image (leading/trailing newlines kept).
prompt = "\na beautiful mountain landscape\n"

To generate images with Bedrock boto3 `invoke_model`, include the parameter `type` as `image` in the header

In [None]:
# Request an image; the "type" header marks this as an image-generation call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"text_prompts": ([{"text": prompt, "weight": 1.0}]), "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "image"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# "artifacts" is a list of results; the first one's "base64" field is decoded
# and shown.
response_body = response.json()[0]["artifacts"]

base_64_img_str = response_body[0].get("base64")

image = Image.open(io.BytesIO(base64.b64decode(base_64_img_str)))
image

### Amazon Bedrock - Custom Model

#### Amazon Titan Express

In [None]:
# Base model id plus the ARN placeholder to be replaced with a real value.
model_id, model_arn = "amazon.titan-text-express-v1", "<PROVISIONED_THROUGHPUT_ARN>"

# Question to ask and the inference parameters sent with it.
prompt = "What is Amazon Bedrock?"
model_kwargs = dict(maxTokenCount=4096, temperature=0.2)

For using Bedrock boto3 `invoke_model` with a custom model, include the parameter `model_arn` in the query

In [None]:
# Invoke the custom model: the ARN is passed as the "model_arn" query
# parameter next to the base model id.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}&model_arn={model_arn}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The body is indexed as a list whose first item carries the generated text.
text = response.json()[0]["generated_text"]

print(text)

### Amazon Bedrock - Cross-Region Inference

#### Anthropic Claude 3.5 Sonnet

In [None]:
import time

In [None]:
# Placeholder to be replaced with the inference profile id, passed as the
# model id.
model_id = "<INFERENCE_PROFILE_ID>"

# Converse-style inference parameters.
model_kwargs = dict(
    maxTokens=4096,
    temperature=0.2,
    topP=0.9,
    stopSequences=["\n\nHuman:"],
)

# Conversation made of a single user message.
user_message = dict(role="user", content=[{"text": "What is Amazon Bedrock?"}])
prompt = [user_message]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
# Start the generation. With the "streaming" header set, the first item of the
# response carries a request id that is polled for the result below.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Fail with a clear HTTPError rather than a KeyError on an error payload.
response.raise_for_status()

request_id = response.json()[0]["request_id"]

start_time = time.time()
max_time = 120  # overall polling budget, in seconds

# Poll every 2 seconds until the response contains "generated_text".
while (time.time() - start_time) < max_time:
    response = requests.post(
        f"{api_url}/invoke_model?model_id={model_id}&requestId={request_id}",
        json={},
        headers={
            "x-api-key": api_key,
            "team_id": team_id
        },
        timeout=60,
    )

    if "generated_text" in response.json()[0]:
        break

    time.sleep(2)
else:
    # The loop ended without a break: the budget ran out before a result
    # arrived. Report that explicitly instead of failing with a KeyError below.
    raise TimeoutError(
        f"No generated text returned for request {request_id} within {max_time} seconds"
    )

text = response.json()[0]["generated_text"]

print(text)

### Amazon SageMaker Endpoint

#### All MiniLM L6 v2

All MiniLM L6 v2 embeddings model deployed from Amazon SageMaker JumpStart.

##### Important! Refer to the specific model documentation for defining the proper inputs and parameters for the model

In [None]:
# Placeholder to be replaced with the model name.
model_id = "<MODEL_NAME>"

# Request parameters, including the inference component placeholder.
model_kwargs = dict(
    mode="embedding",
    InferenceComponentName="<INFERENCE_COMPONENT_NAME>",
)

# Model input payload.
prompt = dict(text_inputs="Hello")

To generate embeddings with a model hosted on an Amazon SageMaker endpoint, pass the model name as `model_id` in the query and include the parameter `type` as `embeddings` in the header

In [None]:
# Request an embedding; the "type" header marks this as an embeddings call.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The first item of the body carries the vector under "embedding".
text = response.json()[0]["embedding"]

print(text)

#### Meta Llama-3 - Messages API

Fine-tuned Meta Llama-3 model hosted in Amazon SageMaker through Hugging Face TGI container.

##### Important! Refer to the specific model documentation for defining the proper inputs and parameters for the model

In [None]:
# Placeholder to be replaced with the model name.
model_id = "<MODEL_NAME>"

# Request parameters; the "model" entry is a required placeholder value.
model_kwargs = dict(
    model="placeholder-model",
    top_p=0.9,
    temperature=0.2,
    return_full_text=True,
    stop=[],
)

# Conversation made of a single user message with plain-string content.
prompt = [dict(role="user", content="Hello")]

To invoke a model hosted on an Amazon SageMaker endpoint with the Messages API, pass the model name as `model_id` in the query and include the parameter `messages_api` as `True` or `true` in the header

In [None]:
# Send the message list to the gateway; the "messages_api" header marks the
# request as using the messages format.
response = requests.post(
    f"{api_url}/invoke_model?model_id={model_id}",
    json={"inputs": prompt, "parameters": model_kwargs},
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    timeout=60,  # never wait on the gateway indefinitely
)
# Surface HTTP failures as a clear HTTPError instead of an opaque
# KeyError/IndexError when the error payload is indexed below.
response.raise_for_status()

# The whole first item of the body is displayed as returned.
text = response.json()[0]

print(text)