# Amazon Bedrock - API Gateway invocation with Langchain

## Install requirements

In [None]:
%pip install -q langchain==0.2.14 langchain-community==0.2.12

### Setup Environment

We are going to invoke Amazon API Gateway through `langchain`

### Setting up the API URL and credentials

In [None]:
# Base URL of the API Gateway deployment; later cells append
# "/invoke_model?model_id=..." to it.
api_url = "<API_URL>"
# API key, sent in the "x-api-key" request header.
api_key = "<API_KEY>"
# Team identifier, sent in the "team_id" request header.
team_id = "<TEAM_ID>"

***

### AmazonAPIGateway class Extended

This is an example of the AmazonAPIGateway class extended for handling both `invoke_model` and `invoke_model_with_response_stream` with long-polling

In [None]:
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.amazon_api_gateway import AmazonAPIGateway
from langchain.llms.utils import enforce_stop_tokens
import requests
import time
from typing import Any, List, Optional

class AmazonAPIGatewayExtended(AmazonAPIGateway):
    """AmazonAPIGateway LLM extended with long-polling and chat-style payloads.

    With ``streaming=False`` a single POST is sent and its response is parsed
    by the content handler. With ``streaming=True`` the first POST is expected
    to return a ``request_id``; the endpoint is then polled with that id until
    a response containing ``generated_text`` arrives or ``max_time`` elapses.
    """

    # When True, fetch the result by polling with the returned request id.
    streaming: bool = False
    # Seconds to sleep between two polling requests.
    polling_wait: int = 2
    # Maximum number of seconds to keep polling before giving up.
    max_time: int = 120
    # When True, the prompt is sent as-is under "inputs" without going
    # through content_handler.transform_input.
    chat: bool = False

    def _poll_for_result(self, request_id: str) -> Any:
        """Poll the endpoint until the generation for ``request_id`` is ready.

        Args:
            request_id: Identifier returned by the initial request.

        Returns:
            The first polling response whose body contains ``generated_text``.

        Raises:
            TimeoutError: If no result is available within ``max_time`` seconds.
        """
        # monotonic() is immune to wall-clock adjustments while measuring elapsed time.
        deadline = time.monotonic() + self.max_time

        while time.monotonic() < deadline:
            # Passing the id via ``params`` lets requests merge it into the
            # URL correctly whether or not api_url already has a query string.
            response = requests.post(
                self.api_url,
                params={"requestId": request_id},
                headers=self.headers,
                json={},
            )

            if "generated_text" in response.json()[0]:
                return response

            time.sleep(self.polling_wait)

        raise TimeoutError(
            f"No result for request {request_id} after {self.max_time} seconds"
        )

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to the model behind Amazon API Gateway.

        Args:
            prompt: The prompt to pass into the model. When ``chat`` is True
                the notebook passes a list of message dicts instead of a
                plain string; it is forwarded unchanged.
            stop: Optional list of stop words to use when generating.
            run_manager: Unused; accepted for interface compatibility.
            **kwargs: Unused; accepted for interface compatibility.

        Returns:
            The string generated by the model.

        Raises:
            ValueError: If the request, the polling or the response parsing
                fails (including a polling timeout).
            Exception: If the final response status code is not 200; the
                message is the text extracted from the response.

        Example:
            .. code-block:: python

                response = llm._call("Tell me a joke.")
        """
        _model_kwargs = self.model_kwargs or {}

        if self.chat:
            payload = {"inputs": prompt, "parameters": _model_kwargs}
        else:
            payload = self.content_handler.transform_input(prompt, _model_kwargs)

        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
            )
            if self.streaming:
                request_id = response.json()[0]["request_id"]
                response = self._poll_for_result(request_id)

            text = self.content_handler.transform_output(response)
        except Exception as error:
            # Keep the original exception as __cause__ so the traceback stays useful.
            raise ValueError(f"Error raised by the service: {error}") from error

        # Report service errors before stop-token handling so the error text
        # is never truncated.
        if response.status_code != 200:
            raise Exception(text)

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        return text

### AmazonAPIGateway class for Embeddings

In [None]:
from typing import List, Optional

import requests
from langchain_core.embeddings import Embeddings

class AmazonAPIGatewayEmbeddings(Embeddings):
    """Embeddings client that calls an embedding model through Amazon API Gateway.

    Each text is sent in its own POST request with the body
    ``{"inputs": <text>, "parameters": <parameters>}``; the embedding is read
    from ``response.json()[0]["embedding"]``.
    """

    def __init__(self, api_url, headers):
        """Store the endpoint URL and the headers sent with every request."""
        self.api_url = api_url
        self.headers = headers

    def _embed(self, text: str, parameters: Optional[dict]) -> List[float]:
        """POST a single text to the endpoint and return its embedding.

        Raises:
            ValueError: If the service answers with an error status code.
        """
        response = requests.post(
            self.api_url,
            json={"inputs": text, "parameters": parameters or {}},
            headers=self.headers,
        )

        # Fail with the service's own message instead of a KeyError/TypeError
        # from indexing an error body.
        if not response.ok:
            raise ValueError(
                f"Error raised by the service ({response.status_code}): {response.text}"
            )

        return response.json()[0]["embedding"]

    def embed_documents(
        self, texts: List[str], parameters: Optional[dict] = None
    ) -> List[List[float]]:
        """Embed each text in ``texts``, one request per text.

        Args:
            texts: The texts to embed.
            parameters: Optional model parameters sent with every request;
                an empty dict is sent when omitted.

        Returns:
            One embedding per input text, in the same order.
        """
        return [self._embed(text, parameters) for text in texts]

    def embed_query(self, text: str, parameters: Optional[dict] = None) -> List[float]:
        """Embed a single text.

        Args:
            text: The text to embed.
            parameters: Optional model parameters sent with the request;
                an empty dict is sent when omitted.

        Returns:
            The embedding of ``text``.
        """
        return self._embed(text, parameters)

***

### Amazon Titan Text Express

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Amazon Titan Text Express

Use Converse API

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Amazon Titan Text Express - Streaming

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Amazon Titan Text Express - Streaming

Use Converse API

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Amazon Titan Embeddings G1 - Text

In [None]:
model_id = "amazon.titan-embed-text-v1"

# Text to embed (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    }
)

In [None]:
embeddings.embed_query(prompt)

### Amazon Titan Text Embeddings v2

In [None]:
model_id = "amazon.titan-embed-text-v2:0"

# Text to embed (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

# Model parameters forwarded to the endpoint with the embedding request.
parameters = {
    "dimensions": 1024,
    "normalize": True
}

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    }
)

In [None]:
embeddings.embed_query(prompt, parameters)

### Amazon Titan Multimodal Embeddings

In [None]:
import base64

In [None]:
image_path = "./images/battery_image.png"

with open(image_path, 'rb') as image_file:
    byte_file = base64.b64encode(image_file.read()).decode('utf-8')

In [None]:
model_id = "amazon.titan-embed-image-v1"

prompt = byte_file

For using Bedrock boto3 `invoke_model` for generating image embeddings, include the parameter `type` as `embeddings-image` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings-image"
    }
)

In [None]:
embeddings.embed_query(prompt)

### Anthropic Claude 3 Sonnet

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"]
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Anthropic Claude 3 Sonnet - Streaming

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"]
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

To add a system prompt, set the `system` key in `model_kwargs`:

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"],
    "system": "Always translate the answer in Italian"
}

prompt = [
    {'role': 'user', 'content': [{"text": "What is Amazon Bedrock?"}]}
]

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Anthropic Claude 3 Sonnet - Multi-modal

In [None]:
import base64

In [None]:
image_path = "./images/battery_image.png"

with open(image_path, 'rb') as image_file:
    byte_file = base64.b64encode(image_file.read()).decode('utf-8')

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"],
}

# One user message with two content blocks: the base64-encoded image and a
# text question about it.
prompt = [
    {
        "role": "user",
        "content": [
            {
                "image": {
                    # Must name the actual encoding of the file loaded into
                    # byte_file, which is read from a .png file.
                    "format": "png",
                    "source": {
                        "bytes": byte_file
                    }
                }
            },
            {
                "text": "what is in the image?"
            },
        ]
    }
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
response = llm._call(prompt=prompt)

# Print response
print(response)

### Anthropic Claude 2.1

In [None]:
model_id = "anthropic.claude-v2:1"

model_kwargs = {
    "max_tokens_to_sample": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Anthropic Claude 2.1 - Streaming

In [None]:
model_id = "anthropic.claude-v2:1"

model_kwargs = {
    "max_tokens_to_sample": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Anthropic Claude 2.1 - Streaming

Use Converse API

In [None]:
model_id = "anthropic.claude-v2:1"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"]
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true",
        "streaming": "true"
    },
    model_kwargs=model_kwargs,
    chat=True,
    streaming=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### AI21 Jurassic

In [None]:
model_id = "ai21.j2-ultra"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### AI21 Jurassic

Use Converse API

In [None]:
model_id = "ai21.j2-ultra"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9
}

prompt = [{"role": "user", "content": [{"text": "What is Amazon Bedrock?"}]}]

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Cohere Command

In [None]:
model_id = "cohere.command-text-v14"

model_kwargs = {
    "max_tokens": 4000,
    "temperature": 0.2
}

# Question sent to the model (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Cohere Embed Multilingual

In [None]:
model_id = "cohere.embed-multilingual-v3"

model_kwargs = {
    "input_type": "search_document"
}

# Text to embed (the stray trailing double quote was removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    }
)

In [None]:
# Forward model_kwargs (input_type) as the request parameters; it was defined
# for this model but never sent. NOTE(review): confirm the gateway accepts
# "input_type" under "parameters".
embeddings.embed_query(prompt, model_kwargs)

### Amazon Bedrock - Custom Model

#### Amazon Titan Text Express

In [None]:
model_id = "amazon.titan-text-express-v1"
model_arn = "<PROVISIONED_THROUGHPUT_ARN>"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

For using Bedrock boto3 `invoke_model` with a custom model, include the parameter `model_arn` in the query

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}&model_arn={model_arn}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)

### Amazon Bedrock - Cross-Region Inference

#### Anthropic Claude 3.5 Sonnet

In [None]:
model_id = "<INFERENCE_PROFILE_ID>"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2,
    "topP": 0.9,
    "stopSequences": ["\n\nHuman:"],
}

prompt = [
    {'role': 'user', 'content': [{"text": "What is Amazon Bedrock?"}]}
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
response = llm._call(prompt)

# Print response
print(response)