# Amazon Bedrock - API Gateway invocation with Langchain

## Install requirements

In [None]:
%pip install -q langchain==0.1.16

### Setup Environment

We are going to invoke Amazon API Gateway through `langchain`

In [None]:
from langchain.chains import LLMChain
from langchain.llms.amazon_api_gateway import AmazonAPIGateway
from langchain.prompts import PromptTemplate

### Setting up API URL

In [None]:
api_url = "<API_URL>"
api_key = "<API_KEY>"
team_id = "<TEAM_ID>"

### Define Default Prompt

In [None]:
# Pass-through template: the rendered prompt is exactly the value supplied
# for `question`, with nothing added around it.
PROMPT_DEFAULT = PromptTemplate(
    template="{question}", input_variables=["question"]
)

***

### AmazonAPIGateway class Extended

This example extends the `AmazonAPIGateway` class so that it can handle both `invoke_model` and `invoke_model_with_response_stream` (the latter through long polling).

In [None]:
import ast
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.amazon_api_gateway import AmazonAPIGateway
from langchain.llms.utils import enforce_stop_tokens
import re
import requests
import time
from typing import Any, List, Optional

def convert_to_dict_list(input):
    """Return ``input`` as a list of message dicts, or ``None`` if it is not one.

    Args:
        input: Either a list, which is returned unchanged without inspecting
            its items, or a string containing a Python literal such as
            ``"[{'role': 'user', 'content': 'Hi'}]"``. (The parameter name
            shadows the builtin but is kept for backward compatibility.)

    Returns:
        The list itself when ``input`` is a list; the parsed value when
        ``input`` is a string whose literal is a list containing only dicts;
        ``None`` in every other case (free-form text, other literals, other
        types).
    """
    if isinstance(input, list):
        return input
    if not isinstance(input, str):
        return None

    try:
        parsed = ast.literal_eval(input)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval raises ValueError/SyntaxError for text that is not a
        # literal, TypeError for literals such as "{[1]: 2}" (unhashable key)
        # and MemoryError/RecursionError for very deeply nested input. Any of
        # these simply means "this prompt is not a message list".
        return None

    if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
        return parsed
    return None

def convert_prompt_to_messages(prompt, model_kwargs=None):
    """Convert a Claude (< 3) style prompt into Messages API messages.

    Prerequisites for text prompts:
    - Put the system prompt between the tags <system> and </system>.
    - If there is a conversation history, put it between the tags <history>
      and </history>.

    The function also works when the messages are provided directly as a list
    (or as the string representation of such a list).

    Args:
        prompt: A text prompt using ``Human:`` / ``Assistant:`` turn markers,
            or a list of message dicts (or its string representation).
        model_kwargs: Optional model parameters. The dict is copied; neither
            the caller's dict nor a shared default is ever modified.

    Returns:
        A ``(messages, model_kwargs)`` tuple. ``messages`` is a list of
        ``{"role": ..., "content": ...}`` dicts and ``model_kwargs`` is a copy
        of the given parameters with a ``"system"`` entry added when a system
        prompt was found.
    """
    # Copy so that the "system" entry added below cannot leak into later calls
    # through the caller's dict.
    model_kwargs = dict(model_kwargs) if model_kwargs else {}
    system_prompt = None

    messages = convert_to_dict_list(prompt)

    if messages is None:
        # Text prompt: pull out the optional <system>...</system> section.
        system_match = re.search(r'<system>(.*?)</system>', prompt, re.DOTALL)
        if system_match:
            system_prompt = system_match.group(1)
            prompt = re.sub(r'<system>.*?</system>', '', prompt, flags=re.DOTALL)

        # The history tags only delimit text; their content stays in place.
        prompt = prompt.replace("<history>", "").replace("</history>", "")

        role_regex = re.compile(r'(Human:|Assistant:)\s?(.*?)(?=Human:|Assistant:|$)', re.DOTALL)
        messages = [
            {
                "role": "user" if marker == "Human:" else "assistant",
                "content": content.strip(),
            }
            for marker, content in role_regex.findall(prompt)
        ]

        # A plain prompt without any turn marker would otherwise produce an
        # empty conversation; treat the remaining text as one user message.
        if not messages and prompt.strip():
            messages.append({"role": "user", "content": prompt.strip()})
    else:
        # Shallow copy so that popping the system message below does not
        # modify a list owned by the caller.
        messages = list(messages)

        # Two leading messages where the first is from the user and the second
        # is not an assistant reply: the first one is used as system prompt.
        if (
            len(messages) > 1
            and messages[0]["role"] == "user"
            and messages[1]["role"] != "assistant"
        ):
            system_prompt = messages.pop(0)["content"]

    if system_prompt is not None:
        model_kwargs["system"] = system_prompt

    return messages, model_kwargs

class AmazonAPIGatewayExtended(AmazonAPIGateway):
    """AmazonAPIGateway LLM extended with long-polling and message payloads."""

    # When True, the first response is expected to carry a "request_id" and
    # the result is fetched by polling the same URL with that id.
    streaming: bool = False
    # Seconds to sleep between two polling requests.
    polling_wait: int = 2
    # Maximum number of seconds spent polling before giving up.
    max_time: int = 120
    # When True, the prompt is converted with `convert_prompt_to_messages`
    # and sent as a list of messages.
    chat: bool = False

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to the model exposed through Amazon API Gateway.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            The string generated by the model.

        Raises:
            ValueError: If the request, the polling (including exceeding
                ``max_time``) or the parsing of the response fails.
            Exception: If the final response status code is not 200; the
                message is the text extracted from the response.

        Example:
            .. code-block:: python

                response = llm("Tell me a joke.")
        """
        # Work on a copy so that entries added while building the payload
        # (e.g. "system") never end up in self.model_kwargs.
        _model_kwargs = dict(self.model_kwargs or {})

        if self.chat:
            messages, _model_kwargs = convert_prompt_to_messages(prompt, _model_kwargs)
            payload = {
                "inputs": messages,
                "parameters": _model_kwargs
            }
        else:
            payload = self.content_handler.transform_input(prompt, _model_kwargs)

        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
            )

            if self.streaming:
                request_id = response.json()[0]["request_id"]
                deadline = time.monotonic() + self.max_time

                while time.monotonic() < deadline:
                    # api_url is expected to already contain a query string,
                    # hence the "&" separator.
                    response = requests.post(
                        self.api_url + f"&requestId={request_id}",
                        headers=self.headers,
                        json={},
                    )

                    if "generated_text" in response.json()[0]:
                        break

                    time.sleep(self.polling_wait)
                else:
                    # Report the timeout explicitly instead of trying to parse
                    # a response that does not contain the result yet.
                    raise TimeoutError(
                        f"No result for request {request_id} "
                        f"after {self.max_time} seconds"
                    )

            text = self.content_handler.transform_output(response)

        except Exception as error:
            raise ValueError(f"Error raised by the service: {error}") from error

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        # Checked after parsing so that the text of the response is available
        # as the error message.
        if response.status_code != 200:
            raise Exception(text)

        return text

### AmazonAPIGateway class for Embeddings

In [None]:
from typing import List, Optional

import requests
from langchain_core.embeddings import Embeddings

class AmazonAPIGatewayEmbeddings(Embeddings):
    """Embeddings client that posts each input to an API Gateway endpoint."""

    def __init__(self, api_url, headers):
        """Store the endpoint URL and the headers sent with every request."""
        self.api_url = api_url
        self.headers = headers

    def _embed(self, text, parameters):
        """POST one input and return the "embedding" of the first response item."""
        response = requests.post(
            self.api_url,
            json={
                "inputs": text,
                "parameters": parameters if parameters is not None else {},
            },
            headers=self.headers
        )

        return response.json()[0]["embedding"]

    def embed_documents(self, texts: List[str], parameters: Optional[dict] = None) -> List[List[float]]:
        """Embed each text with its own request.

        Args:
            texts: The inputs to embed.
            parameters: Optional parameters sent along with every input; an
                empty dict is sent when omitted.

        Returns:
            One embedding per input, in the same order as ``texts``.
        """
        return [self._embed(text, parameters) for text in texts]

    def embed_query(self, text: str, parameters: Optional[dict] = None) -> List[float]:
        """Embed a single input.

        Args:
            text: The input to embed.
            parameters: Optional parameters sent along with the input; an
                empty dict is sent when omitted.

        Returns:
            The embedding returned for ``text``.
        """
        return self._embed(text, parameters)

***

### Amazon Titan Text Express

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Amazon Titan Text Express - Streaming

In [None]:
model_id = "amazon.titan-text-express-v1"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Amazon Titan Embeddings

In [None]:
# Model used for the text embeddings example.
model_id = "amazon.titan-embed-text-v1"

# Text to embed (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    }
)

In [None]:
embeddings.embed_query(prompt)

### Amazon Titan Multimodal Embeddings

In [None]:
import base64

In [None]:
image_path = "./images/img513074217-1493907994177.webp"

with open(image_path, 'rb') as image_file:
    byte_file = base64.b64encode(image_file.read()).decode('utf-8')

In [None]:
model_id = "amazon.titan-embed-image-v1"

prompt = byte_file

For using Bedrock boto3 `invoke_model` for generating image embeddings, include the parameter `type` as `embeddings-image` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings-image"
    }
)

In [None]:
embeddings.embed_query(prompt)

### Anthropic Claude 3 Sonnet

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "max_tokens": 4096,
    "temperature": 0.2,
    # Version string documented by Amazon Bedrock for the Anthropic Messages
    # API; an empty string is not a valid version.
    "anthropic_version": "bedrock-2023-05-31",
    "top_p": 0.9,
    "stop_sequences": ["\n\nHuman:"]
}

# Conversation in Messages API format: a single user turn.
prompt = [
    {'role': 'user', 'content': 'What is Amazon Bedrock?'}
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    chat=True
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Anthropic Claude 3 Sonnet - Streaming

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "max_tokens": 4096,
    "temperature": 0.2,
    # Version string documented by Amazon Bedrock for the Anthropic Messages
    # API; an empty string is not a valid version.
    "anthropic_version": "bedrock-2023-05-31",
    "top_p": 0.9,
    "stop_sequences": ["\n\nHuman:"]
}

# Conversation in Messages API format: a single user turn.
prompt = [
    {'role': 'user', 'content': 'What is Amazon Bedrock?'}
]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Anthropic Claude 3 Sonnet - Multi-modal

In [None]:
import base64

In [None]:
image_path = "./images/img513074217-1493907994177.webp"

with open(image_path, 'rb') as image_file:
    byte_file = base64.b64encode(image_file.read()).decode('utf-8')

In [None]:
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

model_kwargs = {
    "max_tokens": 4096,
    "temperature": 0.2,
    # Version string documented by Amazon Bedrock for the Anthropic Messages
    # API; an empty string is not a valid version.
    "anthropic_version": "bedrock-2023-05-31",
    "top_p": 0.9,
    "stop_sequences": ["\n\nHuman:"]
}

# Single user turn made of two content blocks: the base64-encoded image read
# in the previous cell, followed by the question about it.
prompt = [{"role": "user", "content": [
    {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/webp",
            "data": byte_file,
        }
    },
    {"type": "text", "text": "What is in the image?"}
]}]

For using Bedrock boto3 `invoke_model` with Messages API, include the parameter `messages_api` as `True` or `true` in the header.

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true",
        "messages_api": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True,
    chat=True
)

In [None]:
# The LLM is called directly here (no LLMChain), so the list of messages,
# including the base64 image block, is handed to `_call` as-is.
response = llm._call(prompt=prompt)

# Print response
print(response)

### Anthropic Claude 2.1

In [None]:
model_id = "anthropic.claude-v2:1"

model_kwargs = {
    "max_tokens_to_sample": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Anthropic Claude 2.1 - Streaming

In [None]:
model_id = "anthropic.claude-v2:1"

model_kwargs = {
    "max_tokens_to_sample": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model_with_response_stream` with long-polling, include the parameter `streaming` as `True` or `true` in the header

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "streaming": "true"
    },
    model_kwargs=model_kwargs,
    streaming=True
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### AI21 Jurassic

In [None]:
model_id = "ai21.j2-ultra"

model_kwargs = {
    "maxTokens": 4096,
    "temperature": 0.2
}

# Question sent to the model (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Cohere Command

In [None]:
model_id = "cohere.command-text-v14"

model_kwargs = {
    "max_tokens": 4000,
    "temperature": 0.2
}

# Question sent to the model (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)

### Cohere Embed Multilingual

In [None]:
model_id = "cohere.embed-multilingual-v3"

# Request parameters for the embeddings model.
model_kwargs = {
    "input_type": "search_document"
}

# Text to embed (the stray closing double quote has been removed).
prompt = """
What is Amazon Bedrock?
"""

For using Bedrock boto3 `invoke_model` for generating embeddings, include the parameter `type` as `embeddings` in the header

In [None]:
embeddings = AmazonAPIGatewayEmbeddings(
    api_url=f"{api_url}/invoke_model?model_id={model_id}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id,
        "type": "embeddings"
    }
)

In [None]:
# Forward the parameters defined for this model (e.g. "input_type"); they were
# defined in the cell above but never sent with the request.
embeddings.embed_query(prompt, parameters=model_kwargs)

### Amazon Bedrock - Custom Model

#### Amazon Titan Text Express

In [None]:
model_id = "amazon.titan-text-express-v1"
model_arn = "<PROVISIONED_THROUGHPUT_ARN>"

model_kwargs = {
    "maxTokenCount": 4096,
    "temperature": 0.2
}

prompt = "What is Amazon Bedrock?"

For using Bedrock boto3 `invoke_model` with a custom model, include the parameter `model_arn` in the query

In [None]:
llm = AmazonAPIGatewayExtended(
    api_url=f"{api_url}/invoke_model?model_id={model_id}&model_arn={model_arn}",
    headers={
        "x-api-key": api_key,
        "team_id": team_id
    },
    model_kwargs=model_kwargs
)

chain = LLMChain(
    llm=llm,
    prompt=PROMPT_DEFAULT,
    verbose=True
)

In [None]:
response = chain.predict(question=prompt)

# Print response
print(response)