In [None]:
import os
import sys

# Absolute path of the directory two levels above the current working
# directory; presumably where the `genai_kit` package lives (verify locally).
ROOT_PATH = os.path.abspath("../../")

# Only register the path once so that re-running this cell stays idempotent
# and does not pile up duplicate entries in sys.path.
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

In [None]:
from genai_kit.aws.claude import BedrockClaude
from genai_kit.utils.images import encode_image_base64_from_file, display_image


# Connection settings for the Bedrock-hosted Claude model used by every
# example in this notebook.
BEDROCK_REGION = "us-west-2"
CLAUDE_MODEL_ID = "anthropic.claude-3-5-sonnet-20240620-v1:0"

claude = BedrockClaude(region=BEDROCK_REGION, modelId=CLAUDE_MODEL_ID)

## Invoke Multimodal LLM with Bedrock API

In [None]:
# Encode the sample picture as base64 and hand it to display_image.
sample_image_path = "./sample/food.jpg"
encoded_base64 = encode_image_base64_from_file(file_path=sample_image_path)
display_image(encoded_base64)

# Send the question together with the encoded image and print the reply.
res = claude.invoke_llm_response(
    text="How many tacos are there?",
    image=encoded_base64,
)
print(res)

## Invoke Multimodal LLM with LangChain

In [None]:
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.schema import (
    HumanMessage,
    SystemMessage,
)

def get_prompt(text: str = 'Describe this image', image: str = None,
               media_type: str = 'image/webp'):
    """Build the chat messages for an optionally multimodal question.

    Args:
        text: The user's question. It is substituted into the ``<question>``
            section of the prompt template.
        image: Base64-encoded image data. When falsy (``None`` or an empty
            string) no image part is added and the prompt is text-only.
        media_type: MIME type written into the image data URL. Defaults to
            ``'image/webp'``, the value this function previously hard-coded.
            Pass the type that matches the encoded bytes (for example
            ``'image/jpeg'``) when the image is not WebP.

    Returns:
        A two-element list: a ``SystemMessage`` followed by a ``HumanMessage``
        whose content is a list of parts (the image part first, if present,
        then the text part).
    """
    content = []

    if image:
        # The image travels inline as a data URL of the form
        # data:<media_type>;base64,<payload>.
        content.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:{media_type};base64,{image}",
            },
        })

    # Keep the caller's `text` untouched; the rendered prompt gets its own name.
    question_prompt = PromptTemplate(
            template="""Answer the user's questions, breaking them down into separate points.

            Here is a question from Human:
            <question>
            {question}
            </question>
            """,
            input_variables=["question"]
        ).format(question=text)

    content.append({
        "type": "text",
        "text": question_prompt
    })

    messages = [
        SystemMessage(content="You are a helpful assistant."),
        HumanMessage(
            content=content
        )
    ]

    return messages


In [None]:
# streaming response
callback = StreamingStdOutCallbackHandler()
chat = claude.get_chat_model(callback=callback)

prompt = get_prompt(text="How many tacos are there?", image=encoded_base64)
res = await chat.ainvoke(prompt)