# OCR Tool Calling
Agent: https://python.langchain.com/docs/tutorials/agents/

Tool calling: https://python.langchain.com/docs/concepts/tool_calling/

In [87]:
from dotenv import load_dotenv
import os
from pathlib import Path

from langchain_groq import ChatGroq
from langchain_core.tools import tool

In [2]:
# Remove any GROQ_API_KEY already present in this process's environment.
# (Presumably so that the value from the .env file loaded later is the one
# that gets used -- confirm against load_dotenv's override behaviour.)
os.environ.pop("GROQ_API_KEY", None)

In [3]:
# Find the project root directory
def find_project_root(current_path, marker=".git"):
    """Return the nearest directory at or above ``current_path`` that contains ``marker``.

    Args:
        current_path: Path (str or ``Path``) to start searching from. It is
            resolved to an absolute path before the search.
        marker: Name of the file or directory whose presence identifies the
            project root. Defaults to ``".git"``.

    Returns:
        The first matching directory as a ``Path``, or ``None`` when neither
        the start path nor any of its ancestors contains ``marker``.
    """
    start = Path(current_path).resolve()
    # ``Path.parents`` does not include the path itself, so the start directory
    # is checked explicitly first; otherwise running from the project root
    # would skip the root and return None (or some outer repository).
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    return None

# Locate the project root, starting from the current working directory.
current_path = os.getcwd()
project_root = find_project_root(current_path, marker=".git")
if project_root is None:
    # Fail with a clear message instead of the TypeError that
    # `None / ".env"` would raise further down.
    raise FileNotFoundError(
        f"No '.git' marker found at or above {current_path}; cannot locate the .env file."
    )
print("Project root:", project_root)

# Load .env file
# The result is stored first so the f-string needs no nested double quotes,
# which only parse on Python 3.12+.
env_loaded = load_dotenv(project_root / ".env")
print(f"Successfully loaded env variables: {env_loaded}")

Project root: /Users/allen/Documents/code/Exchange_QA_Chatbot
Successfully loaded env variables: True


In [22]:
# Load env variables into python variables
def mask_secret(secret, visible=4):
    """Return a redacted form of ``secret`` that is safe to print.

    Args:
        secret: The secret string, or ``None``/empty when it is not set.
        visible: Number of leading characters left readable.

    Returns:
        ``"<not set>"`` for a missing/empty secret, ``"***"`` when the secret
        is no longer than ``visible``, otherwise the first ``visible``
        characters followed by ``"***"``.
    """
    if not secret:
        return "<not set>"
    if len(secret) <= visible:
        return "***"
    return secret[:visible] + "***"


GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Never print the raw key: cell output is saved with the notebook and would
# leak the credential to anyone who can read the file.
print("Loaded env variables:")
print(f"GROQ_API_KEY = {mask_secret(GROQ_API_KEY)}")

Loaded env variables:
GROQ_API_KEY = <redacted>


In [23]:
# Chat model settings, gathered in one place so they are easy to tune.
# NOTE(review): this model id may have been retired by the provider since the
# notebook was written -- confirm against Groq's model/deprecation list.
GROQ_MODEL_NAME = "llama-3.1-70b-versatile"
llm_settings = {
    "model": GROQ_MODEL_NAME,
    "temperature": 0.7,
    "max_retries": 2,
    "api_key": GROQ_API_KEY,
}

# Groq-hosted chat model used for every call below.
llm = ChatGroq(**llm_settings)

In [85]:
@tool
def image_ocr(image_file_path: str) -> str:
    """Perform Optical Character Recognition (OCR) on an image to extract text.

    Args:
        image_file_path: the image file path
    """
    # The docstring above doubles as the tool description (it is what
    # `image_ocr.description` prints), so it is deliberately left unchanged.
    #
    # TODO: placeholder implementation -- no OCR is performed yet. The original
    # note here was "do easyocr"; until that is wired in, `image_file_path` is
    # ignored and a fixed string is returned.
    return "This is an image with text written on it."


# Tools offered to the model.
tools = [image_ocr]

In [81]:
# Show the metadata generated for the tool: its name, its description and its
# argument schema, separated by "=====" lines.
tool_sections = [
    ("Name", image_ocr.name),
    ("Description", image_ocr.description),
    ("Args", image_ocr.args),
]
print("\n=====\n".join(f"{label}:\n{value}" for label, value in tool_sections))

Name:
image_ocr
=====
Description:
Perform Optical Character Recognition (OCR) on an image to extract texts.

    Args:
        image_file_path: the image file path
=====
Args:
{'image_file_path': {'title': 'Image File Path', 'type': 'string'}}


In [82]:
# Call the tool directly (no LLM involved) with a sample image path.
sample_tool_input = {"image_file_path": "data/images/Step1校內徵選.jpg"}
image_ocr.invoke(sample_tool_input)

'This is a image that contains texts.'

In [38]:
# Attach the tool list to the chat model; `llm_with_tools` is the object
# invoked with the prompt messages later in the notebook.
llm_with_tools = llm.bind_tools(tools)

In [75]:
# System prompt: how the model should treat tool results.
# Plain strings are used because nothing is interpolated, and an `f` prefix
# would make any literal brace added later act as a placeholder.
system_prompt = """
When you receive a tool call response, use the output to format an answer to the original user question.

You are a helpful assistant with tool calling capabilities.
"""

# User prompt: lists the image path available for OCR, then asks the question
# (in Chinese) about the image's content.
user_prompt = """
Here's a list of images that you can perform Optical Character Recognition (OCR) on to extract the texts in the image.
The images are provided with their file path:
1. "data/images/Step1校內徵選.jpg"

Step1校內徵選的流程是什麼？
"""

# Alternative English phrasings of the request (currently unused):
# I would like to know what texts are written on "Step1 校內徵選" image. Please help me out.

# I provided an image file with the file path: "data/images/Step1校內徵選.jpg".
# Please perform Optical Character Recognition (OCR) on the image and show me the result.

# Conversation as (role, content) pairs.
messages = [
    ("system", system_prompt),
    ("human", user_prompt),
]

In [76]:
# Invoke
response = llm_with_tools.invoke(messages)

for key, value in vars(response).items():
    print(f"{key}: {value}")

content: 
additional_kwargs: {'tool_calls': [{'id': 'call_fjk0', 'function': {'arguments': '{"image_file_path": "data/images/Step1校內徵選.jpg"}', 'name': 'image_ocr'}, 'type': 'function'}]}
response_metadata: {'token_usage': {'completion_tokens': 27, 'prompt_tokens': 356, 'total_tokens': 383, 'completion_time': 0.098181818, 'prompt_time': 0.042432677, 'queue_time': 0.019514951999999995, 'total_time': 0.140614495}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_fcc3b74982', 'finish_reason': 'tool_calls', 'logprobs': None}
type: ai
name: None
id: run-d10394e2-827a-477d-bd05-5b595067b156-0
example: False
tool_calls: [{'name': 'image_ocr', 'args': {'image_file_path': 'data/images/Step1校內徵選.jpg'}, 'id': 'call_fjk0', 'type': 'tool_call'}]
invalid_tool_calls: []
usage_metadata: {'input_tokens': 356, 'output_tokens': 27, 'total_tokens': 383}


In [None]:
# async for chunk in llm_with_tools.astream(messages):
#     print(chunk.tool_call_chunks)