# Agent
https://python.langchain.com/v0.1/docs/use_cases/tool_use/quickstart/#chains

## Import

In [1]:
from dotenv import load_dotenv
import os
from pathlib import Path

from langchain_groq import ChatGroq
from langchain_core.tools import tool
from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent

from groq import Groq
import base64
import json

import chromadb
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from uuid import uuid4

## Setup

In [2]:
# 刪除環境變量
if "GROQ_API_KEY" in os.environ:
    del os.environ["GROQ_API_KEY"]

In [3]:
# 找根目錄
def find_project_root(current_path, marker=".git"):
    current_path = Path(current_path).resolve()
    for parent in current_path.parents:
        if (parent / marker).exists():
            return parent
    return None

current_path = os.getcwd()
project_root = find_project_root(current_path, marker=".git")
print("Project root:", project_root)

# Load .env file
print(f"Successfully loaded env variables: {load_dotenv(project_root / ".env")}")

Project root: /Users/allen/Documents/code/Exchange_QA_Chatbot
Successfully loaded env variables: True


In [4]:
# Load env variables into python variables
print("Loaded env variables:")
print(f"GROQ_API_KEY = {os.getenv("GROQ_API_KEY")}")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

Loaded env variables:
GROQ_API_KEY = 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


## ChromaDB

In [45]:
chroma_client = chromadb.PersistentClient(path="vector_db")

In [46]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

In [47]:
# Delete existing collections
current_collections = chroma_client.list_collections()
for collections in current_collections:
    chroma_client.delete_collection(name="documents")

# get_or_create_collection
documents = chroma_client.get_or_create_collection(
    name="documents", 
    metadata={"hnsw:space": "cosine", "hnsw:search_ef": 100}
)

def embed_pdf(pdf_file_path):
    loader = PyPDFLoader(pdf_file_path)
    pages = []
    pages = loader.load()

    for index in range(len(pages)):
        documents_list = text_splitter.split_text(pages[index].page_content)
        documents.add(
            documents=documents_list,
            ids=[f"id-{str(uuid4())}" for i in range(len(documents_list))],
            metadatas=[{"source": pages[index].metadata["source"], "page": index+1} for i in range(len(documents_list))]
        )

documents_folder = "../data/documents"
pdf_files = [f"{documents_folder}/{pdf_file_path}" for pdf_file_path in os.listdir(documents_folder)]

for pdf_file in pdf_files:
    embed_pdf(pdf_file)

In [None]:
def print_query_results(query_list: list, query_results: dict) -> None:
    result_count = len(query_results['ids'][0])

    for i in range(len(query_list)):
        print(f'Results for query: {query_list[i]}')

        for j in range(result_count):
            id       = query_results["ids"][i][j]
            distance = query_results['distances'][i][j]
            document = query_results['documents'][i][j]
            metadata = query_results['metadatas'][i][j]

            print(f'id: {id}, distance: {distance}, metadata: {metadata}, document: {document}')

In [None]:
query_text = "學生申請繳件有哪些方式？"

documents.query(
    query_texts=[query_text],
    n_results=25
)

## LLM

### Tools

In [48]:
@tool
def image_interpreting(image_path: str) -> str:
    """Interpret, explain, analyse or describe the content and underlying logic of any given image and return in JSON format.

    Args:
        image_path: the image file path
    """
    # Function to encode the image
    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    
    base64_image = encode_image(image_path)
    request_text = "You are an assistant skilled at interpreting the content and underlying logic in images. Texts in this image are in Traditional Chinese. List what you observe in this image in JSON format."
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    chat_completion = client.chat.completions.create(
        model="llama-3.2-90b-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {   
                        "type": "text",
                        "text": request_text
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        temperature=0.0,
        top_p=1,
        stream=False,
        response_format={"type": "json_object"},
        stop=None,
    )
    return str(json.loads(chat_completion.choices[0].message.content))
    # return "{'title': 'Step2. 校外徵選', 'sections': [{'title': 'Nomination', 'description': '校方提名', 'dates': ['第一學期：2月至3月', '第二學期：8月至9月']}, {'title': 'Application', 'description': '學生申請文件', 'description2': '申請文件及期程依各交換校規定辦理'}, {'title': 'Result', 'description': '結果通知', 'description2': '母校通知', 'description3': '交換校通知'}, {'title': '學生負責事項', 'description': '包含住宿、簽證、交通、保險、健康檢查，限制提領帳戶...等(依各交換校及所在國家規定)'}], 'button': {'text': '看詳細資訊', 'color': 'orange'}}"

@tool
def document_retrieval(string: str) -> str:
    """A Traditional Chinese, powerful tool designed to retrieve relevant information from the robust document database using natural language questions or key phrases.

    Args:
        string: a Traditional Chinese sentence or a Traditional Chinese key phrase to retrieve relevant information from the database
    """
    # query_text = "學生申請繳件有哪些方式？"

    result = documents.query(
        query_texts=[string],
        n_results=25
    )
    return str(result["documents"][0])

tools = [image_interpreting, document_retrieval]

### Check tools

In [205]:
print(f"Name:\n{image_interpreting.name}")
print("=====")
print(f"Description:\n{image_interpreting.description}")
print("=====")
print(f"Args:\n{image_interpreting.args}")

Name:
image_interpreting
=====
Description:
Interpret, explain or analyse the content and underlying logic of any given image and return in JSON format.

    Args:
        image_path: the image file path
=====
Args:
{'image_path': {'title': 'Image Path', 'type': 'string'}}


In [206]:
image_interpreting.invoke({"image_path": "../data/images/Step2校外徵選.jpg"})

"{'title': 'Step2. 校外徵選', 'sections': [{'title': 'Nomination', 'description': '校方提名', 'dates': ['第一學期：2月至3月', '第二學期：8月至9月']}, {'title': 'Application', 'description': '學生申請文件', 'description2': '申請文件及期程依各交換校規定辦理'}, {'title': 'Result', 'description': '結果通知', 'description2': '母校通知', 'description3': '交換校通知'}, {'title': '學生負責事項', 'description': '包含住宿、簽證、交通、保險、健康檢查，限制提領帳戶...等(依各交換校及所在國家規定)'}], 'button': {'text': '看詳細資訊', 'color': 'orange'}}"

### Agent

In [49]:
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=1,
    max_retries=2,
    api_key=GROQ_API_KEY
)

In [50]:
# Get the prompt to use - can be replaced with any prompt that includes variables "agent_scratchpad" and "input"!
prompt = hub.pull("hwchase17/openai-tools-agent")
prompt.pretty_print()




You are a helpful assistant


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{input}[0m


[33;1m[1;3m{agent_scratchpad}[0m


In [51]:
# Construct the tool calling agent
agent = create_tool_calling_agent(llm, tools, prompt)

In [52]:
# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, max_iterations=10)

In [53]:
# user_prompt = "'../data/images/Step2校外徵選.jpg' 請問在校方提名之後，學生要做什麼？"
# user_prompt = "'../data/images/Step2校外徵選.jpg' 請問校方提名的第一學期是幾月至幾月？"

# user_prompt = f"""
# '../data/images/Step2校外徵選.jpg'
# Recommend using tool "image_interpreting" to interpret, explain or analyse the content and underlying logic of any given image and return in JSON format.
# If information in the image isn't informative enough to answer the Human's message below, you could use the powerful tool "document_retrieval" to retrieve relevant information STRICTLY ONLY ONCE from the robust document database using 'Human's message' below.

# Human's message:
# "學生負責事項有哪些？"
# """

user_prompt = f"""
Use the powerful "document_retrieval" tool to search for information using the complete Human message STRICTLY ONLY TWICE, NO MORE, and then respond to the human's message in Traditional Chinese.

Human's message:
出國交換生一般需繳文件有哪些？
"""


agent_executor.invoke(
    {
        "input": user_prompt
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `document_retrieval` with `{'string': '出國交換生一般需繳文件有哪些？'}`


[0m[33;1m[1;3m['出，以準備提名作業。\n 注意  為了不影響原申請人權益，提出「轉換學校」或「補件\n申請」者需事先確認申請的學校尚有名額。\n學海惜珠額外文件\n獎學金詳細資訊請參考「相關補助」。\n1. 三個月內新式戶口名簿影本', '際處預約洽談後續規劃。\n錄取後出國前準備\n學生負責規劃\n簽證：收到入學許可正影本後請至「駐臺使館」或「駐臺外國\n機構」辦理學生簽證。\n其他：收到簽證後請主動規劃差旅、食宿、保險及個人體檢等\n事宜。\n⏩ 德國留學簽證、保險及限制提領帳戶：請點我', '後辦理 ) 至學務處辦理手續（分機 1214 ）。\n本申請單中「出國時間」的回程時間請大家留意！一定要在這個申\n請單上填報的回程時間前回來並抵達台灣，以免之後被限制出境。\n \n返國後\n一般生 ( 無獎學金的同學 ) 有獎學金的同學 ( 學海飛颺 )\n有獎學金的同學 ( 學海惜珠 / 臺奧 )\n \n請於返國後兩周內辦理以下文件繳交手續。', '先與國際處洽談，請勿未經討論直接與媒合學校窗口接洽。\n薦外申請流程\n1. 國際處提名：\n申請第一學期出發者： 2 月 -3 月\n申請第二學期出發者： 8 月 -9 月\n申請人請先行至學校列表確認申請學校的提名與申請截止日期\n( 如： fact sheet 、網頁等 ) 。少數學校時間結束較早，請第一\n時間聯絡國際處承辦人。\n2. 學生申請繳件：', '出國前\n一般需繳文件 ( 所有出國交換學生 )\n獲補助者額外需繳文件學務處兵役文件 ( 限男性 )\n \n請先閱讀旅外安全須知 (海外常見詐騙手法) ，並於出國前一個月辦\n理「一般須繳文件」、「獲補助者額外需繳交文件」等手續。\n旅外安全須知\n教育部來函，鑒於國際學術交流亦趨蓬勃，學生出國交流活動逐漸\n增加，請同學出國應妥善規劃，至外交部領事事務局網站', '(PDF) 並命名為「3_出國後研修資料_姓名」。\n2.符合中低收資格者連同第(5)項Excel檔⼀併繳

{'input': '\nUse the powerful "document_retrieval" tool to search for information using the complete Human message STRICTLY ONLY TWICE, NO MORE, and then respond to the human\'s message in Traditional Chinese.\n\nHuman\'s message:\n出國交換生一般需繳文件有哪些？\n',
 'output': '您要出國交換學生，一般需要繳交的文件包括：\n1. 獲准出國交換離校申請表\n2. 國外研修機構錄取信影本\n3. 連帶保證書\n4. 出國交換知情同意書\n5. 其他有利申請之文件，如國際化時數累計表等\n另外，若您獲得獎學金，則需要額外繳交的文件包括：\n1. 出差請示單\n2. 學生證 & 身分證正反面\n3. 帳戶\n4. 預借經費申請單\n5. 其他相關文件\n\n請注意，所需文件可能會因學校或獎學金的不同而有所變動，請務必與相關部門確認。'}

### Ignore

In [None]:
system_prompt = f"""
When you receive a tool call response, use the output to format an answer to the orginal user question.

You are a helpful assistant with tool calling capabilities.
"""

user_prompt = f"""
Here's a list of images that you can perform Optical Character Recognition (OCR) on to extract the texts in the image.
The images are provided with their file path:
1. "data/images/Step1校內徵選.jpg"

Step1校內徵選的流程是什麼？
"""

# I would like to know what texts are written on "Step1 校內徵選" image. Please help me out.

# I provided an image file with the file path: "data/images/Step1校內徵選.jpg".
# Please perform Optical Character Recognition (OCR) on the image and show me the result.

messages = [
    ("system", system_prompt),
    ("human", user_prompt),
]

In [None]:
# Invoke
response = llm_with_tools.invoke(messages)

for key, value in vars(response).items():
    print(f"{key}: {value}")

content: 
additional_kwargs: {'tool_calls': [{'id': 'call_fjk0', 'function': {'arguments': '{"image_file_path": "data/images/Step1校內徵選.jpg"}', 'name': 'image_ocr'}, 'type': 'function'}]}
response_metadata: {'token_usage': {'completion_tokens': 27, 'prompt_tokens': 356, 'total_tokens': 383, 'completion_time': 0.098181818, 'prompt_time': 0.042432677, 'queue_time': 0.019514951999999995, 'total_time': 0.140614495}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_fcc3b74982', 'finish_reason': 'tool_calls', 'logprobs': None}
type: ai
name: None
id: run-d10394e2-827a-477d-bd05-5b595067b156-0
example: False
tool_calls: [{'name': 'image_ocr', 'args': {'image_file_path': 'data/images/Step1校內徵選.jpg'}, 'id': 'call_fjk0', 'type': 'tool_call'}]
invalid_tool_calls: []
usage_metadata: {'input_tokens': 356, 'output_tokens': 27, 'total_tokens': 383}
