# Agent
https://python.langchain.com/v0.1/docs/use_cases/tool_use/quickstart/#chains

## Import

In [54]:
from dotenv import load_dotenv
import os
from pathlib import Path

from langchain_groq import ChatGroq
from langchain_core.tools import tool
from langchain import hub
from langchain.agents import AgentExecutor, create_tool_calling_agent

from groq import Groq
import base64
import json

import chromadb
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from uuid import uuid4

## Setup

In [64]:
# 刪除環境變量
if "GROQ_API_KEY" in os.environ:
    del os.environ["GROQ_API_KEY"]

In [65]:
# 找根目錄
def find_project_root(current_path, marker=".git"):
    current_path = Path(current_path).resolve()
    for parent in current_path.parents:
        if (parent / marker).exists():
            return parent
    return None

current_path = os.getcwd()
project_root = find_project_root(current_path, marker=".git")
print("Project root:", project_root)

# Load .env file
print(f"Successfully loaded env variables: {load_dotenv(project_root / ".env")}")

Project root: /Users/allen/Documents/code/Exchange_QA_Chatbot
Successfully loaded env variables: True


In [66]:
# Load env variables into python variables
print("Loaded env variables:")
print(f"GROQ_API_KEY = {os.getenv("GROQ_API_KEY")}")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

Loaded env variables:
GROQ_API_KEY = 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


## ChromaDB

In [101]:
chroma_client = chromadb.PersistentClient(path="vector_db_for_testing")

In [102]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

In [69]:
# Delete existing collections
current_collections = chroma_client.list_collections()
for collections in current_collections:
    chroma_client.delete_collection(name="documents")

# get_or_create_collection
documents = chroma_client.get_or_create_collection(
    name="documents", 
    metadata={"hnsw:space": "cosine", "hnsw:search_ef": 100}
)

def embed_pdf(pdf_file_path):
    loader = PyPDFLoader(pdf_file_path)
    pages = []
    pages = loader.load()

    for index in range(len(pages)):
        documents_list = text_splitter.split_text(pages[index].page_content)
        documents.add(
            documents=documents_list,
            ids=[f"id-{str(uuid4())}" for i in range(len(documents_list))],
            metadatas=[{"source": pages[index].metadata["source"], "page": index+1} for i in range(len(documents_list))]
        )

documents_folder = "../data/documents"
pdf_files = [f"{documents_folder}/{pdf_file_path}" for pdf_file_path in os.listdir(documents_folder)]

for pdf_file in pdf_files:
    embed_pdf(pdf_file)

### Query test

In [84]:
def print_query_results(query_results: dict) -> None:
    result_count = len(query_results["ids"][0])
    for i in range(result_count):
        id       = query_results["ids"][0][i]
        distance = query_results["distances"][0][i]
        document = query_results["documents"][0][i]
        metadata = query_results["metadatas"][0][i]
        print(f"id: {id}, distance: {distance}, metadata: {metadata}, document: {document}")

In [85]:
query_text = "學生申請繳件有哪些方式？"

print_query_results(
    documents.query(
        query_texts=[query_text],
        n_results=25
    )
)

id: id-c6a93277-5308-4eff-90b4-742d80278892, distance: 0.4068576693534851, metadata: {'page': 1, 'source': '../data/documents/交換學校列表.pdf'}, document: 臺北科大 網站地圖 行事曆 聯絡我們 EN 
學校列表 交換學生
學校列表
申請流程
相關補助
經驗分享
重要須知
關於我們 國際合作 境外學位生 交換學生 兩岸交流 重要須知 
實用資訊 相關辦法與要點 休退學注意事項
首頁 / 交換學生 / 學校列表
出國研修推薦學校名單
以下資訊供參考, 各校當期申請資格限制依各校官⽅網站公告
為準
id: id-6013f034-b772-42fa-9d2a-914b6a788e3f, distance: 0.40844344420832224, metadata: {'page': 3, 'source': '../data/documents/Step2 薦外申請.pdf'}, document: 校，以免造成不必要的困擾。
未收到交換學校入學許可前，請先完成下學期選課作業，勿辦
理任何出國相關手續，以免浪費金錢與時間；在收到入學許可
後，請記得退選 / 撤選下學期選課，以免有曠課紀錄。
補助計畫受獎生如最終未獲任何交換校錄取，其資格將自動取
消，不得要求予以保留。
出國研修生於研修學校選讀課程，不得要求獲取研修學校之學
位。
id: id-8bbf10ab-c7a0-4772-884f-a254a3dc4df2, distance: 0.4200864115613613, metadata: {'page': 1, 'source': '../data/documents/Step2 薦外申請.pdf'}, document: 先與國際處洽談，請勿未經討論直接與媒合學校窗口接洽。
薦外申請流程
1. 國際處提名：
申請第一學期出發者： 2 月 -3 月
申請第二學期出發者： 8 月 -9 月
申請人請先行至學校列表確認申請學校的提名與申請截止日期
( 如： fact sheet 、網頁等 ) 。少數學校時間結束較早，請第一
時間聯絡國際處承辦人。
2. 學生申請繳件：
id: id-53f284b0-234f-41a1-99

## LLM

### Tools

In [48]:
@tool
def image_interpreting(image_path: str) -> str:
    """Interpret, explain, analyse or describe the content and underlying logic of any given image and return in JSON format.

    Args:
        image_path: the image file path
    """
    # Function to encode the image
    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    
    base64_image = encode_image(image_path)
    request_text = "You are an assistant skilled at interpreting the content and underlying logic in images. Texts in this image are in Traditional Chinese. List what you observe in this image in JSON format."
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    chat_completion = client.chat.completions.create(
        model="llama-3.2-90b-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {   
                        "type": "text",
                        "text": request_text
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        temperature=0.0,
        top_p=1,
        stream=False,
        response_format={"type": "json_object"},
        stop=None,
    )
    return str(json.loads(chat_completion.choices[0].message.content))
    # return "{'title': 'Step2. 校外徵選', 'sections': [{'title': 'Nomination', 'description': '校方提名', 'dates': ['第一學期：2月至3月', '第二學期：8月至9月']}, {'title': 'Application', 'description': '學生申請文件', 'description2': '申請文件及期程依各交換校規定辦理'}, {'title': 'Result', 'description': '結果通知', 'description2': '母校通知', 'description3': '交換校通知'}, {'title': '學生負責事項', 'description': '包含住宿、簽證、交通、保險、健康檢查，限制提領帳戶...等(依各交換校及所在國家規定)'}], 'button': {'text': '看詳細資訊', 'color': 'orange'}}"

@tool
def document_retrieval(string: str) -> str:
    """A Traditional Chinese, powerful tool designed to retrieve relevant information from the robust document database using natural language questions or key phrases.

    Args:
        string: a Traditional Chinese sentence or a Traditional Chinese key phrase to retrieve relevant information from the database
    """
    # query_text = "學生申請繳件有哪些方式？"

    result = documents.query(
        query_texts=[string],
        n_results=25
    )
    return str(result["documents"][0])

tools = [image_interpreting, document_retrieval]

### Check tools

In [205]:
print(f"Name:\n{image_interpreting.name}")
print("=====")
print(f"Description:\n{image_interpreting.description}")
print("=====")
print(f"Args:\n{image_interpreting.args}")

Name:
image_interpreting
=====
Description:
Interpret, explain or analyse the content and underlying logic of any given image and return in JSON format.

    Args:
        image_path: the image file path
=====
Args:
{'image_path': {'title': 'Image Path', 'type': 'string'}}


In [206]:
image_interpreting.invoke({"image_path": "../data/images/Step2校外徵選.jpg"})

"{'title': 'Step2. 校外徵選', 'sections': [{'title': 'Nomination', 'description': '校方提名', 'dates': ['第一學期：2月至3月', '第二學期：8月至9月']}, {'title': 'Application', 'description': '學生申請文件', 'description2': '申請文件及期程依各交換校規定辦理'}, {'title': 'Result', 'description': '結果通知', 'description2': '母校通知', 'description3': '交換校通知'}, {'title': '學生負責事項', 'description': '包含住宿、簽證、交通、保險、健康檢查，限制提領帳戶...等(依各交換校及所在國家規定)'}], 'button': {'text': '看詳細資訊', 'color': 'orange'}}"

### Agent

In [49]:
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=1,
    max_retries=2,
    api_key=GROQ_API_KEY
)

In [50]:
# Get the prompt to use - can be replaced with any prompt that includes variables "agent_scratchpad" and "input"!
prompt = hub.pull("hwchase17/openai-tools-agent")
prompt.pretty_print()




You are a helpful assistant


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{input}[0m


[33;1m[1;3m{agent_scratchpad}[0m


In [92]:
# Construct the tool calling agent
agent = create_tool_calling_agent(llm, tools, prompt)

# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, max_iterations=10)

In [98]:
# user_prompt = "'../data/images/Step2校外徵選.jpg' 請問在校方提名之後，學生要做什麼？"
# user_prompt = "'../data/images/Step2校外徵選.jpg' 請問校方提名的第一學期是幾月至幾月？"

# user_prompt = f"""
# '../data/images/Step2校外徵選.jpg'
# Recommend using tool "image_interpreting" to interpret, explain or analyse the content and underlying logic of any given image and return in JSON format.
# If information in the image isn't informative enough to answer the Human's message below, you could use the powerful tool "document_retrieval" to retrieve relevant information STRICTLY ONLY ONCE from the robust document database using 'Human's message' below.

# Human's message:
# "學生負責事項有哪些？"
# """

user_prompt = f"""
IMPORTANT: Use the powerful "document_retrieval" tool to search for information using the complete Human message STRICTLY ONLY TWICE, NO MORE, and then respond to the human's message in Traditional Chinese.

Human's message:
出國交換生一般需繳文件有哪些？
"""

### Invoke

In [95]:
agent_result = agent_executor.invoke(
    {
        "input": user_prompt
    }
)

print(f"Input:\n{agent_result["input"]}\n\n=====\n\nOutput:\n{agent_result["output"]}")

Input:

Use the powerful "document_retrieval" tool to search for information using the complete Human message STRICTLY ONLY TWICE, NO MORE, and then respond to the human's message in Traditional Chinese.

Human's message:
出國交換生一般需繳文件有哪些？


=====

Output:
出國交換生一般需繳文件有：1. 獲准出國交換離校申請表；2. 國外研修機構錄取信影本；3. 連帶保證書；4. 出國交換知情同意書。


### Streaming

In [None]:
print(f"Streaming:\n=====\n")

async for event in agent_executor.astream_events(
    {"input": user_prompt}, version="v1"
):
    kind = event["event"]
    if kind == "on_chain_start":
        if (event["name"] == "Agent"):  # Was assigned when creating the agent with `.with_config({"run_name": "Agent"})`
            # print(f"Starting agent: {event['name']} with input: {event['data'].get('input')}")
            pass
    elif kind == "on_chain_end":
        if (event["name"] == "Agent"):  # Was assigned when creating the agent with `.with_config({"run_name": "Agent"})`
            # print()
            # print("--")
            # print(
            #     f"Done agent: {event['name']} with output: {event['data'].get('output')['output']}"
            # )
            pass
    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            # Empty content in the context of OpenAI means
            # that the model is asking for a tool to be invoked.
            # So we only print non-empty content
            print(content, end="")
    elif kind == "on_tool_start":
        # print("--")
        # print(
        #     f"Starting tool: {event['name']} with inputs: {event['data'].get('input')}"
        # )
        pass
    elif kind == "on_tool_end":
        # print(f"Done tool: {event['name']}")
        # print(f"Tool output was: {event['data'].get('output')}")
        # print("--")
        pass

print(f"\n=====\nStreaming ends.")

Streaming:
=====

出國交換生一般需繳納的文件包括：
1. 獲准出國交換離校申請表（範本）
2. 國外研修機構錄取信影本
3. 連帶保證書
4. 出國交換知情同意書（Step1 已繳交者無需提供）
5. 出差請示單（範本，紙本自行留存，需夾帶至返國後獲補助者額外需繳文件中）
6. 學生證 & 身分證正反面（格式請點我）
7. 帳戶（請檢附學校登記的帳戶存摺封面）（格式請點我）
8. 預借經費申請單（本單據用途為校內撥款，由國際處印製）
9. 其他有利申請之文件，如國際化時數累計表（可免附）
10. 中華民國身分證正反面影本（非中華民國國籍者，請提供在臺居留證正反面影本）
11. 特殊身份證明（非對象者可免附）
12. 出國交換知情同意書

這些文件可能需要根據學校的不同而有所變化，同時也有可能需要提供其他額外的文件。
=====
Streaming ends.

