# Playground: Design CoA Agent 
------------------

In [1]:
%load_ext autoreload
%autoreload 2

# Basic Setup

In [2]:
# import streamlit as st
# from streamlit_extras.add_vertical_space import add_vertical_space
# from utils import PERSONA, AVAILABLE_FUNCTIONS, FUNCTIONS_SPEC, Smart_Agent, add_to_cache
import sys
import time
import random
import os
from pathlib import Path  
import json
from IPython.display import Markdown, display

In [3]:
import openai

# Credentials must never be committed to the notebook. Export OPENAI_API_KEY in
# the shell (or load it from an untracked .env file) before starting the kernel.
# Any key that was previously stored in this cell should be treated as leaked
# and revoked.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; export it before running this notebook.")


# use chatCompletion model:
def get_completion(prompt, model="gpt-4-1106-preview", system_message = "You are an helpful Accountant Analyst."):
    """
    Send a conversation to the OpenAI chat-completions endpoint and return the reply text.

    Parameters:
    prompt (str | list[str]): Either a single user message, or a list of messages
        that alternate user / assistant / user / ... starting with the user.
    model (str): Name of the chat model to call.
    system_message (str): Content of the system message placed before the conversation.

    Returns:
    str: Content of the first choice returned by the API.
    """
    # A bare string would otherwise be iterated character by character, sending
    # one message per character; treat it as a single user turn instead.
    if isinstance(prompt, str):
        prompt = [prompt]

    messages = [{"role": "system", "content": system_message}]
    for position, content in enumerate(prompt):
        # Even positions are user turns, odd positions are assistant turns.
        role = "assistant" if position % 2 else "user"
        messages.append({"role": role, "content": content})

    completion = openai.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.1,
        max_tokens=800,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )

    return completion.choices[0].message.content

# CoA Agent

* Account Store Index from deepLakeVectorStore
* Web Search with Google API 

In [4]:
from llama_index.llms import OpenAI

# LLM handles shared by the agents in this notebook.
# The llama_index OpenAI wrapper selects the model through the `model` keyword;
# `model_name` is not a constructor parameter, so passing it does not select the
# requested model.
llm_gpt4 = OpenAI(temperature=0.1, model="gpt-4")
llm_gpt4_0613 = OpenAI(model="gpt-4-0613")
llm_gpt3_turbo_0125 = OpenAI(model="gpt-3.5-turbo-0125")



## Account Store Query Engine

Create an agent that retrieves account information from the account store. The challenge will be to pull the needed information from the store.

In [5]:
import llama_index
from llama_index.tools import QueryEngineTool, ToolMetadata

from llama_index import (
    VectorStoreIndex,
    ServiceContext
)


# Deep Lake Vector Store
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.storage.storage_context import StorageContext

import os

# The Activeloop token is read from the environment instead of being stored in
# the notebook. Export ACTIVELOOP_TOKEN before starting the kernel; a token that
# was previously committed here should be treated as leaked and revoked.
if not os.environ.get("ACTIVELOOP_TOKEN"):
    raise RuntimeError("ACTIVELOOP_TOKEN is not set; export it before running this notebook.")


# Service context that routes LlamaIndex LLM calls through `llm_gpt4`.
service_context = ServiceContext.from_defaults(llm=llm_gpt4)

def create_index_from_db_with_DeepLakeVectorStore(org_id, dataset_name, overwrite=False):
    """
    Build a VectorStoreIndex on top of a Deep Lake dataset.

    Parameters:
    org_id (str): Organization ID for Activeloop.
    dataset_name (str): Name of the dataset for Activeloop.
    overwrite (bool): Forwarded unchanged to DeepLakeVectorStore. Defaults to False.

    Returns:
    VectorStoreIndex: Index created with VectorStoreIndex.from_vector_store.
    """
    # The dataset lives at hub://<org_id>/<dataset_name>.
    deeplake_store = DeepLakeVectorStore(
        dataset_path=f"hub://{org_id}/{dataset_name}",
        overwrite=overwrite,
    )

    # Wire the store into a storage context and expose it as an index.
    return VectorStoreIndex.from_vector_store(
        deeplake_store,
        storage_context=StorageContext.from_defaults(vector_store=deeplake_store),
    )

def create_index_from_file_with_DeepLakeVectorStore(documents, org_id, dataset_name, overwrite=True):
    """
    Index the given documents into a Deep Lake dataset.

    Parameters:
    documents (list): llamaindex Document objects to index.
    org_id (str): Organization ID for Activeloop.
    dataset_name (str): Name of the dataset for Activeloop.
    overwrite (bool): Forwarded unchanged to DeepLakeVectorStore. Defaults to True.

    Returns:
    VectorStoreIndex: Index created with VectorStoreIndex.from_documents.
    """
    # The dataset lives at hub://<org_id>/<dataset_name>.
    deeplake_store = DeepLakeVectorStore(
        dataset_path=f"hub://{org_id}/{dataset_name}",
        overwrite=overwrite,
    )

    # Index the documents through a storage context bound to the Deep Lake store.
    return VectorStoreIndex.from_documents(
        documents,
        storage_context=StorageContext.from_defaults(vector_store=deeplake_store),
    )

**LlamaIndex Agent for dynamic account recommendation**

Try out low-level API for Customizing the stages of querying: https://docs.llamaindex.ai/en/stable/understanding/querying/querying.html#customizing-the-stages-of-querying

In [6]:
# Create the index from the vector store: dataset "coa_account_store" of the
# "dennistriepke" organisation, with overwrite disabled.
index = create_index_from_db_with_DeepLakeVectorStore("dennistriepke", "coa_account_store", overwrite = False)

Deep Lake Dataset in hub://dennistriepke/coa_account_store already exists, loading from the storage


In [7]:
from llama_index import get_response_synthesizer
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.postprocessor import SimilarityPostprocessor
# Configure the retriever: fetch the 10 most similar nodes from the account index.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)

# Configure the response synthesizer ("tree_summarize" mode with structured answer filtering enabled).
response_synthesizer = get_response_synthesizer(response_mode="tree_summarize", structured_answer_filtering=True)

# Assemble the query engine; retrieved nodes pass through a similarity
# postprocessor configured with a cutoff of 0.7.
account_store_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)]
)

# Wrap the account-store query engine as a tool named "account_store".
# The description below is runtime text passed to QueryEngineTool.from_defaults,
# so it is kept verbatim.
query_tool_account_store = QueryEngineTool.from_defaults(
    query_engine=account_store_engine,
    name="account_store",
    description=("""
                 Provides information about a single accounts used in a chart of account.

                 Use this tool to get following info for a single account:
                 - Account Name
                 - Balance: (Specify Debit or Credit Account)
                 - Account Type: 
                 - Financial Statement Type
                 - Account Description (what it used for and IFRS and GAAP information)
                 
                 Use a detailed plain text question as input to the tool.  
                 Make sure you response is designed in length and format for an LLM Agent (not human). The agent will recive your result and syntehize the final answer.  
                 
                """
    ),
)

# Smoke-test the account store engine directly (bypassing the agent) with a
# query that includes an example of the expected metadata format.
r = account_store_engine.query("Show me all accounts and its meta data I can use to record R&D. For instance: `{ 'account_name': 'Inventory of aggregates', 'balance': 'Dr', 'level': 4, 'account_type': 'Assets', 'financial_statement_type': 'Balance Sheet'}`")

# Render the response text as Markdown in the cell output.
Markdown(r.response)

{
    'account_name': 'Inventory of aggregates',
    'balance': 'Dr',
    'level': 4,
    'account_type': 'Assets',
    'financial_statement_type': 'Balance Sheet'
}

## Create a web search tool

In [21]:
import urllib.parse
from typing import Optional

import requests
from llama_index.readers.schema.base import Document
from llama_index.tools.tool_spec.base import BaseToolSpec

QUERY_URL_TMPL = (
    "https://www.googleapis.com/customsearch/v1?key={key}&cx={engine}&q={query}"
)


class GoogleSearchToolSpec(BaseToolSpec):
    """Google Search tool spec backed by the Custom Search JSON API."""

    spec_functions = ["google_search"]

    def __init__(
        self,
        key: str,
        engine: str,
        num: Optional[int] = None,
        timeout: Optional[float] = 30.0,
    ) -> None:
        """
        Initialize with parameters.

        Args:
            key (str): Google API key.
            engine (str): Custom search engine id (the `cx` URL parameter).
            num (int, optional): Number of results to request (1-10). Defaults to
                None, in which case no `num` parameter is sent.
            timeout (float, optional): Seconds to wait for the HTTP response.
                Defaults to 30; pass None to wait indefinitely.
        """
        self.key = key
        self.engine = engine
        self.num = num
        self.timeout = timeout

    # NOTE(review): presumably this docstring is surfaced as the tool description
    # by to_tool_list() - keep it in sync with the real signature.
    def google_search(self, query: str):
        """
        Make a query to the Google search engine to receive a list of results.

        Args:
            query (str): The query to be passed to Google search.

        Raises:
            ValueError: If the configured 'num' is not an integer between 1 and 10.
        """
        # Validate before building the URL so an invalid configuration fails early.
        if self.num is not None and not 1 <= self.num <= 10:
            raise ValueError("num should be an integer between 1 and 10, inclusive")

        url = QUERY_URL_TMPL.format(
            key=self.key, engine=self.engine, query=urllib.parse.quote_plus(query)
        )
        if self.num is not None:
            url += f"&num={self.num}"

        # Without a timeout a stalled connection would block the agent forever.
        response = requests.get(url, timeout=self.timeout)
        # The raw response body is returned as-is (error payloads included).
        return [Document(text=response.text)]
    

# Google Custom Search credentials are read from the environment
# (GOOGLE_API_KEY = API key, GOOGLE_CSE_ID = search engine id) instead of being
# stored in the notebook. A key that was previously committed here should be
# treated as leaked and revoked. A missing variable raises KeyError immediately.
gsearch_tools = GoogleSearchToolSpec(
    key=os.environ["GOOGLE_API_KEY"],
    engine=os.environ["GOOGLE_CSE_ID"],
    num=10,
).to_tool_list()

# for tool in [*gsearch_tools]:
#     print(tool.metadata.name)
#     print(tool.metadata.description)

from llama_index.tools.tool_spec.load_and_search.base import LoadAndSearchToolSpec
# Wrap the first Google search tool with LoadAndSearchToolSpec and expose the
# resulting tools as a list. The description is runtime text and is kept verbatim.
print("Wrapping " + gsearch_tools[0].metadata.name)
gsearch_load_and_search_tools = LoadAndSearchToolSpec.from_defaults(
    gsearch_tools[0],
    name="google_search_tool", 
    description=f"""\
Useful for any queries that requires web search. 
"""
).to_tool_list()

Wrapping google_search


In [23]:
# Create the Agent with load/search tools
from llama_index.agent import ContextRetrieverOpenAIAgent, OpenAIAgent, ReActAgent, FnRetrieverOpenAIAgent

# Agent dedicated to web search: it is given only the load-and-search tools and
# uses `llm_gpt4_0613`. The system prompt is runtime text and is kept verbatim.
web_search_agent = OpenAIAgent.from_tools(
    gsearch_load_and_search_tools, 
    llm=llm_gpt4_0613,
    system_prompt="""
    You are a specialized web search agent designed to answer queries with  a web search request.  
    You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge. 
    Make sure you response is designed in length and format for an LLM Agent (not human). The agent will recive your result and syntehize the final answer. 
    """,
    verbose=True
)

# r = web_search_agent.query(""" 
# How to perform a journal entries compliance test with IFRS standrds?
# Create a stepwise report with headlines, how a LLM Agent with access to IFRS documentation can automate the process. 

# Make sure you response is designed in length and format for an LLM Agent (not human). The agent will recive your result and syntehize the final answer.                    
# """)

# Markdown(r.response)


## CoA Agent

I build a top-level agent that can orchestrate across the different document agents and tools to answer any user query related to accounts and the faithfulness of bookings.  

This agent takes in all document agents as tools. This specific agent, RetrieverOpenAIAgent, performs tool retrieval before tool use (unlike a default agent that tries to put all tools in the prompt).


https://docs.llamaindex.ai/en/stable/examples/agent/multi_document_agents.html

In [25]:
# Description shown to the top-level agent for the web search tool.
web_search_summary = (
    "\n"
    "    This tool contains an agent that has access internet.\n"
    "    Use this tool if you want to research knowledge.\n"
    "\n"
)

# The web search agent is exposed to the top-level agent as a query-engine tool.
web_search_tool = QueryEngineTool(
    query_engine=web_search_agent,
    metadata=ToolMetadata(name="tool_web_search", description=web_search_summary),
)

# Tools handed to the CoA agent: the account store first, then web search.
all_tools = [query_tool_account_store, web_search_tool]



# # IFRS Agent
# ifrs_summary = ("""
#     This tool contains an agent that has access to International Financial Reporting Standards (IFRS) articles. 
#     Use this tool if you want to answer any questions about accounting and reporting standrds.

# """
# )
# ifrs_tool = QueryEngineTool(
#     query_engine=ifrs_agent,
#     metadata=ToolMetadata(
#         name=f"tool_ifrs",
#         description=ifrs_summary,
#     ),
# )

# all_tools.append(ifrs_tool)


ReActAgent

In [26]:
# System prompt for the CoA agent.
# Tool names quoted in the prompt must match the names the agent is actually
# given (`account_store` and `tool_web_search`); a Python variable name such as
# `gsearch_load_and_search_tools` is not a callable tool for the agent.
system_message = """

    # Role Description:

    You are a specialized accountant focusing on chart of accounts.

    # Task Overview:

    For every query you receive, follow these steps:

    1.  Initial Query Planning and Execution:  
        -   First, decide which tools are necessary based on the query. Develop plain text questions tailored to each tool. Ensure your questions are comprehensive so each tool can process long inputs effectively. Do not overly simplify the input.
        -    Whenever you have the thought: "I can answer without using any more tools.", always use the `tool_web_search` to enrich your answer. Compose a plain text question and memorize the response.

    2.  Response Synthesis:  
        -   Combine the answers from all tools and synthesize a coherent response that addresses the initial user query.

    # Detailed Guidance for Tool Interaction:

    -   Example Process:
        -   *Thought*: Determine the necessity of a tool to address a query.
        -   *Action*: Choose `tool_web_search`.
        -   *Action Input*: `{ 'input': 'Explain IFRS using the web search tool. Respond to me with a structured report.'}`

    # Usage Guidelines:

    1.  Account Information Queries:

        -   Always begin with the `account_store` tool when a user asks about an account or verification is needed.
        -   If no relevant account is found, respond with "I do not have a matching account" and use the `tool_web_search` for suggesting an account.
    2.  Transaction Recording Queries:

        -   Search for all relevant accounts associated with the transaction using the `account_store` tool.
        -   Analyze the descriptions of how to use these accounts provided by the account store.
        -   Create a booking record based on the transaction description from the user, detailing account name, debit, and credit in a table format.
    3.  Handling Non-responses from Tools:

        -   If no tool provides a satisfactory answer, use the `tool_web_search` tool to suggest potential answers.

    # Documentation and References:

    -   Always include references for all documents used with the metadata from the function "account_store". For example:

        -   *Inventory of aggregates is the best account* [1]

        -   [1]: `{ 'account_name': 'Inventory of aggregates', 'balance': 'Dr', 'level': 4, 'account_type': 'Assets', 'financial_statement_type': 'Balance Sheet'}`

    # Automated Response:

    -   Do not wait for any human input to respond.
"""
# Markdown(system_message)

In [27]:

from llama_index.agent import ContextRetrieverOpenAIAgent, OpenAIAgent, ReActAgent, FnRetrieverOpenAIAgent
# Top-level CoA agent: a ReAct agent over the tools collected in `all_tools`.
# NOTE(review): `max_function_calls` and `system_prompt` are OpenAIAgent-style
# keywords. Confirm that ReActAgent.from_tools in the installed llama_index
# version consumes them; if it silently ignores unknown keyword arguments,
# `system_message` never reaches the LLM and the iteration limit stays at its
# default (the ReAct equivalent is presumably `max_iterations` - TODO confirm).
coa_agent_react = ReActAgent.from_tools(
    tools = all_tools ,
    max_function_calls=10,
    llm= llm_gpt4_0613, #OpenAI(temperature=0.1, model="gpt-4-0613"),
    verbose=True,
    system_prompt= system_message
)

# coa_agent_react.chat_history.clear()

# r = coa_agent_react.chat("Show me all accounts and its meta data I can use to record the value of properties? For instance: `{ 'account_name': 'Inventory of aggregates', 'balance': 'Dr', 'level': 4, 'account_type': 'Assets', 'financial_statement_type': 'Balance Sheet'}`")
# Markdown(r.response)

## Examples

#### **Example: Google Search**

In [None]:
# prompt = """
# Use the google search tool for explain IFRS to me?   
# """

# r = coa_agent_react.query(prompt)
# Markdown(r.response)


#### **Example: Invoice booking**

In [None]:
# Example user prompt: book a sales invoice and the matching bank transaction.
# NOTE(review): the invoice amounts are inconsistent - 381.21 + 72.41 = 453.62,
# not 453.53, and 19% of 381.21 is about 72.43 - confirm whether this is intended.
# NOTE(review): the prompt asks for "the ifrs tool", but the IFRS tool
# registration in the CoA tool list is commented out in this notebook.
prompt = """
    Little Electrodes, Inc. is a retailer that sells electronics and computer parts. On January 1, Little Electrode, Inc. sells a computer monitor to a customer.
    How to record a journal entry for the sales invoice below to a customer "called  "Software Client"? 

    INVOICE:
    '''
        Total: 381.21€
        VAT 19%: 72.41€
        Gross Amount incl VAT:453.53€  
    '''
    
    Provide the full journal entry in a table with the columns account_name, debit, credit.
    Reference the account description for the used accounts from the account store.
    
    Additionally, explain how to book a the bank statement transaction of the invoice based on web search result. 
    
    Finally, use the ifrs tool to reference the journal entries rules.

    Always structue your response with headlines.



"""

# coa_agent_react.chat_history.clear()
# r = coa_agent_react.query(prompt)
# Markdown(r.response)

# IFRS Agent  
* IFRS Document Agent 

## Create IFRS document tool

This tool is a RAG agent where each document is its own agent.  

https://blog.llamaindex.ai/agentic-rag-with-llamaindex-2721b8a49ff6

In [None]:
from llama_index.llms import OpenAI
from llama_index import ServiceContext

from llama_index import VectorStoreIndex, SummaryIndex

# Build document agents
from llama_index.agent import OpenAIAgent
from llama_index import load_index_from_storage, StorageContext, SimpleDirectoryReader
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.node_parser import SentenceSplitter
import os
from tqdm.notebook import tqdm
import pickle

# LLM handles for the IFRS section. These rebind the names defined earlier in
# the notebook (note that `llm_gpt4` uses temperature 0.0 here).
# The llama_index OpenAI wrapper selects the model through the `model` keyword;
# `model_name` is not a constructor parameter, so passing it does not select the
# requested model.
llm_gpt4 = OpenAI(temperature=0.0, model="gpt-4")
llm_gpt4_0613 = OpenAI(model="gpt-4-0613")
llm_gpt3_turbo_0125 = OpenAI(model="gpt-3.5-turbo-0125")

# Deep Lake Vector Store
from llama_index.vector_stores import DeepLakeVectorStore
from llama_index.storage.storage_context import StorageContext

import nest_asyncio
nest_asyncio.apply()


import os

# The Activeloop token is read from the environment instead of being stored in
# the notebook. Export ACTIVELOOP_TOKEN before starting the kernel; a token that
# was previously committed here should be treated as leaked and revoked.
if not os.environ.get("ACTIVELOOP_TOKEN"):
    raise RuntimeError("ACTIVELOOP_TOKEN is not set; export it before running this notebook.")



# Create the vector store index in Deep Lake from documents.
# NOTE: a function with this name is also defined earlier in the notebook; this
# definition replaces it when the cell runs.
def create_index_from_file_with_DeepLakeVectorStore(documents, org_id, dataset_name, overwrite=True):
    """
    Index the given documents into a Deep Lake dataset.

    Parameters:
    documents (list): llamaindex Document objects to index.
    org_id (str): Organization ID for Activeloop.
    dataset_name (str): Name of the dataset for Activeloop.
    overwrite (bool): Forwarded unchanged to DeepLakeVectorStore. Defaults to True.

    Returns:
    VectorStoreIndex: Index created with VectorStoreIndex.from_documents.
    """
    hub_path = "hub://{}/{}".format(org_id, dataset_name)
    store = DeepLakeVectorStore(dataset_path=hub_path, overwrite=overwrite)
    context = StorageContext.from_defaults(vector_store=store)
    return VectorStoreIndex.from_documents(documents, storage_context=context)

# Load an index from an existing Deep Lake dataset.
# NOTE: a function with this name is also defined earlier in the notebook; this
# definition replaces it when the cell runs.
def create_index_from_db_with_DeepLakeVectorStore(org_id, dataset_name, overwrite=False):
    """
    Build a VectorStoreIndex on top of a Deep Lake dataset.

    Parameters:
    org_id (str): Organization ID for Activeloop.
    dataset_name (str): Name of the dataset for Activeloop.
    overwrite (bool): Forwarded unchanged to DeepLakeVectorStore. Defaults to False.

    Returns:
    VectorStoreIndex: Index created with VectorStoreIndex.from_vector_store.
    """
    hub_path = "hub://{}/{}".format(org_id, dataset_name)
    store = DeepLakeVectorStore(dataset_path=hub_path, overwrite=overwrite)
    context = StorageContext.from_defaults(vector_store=store)
    return VectorStoreIndex.from_vector_store(store, storage_context=context)

# Shorten tool names to respect the LlamaIndex name-length restriction.
def query_engine_name_checker(name):
    """Return `name` unchanged when it has fewer than 63 characters, otherwise its first 62 characters."""
    return name[:62] if len(name) >= 63 else name

                          

**Vector Embedding of the IFRS Files**  

Read PDF files as simple text. This method requires that there are no complex tables in the files.

In [None]:
import os

# Define the directory path. It is built with os.path.join so it resolves on
# every operating system; a backslash-only literal works on Windows alone.
directory_path = os.path.join(".", "data", "ifrs")

# List to hold all found PDF files
pdf_files = []

# Registries filled while walking the directory:
# file_base -> document agent, and file_base -> {"summary": ...}
agents_dict = {}
extra_info_dict = {}

# Rebinds the notebook-level `service_context` to one built on
# `llm_gpt3_turbo_0125`; build_agent_per_document reads this global when it
# creates the SummaryIndex.
service_context = ServiceContext.from_defaults(llm=llm_gpt3_turbo_0125)

def build_agent_per_document(file_base, file_path, load_index_from_deeplake = True):
    """
    Build a document agent (vector tool + summary tool) for one IFRS PDF file.

    Parameters:
    file_base (str): Sanitised file name; used as Deep Lake dataset name, in the
        tool names and in the summary cache file name.
    file_path (str): Path of the PDF file to read.
    load_index_from_deeplake (bool): When True the vector index is loaded from the
        existing Deep Lake dataset; when False it is (re)built from the documents
        with overwrite=True.

    Returns:
    tuple: (agent, summary) - the OpenAIAgent for this document and the cached
    summary string of the document.
    """

    # Read pdf file and transform into text documents per page
    print(file_path)
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()

    # INDEX:
    print ("Get Index for", file_base)
    if load_index_from_deeplake:
        vector_index = create_index_from_db_with_DeepLakeVectorStore("dennistriepke", file_base)
    else:
        # Just for the first time: build the dataset from the documents
        vector_index = create_index_from_file_with_DeepLakeVectorStore(documents, "dennistriepke", file_base, overwrite = True)
    # The summary index is always built from the documents, whichever branch ran.
    summary_index = SummaryIndex(documents, service_context=service_context)

    # define query engines
    vector_query_engine = vector_index.as_query_engine()
    summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize")

    # Extract a summary from all documents for describing the summary query engine.
    # The summary is cached on disk so the LLM is only queried once per document.
    summary_out_path = f"./data/ifrs/llamaindex_docs/{file_base}_summary.pkl"
    if not os.path.exists(summary_out_path):
        Path(summary_out_path).parent.mkdir(parents=True, exist_ok=True)
        summary = str(summary_query_engine.query("Extract a concise 1-2 line summary of this document").response)
        # Context managers guarantee the cache file handle is closed.
        with open(summary_out_path, "wb") as summary_file:
            pickle.dump(summary, summary_file)
    else:
        # SECURITY: pickle.load executes arbitrary code from the file; only load
        # cache files that this notebook wrote itself.
        with open(summary_out_path, "rb") as summary_file:
            summary = pickle.load(summary_file)

    # define tools
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name=query_engine_name_checker(f"vector_tool_{file_base}"),
                description="Useful for questions related to specific facts",
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name=query_engine_name_checker(f"summary_tool_{file_base}"),
                description="Useful for summarization questions",
            ),
        ),
    ]

    # build agent
    # The llama_index OpenAI wrapper selects the model through `model`
    # (`model_name` is not a constructor parameter).
    function_llm = OpenAI(model="gpt-3.5-turbo-0125")
    agent = OpenAIAgent.from_tools(
        query_engine_tools,
        llm=function_llm,
        verbose=True,
        system_prompt=f"""\
    You are a specialized agent designed to answer queries about the `{file_base}` part of the IFRS docs.
    You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.

    **Ruls for the tool selection**
    - summary_tool_{file_base}: use this tool first to plan your task and summarizaiton issues 
    - vector_tool_{file_base}: use for task solving and detailed information extraxtion
    """,
    )
    return agent, summary

import re

# Walk through the directory and build one agent per PDF file
for root, dirs, files in os.walk(directory_path):
    for file in files:
        # Skip everything that is not a PDF (checked by extension)
        if not file.lower().endswith(".pdf"):
            continue

        # Construct full file path and add it to the list
        file_path = os.path.join(root, file)
        pdf_files.append(file_path)

        # Derive a name that is safe for datasets and tool names: every character
        # outside [a-zA-Z0-9-_] (including the "." before the extension) becomes "_".
        # str.replace treats its argument literally, so the pattern needs re.sub.
        file_base = re.sub(r"[^a-zA-Z0-9\-_]", "_", file)
        print("\n\n", file_base)

        agent, summary = build_agent_per_document(file_base, file_path,  load_index_from_deeplake = True)

        print("Add agent to dict:", file_path)
        agents_dict[file_base] = agent
        extra_info_dict[file_base] = {"summary": summary}
            



 ias-1-presentation-of-financial-statements_pdf
.\data\ifrs\ias-1-presentation-of-financial-statements.pdf
Get Index for ias-1-presentation-of-financial-statements_pdf
Deep Lake Dataset in hub://dennistriepke/ias-1-presentation-of-financial-statements_pdf already exists, loading from the storage
Add agent to dict: .\data\ifrs\ias-1-presentation-of-financial-statements.pdf


 ias-12-income-taxes_pdf
.\data\ifrs\ias-12-income-taxes.pdf
Get Index for ias-12-income-taxes_pdf
Deep Lake Dataset in hub://dennistriepke/ias-12-income-taxes_pdf already exists, loading from the storage
Add agent to dict: .\data\ifrs\ias-12-income-taxes.pdf


 ias-16-property-plant-and-equipment_pdf
.\data\ifrs\ias-16-property-plant-and-equipment.pdf
Get Index for ias-16-property-plant-and-equipment_pdf
Deep Lake Dataset in hub://dennistriepke/ias-16-property-plant-and-equipment_pdf already exists, loading from the storage
Add agent to dict: .\data\ifrs\ias-16-property-plant-and-equipment.pdf


 ias-19-employee-

**Build Retriever-Enabled OpenAI Agent**  
We build a top-level agent that can orchestrate across the different document agents to answer any user query.

This RetrieverOpenAIAgent performs tool retrieval before tool use (unlike a default agent that tries to put all tools in the prompt).

Improvements from V0: We make the following improvements compared to the "base" version in V0.

Adding in reranking: we use Cohere reranker to better filter the candidate set of documents.
Adding in a query planning tool: we add an explicit query planning tool that's dynamically created based on the set of retrieved tools.

In [None]:
# Expose every document agent as a query-engine tool.
# NOTE: this rebinds `all_tools`, a name the CoA section of the notebook also uses.
all_tools = []
for file_base in agents_dict:
    agent = agents_dict[file_base]
    print("Define Agent Tool:", file_base)

    # The cached document summary serves as the tool description.
    summary = extra_info_dict[file_base]["summary"]

    # Tool names are length-limited first, then dashes are replaced by underscores.
    tool_metadata = ToolMetadata(
        name=query_engine_name_checker(f"tool_{file_base}").replace("-", "_"),
        description=summary,
    )
    doc_tool = QueryEngineTool(query_engine=agent, metadata=tool_metadata)
    all_tools.append(doc_tool)


# List the resulting tool names for inspection.
for t in all_tools:
    print(t.metadata.name)
# print(all_tools[0].metadata)


Define Agent Tool: ias-1-presentation-of-financial-statements_pdf
Define Agent Tool: ias-12-income-taxes_pdf
Define Agent Tool: ias-16-property-plant-and-equipment_pdf
Define Agent Tool: ias-19-employee-benefits_pdf
Define Agent Tool: ias-2-inventories_pdf
Define Agent Tool: ias-21-the-effects-of-changes-in-foreign-exchange-rates_pdf
Define Agent Tool: ias-26-accounting-and-reporting-by-retirement-benefit-plans_pdf
Define Agent Tool: ias-32-financial-instruments-presentation_pdf
Define Agent Tool: ias-33-earnings-per-share_pdf
Define Agent Tool: ias-37-provisions-contingent-liabilities-and-contingent-assets_pdf
Define Agent Tool: ias-38-intangible-assets_pdf
Define Agent Tool: ias-39-financial-instruments-recognition-and-measurement_pdf
Define Agent Tool: ias-40-investment-property_pdf
Define Agent Tool: ias-41-agriculture_pdf
Define Agent Tool: ias-7-statement-of-cash-flows_pdf
Define Agent Tool: ifrs-12-disclosure-of-interests-in-other-entities_pdf
Define Agent Tool: ifrs-13-fair-val

**The next part creates the tool (aka agent) retriever.**  

`CustomRetriever`   
- returns a list nodes (= tools) based on query 
- _vector_retriever:: ObjectIndex from tool mapping as node retriever  
- Preprocessing the retreived tools with cohere rerank 
![image.png](attachment:image.png)


`CustomObjectRetriever`  
- returns a list of tools based on query + 1 subquestion tool with all the retrieved tools  
- calls CustomRetriever and gets the nodes 
- using tool_mapping to get query engine tools  
![image-2.png](attachment:image-2.png)  
- Build a sub-question tool based on all the retrieved tools  
![image-3.png](attachment:image-3.png)

In [None]:
# define an "object" index and retriever over these tools
from llama_index import VectorStoreIndex
from llama_index.objects import (
    ObjectIndex,
    SimpleToolNodeMapping,
    ObjectRetriever,
)
from llama_index.retrievers import BaseRetriever
from llama_index.postprocessor import CohereRerank
from llama_index.tools import QueryPlanTool
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.llms import OpenAI

# define a custom retriever with reranking
class CustomRetriever(BaseRetriever):
    """
    A custom retriever class that extends the BaseRetriever for retrieving nodes based on vector similarity.
    It incorporates a postprocessing step to rerank the retrieved nodes.

    Attributes:
        _vector_retriever: The vector-based node retriever.
        _postprocessor: The postprocessing component for reranking the nodes.
    """

    def __init__(self, vector_retriever, postprocessor=None):
        """
        Initializes the CustomRetriever with a vector retriever and an optional postprocessor.

        Parameters:
            vector_retriever: The vector-based node retriever.
            postprocessor: An optional postprocessor for reranking. Defaults to
                CohereRerank(top_n=5) when not provided.

        Raises:
            KeyError: If no postprocessor is given and COHERE_API_KEY is not set.
        """

        self._vector_retriever = vector_retriever
        if not postprocessor:
            # The Cohere key is read from the environment instead of being stored
            # in the notebook; a key that was previously committed here should be
            # treated as leaked and revoked.
            postprocessor = CohereRerank(top_n=5, api_key=os.environ["COHERE_API_KEY"])
        self._postprocessor = postprocessor
        super().__init__()

    def _retrieve(self, query_bundle):
        """
        Retrieves and postprocesses nodes based on the provided query bundle.

        Parameters:
            query_bundle: The query information package used for retrieval.

        Returns:
            The nodes returned by the postprocessor for the retrieved candidates.
        """
        retrieved_nodes = self._vector_retriever.retrieve(query_bundle)
        filtered_nodes = self._postprocessor.postprocess_nodes(retrieved_nodes, query_bundle=query_bundle)

        return filtered_nodes

# Object retriever that turns retrieved tool nodes back into tool objects.
# The query-planning (sub-question) extension exists only as commented-out code
# after this class and is not active.
class CustomObjectRetriever(ObjectRetriever):
    """
    A custom object retriever that maps the nodes found by a node retriever back to tool objects.

    Attributes:
    _retriever: The node retriever used for the initial retrieval.
    _object_node_mapping: Mapping used to convert nodes back into objects.
    _llm: LLM instance stored on the retriever; `retrieve` does not use it.
    """

    def __init__(self, retriever, object_node_mapping, all_tools, llm=None):
        """
        Initializes the CustomObjectRetriever with a retriever, mapping, and optionally an LLM.

        Parameters:
        retriever (CustomRetriever): The custom retriever used for initial retrieval.
        object_node_mapping (SimpleToolNodeMapping): The mapping between objects and their corresponding nodes.
        all_tools: Accepted for interface compatibility; this constructor does not store or use it.
        llm (OpenAI, optional): The Large Language Model instance. Defaults to a new OpenAI("gpt-4-0613") instance.
        """
        self._retriever = retriever
        self._object_node_mapping = object_node_mapping
        self._llm = llm or OpenAI("gpt-4-0613")

    def retrieve(self, query_bundle):
        """
        Retrieves relevant tools based on the provided query bundle.

        Parameters:
        query_bundle: The query data used for tool retrieval.

        Returns:
        A list with one tool per retrieved node, in the order the retriever returned the nodes.
        """
        # Retrieve the tool nodes from query
        nodes = self._retriever.retrieve(query_bundle)
        
        # Map each retrieved node back to its tool object
        tools = [self._object_node_mapping.from_node(n.node) for n in nodes]

        return tools
#         # Initialize a query engine for sub-questions with the retrieved tools.
#         sub_question_sc = ServiceContext.from_defaults(llm=self._llm)
#         sub_question_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=tools, service_context=sub_question_sc, verbose=True)
        
#         # Create a QueryEngineTool specifically designed for comparison queries.
#         sub_question_tool = QueryEngineTool(
#             query_engine=sub_question_engine,
#             metadata=ToolMetadata(
#                 name="compare_tool", 
#                 description=f"""\
# Useful for any queries that involve comparing multiple documents. ALWAYS use this tool for comparison queries - make sure to call this \
# tool with the original query. Do NOT use the other tools for any queries involving multiple documents.
# """
#             ),
#         )

#         return tools + [sub_question_tool]
    
# CUSTOM NODE RETRIEVER
# Create a tool-node mapping from the list of tools; it is used to convert between tools and index nodes.
# NOTE(review): `all_tools` is also assigned in the CoA section of this notebook -
# confirm it holds the IFRS document tools when this cell runs.
tool_mapping = SimpleToolNodeMapping.from_objects(all_tools)

# Build an ObjectIndex over the tools, backed by a VectorStoreIndex.
obj_index = ObjectIndex.from_objects(all_tools, tool_mapping, VectorStoreIndex)

# Node retriever based on vector similarity, returning the top 10 nodes.
vector_node_retriever = obj_index.as_node_retriever(similarity_top_k=10)

# Custom retriever: wraps the vector retriever; no postprocessor is passed, so its default is used.
custom_node_retriever = CustomRetriever(vector_node_retriever)

# CUSTOM OBJECT RETRIEVER FROM NODE
# Wrap the node retriever so that it returns tool objects instead of nodes.
custom_obj_retriever = CustomObjectRetriever(custom_node_retriever, tool_mapping, all_tools, llm=llm_gpt3_turbo_0125)


## Build ReAct Agent with the custom_obj_retriever

In [17]:
from llama_index.agent import ReActAgent, OpenAIAgent
# Build the IFRS agent. Tools are not passed directly; they are looked up per query
# through `custom_obj_retriever`.
# NOTE(review): confirm against the installed llama_index version that
# `system_prompt` and `max_function_calls` are honored by ReActAgent.from_tools
# and not silently swallowed as extra keyword arguments -- TODO verify.
ifrs_agent = ReActAgent.from_tools(
     tool_retriever=custom_obj_retriever,
     system_prompt=""" \
You are an agent designed to answer queries about the IFRS accounting standards documentation.
Plan which tools to use in order to retrieve accounting standard information needed based on the user query. 
You can choose if you require a tool for your answer. 

#  Steps for Transaction Input Analysis
1. **Capture Transaction Details**: Collects detailed information about each transaction, including the date, amounts, accounts involved, and a description of the transaction.  
2. **Contextual Information**: Next determine context-specific information that might affect IFRS compliance, such as the nature of the transaction (e.g., lease, revenue recognition, financial instrument), the involved parties, and any contractual terms.  
3. **IFRS Rule Mapping**: Map the captured transaction details to relevant IFRS rules. This involves recognizing the transaction type and determining the applicable standards (e.g., IFRS 15 for revenue from contracts with customers, IFRS 16 for leases).  
4. **Compliance Analyis**: Check if the  transaction complies with the identified IFRS standards. This includes verifying recognition, measurement, presentation, and disclosure requirements. flags transactions that deviate from expected patterns or fail to meet specific IFRS criteria, indicating potential compliance issues.  
5. **Correct Suggestions**: Correction Suggestions: For transactions flagged as non-compliant, the system provides detailed explanations of the compliance issues and suggests corrective actions, such as adjusting the transaction amounts, changing the accounts involved, or adding necessary disclosures.  

 """,
     llm=llm_gpt4_0613,
     verbose=True,
     max_function_calls=5,
 )
# Intended to start from an empty conversation when the cell is re-run.
# NOTE(review): if `chat_history` returns a copy of the stored messages, clearing it
# has no effect on the agent's memory -- confirm, and prefer the agent's reset API if so.
ifrs_agent.chat_history.clear()

In [None]:
"""
#  Transaction Input Analysis
1. **Capture Transaction Details**: Collects detailed information about each transaction, including the date, amounts, accounts involved, and a description of the transaction.  
2. **Contextual Information**: Next determine context-specific information that might affect IFRS compliance, such as the nature of the transaction (e.g., lease, revenue recognition, financial instrument), the involved parties, and any contractual terms.  
3. **IFRS Rule Mapping**: Map the captured transaction details to relevant IFRS rules. This involves recognizing the transaction type and determining the applicable standards (e.g., IFRS 15 for revenue from contracts with customers, IFRS 16 for leases).  
4. **Compliance Analyis**: Check if the  transaction complies with the identified IFRS standards. This includes verifying recognition, measurement, presentation, and disclosure requirements. flags transactions that deviate from expected patterns or fail to meet specific IFRS criteria, indicating potential compliance issues.  
5. **Correct Suggestions**: Correction Suggestions: For transactions flagged as non-compliant, the system provides detailed explanations of the compliance issues and suggests corrective actions, such as adjusting the transaction amounts, changing the accounts involved, or adding necessary disclosures.  

General ledger document
Accounting document
Foreign currency valuation
Document with partner company
Asset posting
Depreciation postings
General ledger posting
Electronic banking
Cash receipt
Customer payment
Customer credit note
Vendor credit note
Vendor document
Vendor invoice
Payment posting
Account maintenance
Goods issue
Goods issue/delivery
Goods receipt
Invoice gross
Travel expense report
General reversal
CO document (Controlling document)

"""

'\n#  Transaction Input Analysis\n1. **Capture Transaction Details**: Collects detailed information about each transaction, including the date, amounts, accounts involved, and a description of the transaction.  \n2. **Contextual Information**: Next determine context-specific information that might affect IFRS compliance, such as the nature of the transaction (e.g., lease, revenue recognition, financial instrument), the involved parties, and any contractual terms.  \n3. **IFRS Rule Mapping**: Map the captured transaction details to relevant IFRS rules. This involves recognizing the transaction type and determining the applicable standards (e.g., IFRS 15 for revenue from contracts with customers, IFRS 16 for leases).  \n4. **Compliance Analyis**: Check if the  transaction complies with the identified IFRS standards. This includes verifying recognition, measurement, presentation, and disclosure requirements. flags transactions that deviate from expected patterns or fail to meet specific I

In [19]:
# r = ifrs_agent.query("""

# Compare the ifrs operating segments with isa employee benefits. 
# Provide your result in a structured fromat with headlines.  

# Always reference the tools your used.

# """)
# Markdown(r.response)

In [None]:
# r = ifrs_agent.query("""
# **Capture Transaction Details**: 
# Collects detailed information about the journal entry, including the date, amounts, accounts involved, and a description of the transaction.  
                                 
# ---------------------------------------                 
# **Journal Entry**: 

# | account_name | debit | credit |  
# | --- | --- | --- |  
# | RETAIL COOP |  381.21€  | - |   
# | Taxes receivable (VAT) | 72.41€ | - |  
# | Trade accounts receivable (A/R) | - | 453.62€ |  

# ---------------------------------------------------

# """)
# Markdown(r.response)

In [None]:
# r = ifrs_agent.chat("""
# Please perform the second step of the Journal Entry Input Analysis. 
                    
# 2. **Contextual Information**: Next determine context-specific information that might affect IFRS compliance, such as the nature of the transaction (e.g., lease, revenue recognition, financial instrument), the involved parties, and any contractual terms.  

                    
# """)
# Markdown(r.response)

# 2. AutoGen


## Init config files

In [28]:
import autogen
import os
import openai

# Credentials must come from the environment (or an interactive prompt), never from
# the notebook source: a key committed in a notebook is readable by everyone with
# access to the file. Any key that was previously hardcoded here must be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed for the interactive fallback

    os.environ["OPENAI_API_KEY"] = getpass("OPENAI_API_KEY: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

# Model endpoints offered to AutoGen; every agent config below shares this list.
config_list = [
    {
        "model": "gpt-4-1106-preview",
        "api_key": os.environ["OPENAI_API_KEY"],
    }
]

# LLM settings for agents that do not call functions (chart builder, group chat manager).
llm_config = {
    "seed": 42,  # change the seed for different trials
    "temperature": 0,
    "config_list": config_list,
    "timeout": 600
}



{'temperature': 0,
 'functions': [{'name': 'chart_of_account_tool',
   'description': 'Use this tool when you need account information and account meta data.',
   'parameters': {'type': 'object',
    'properties': {'message': {'type': 'string',
      'description': 'The question to ask in relation to general ledger accounts and web search queries.'}},
    'required': ['message']}}],
 'config_list': [{'model': 'gpt-4-1106-preview',
   'api_key': '<REDACTED>'}],
 'timeout': 120}

## Register tools

In [30]:

from llama_index.tools import BaseTool 
from pydantic import BaseModel, Field
from typing import Optional, Type, Annotated


class CoaStoreTool(BaseTool):
    """Function-call adapter that forwards a question to `coa_agent_react`."""

    # Identifier and description advertised to the LLM in the function schema.
    name = "chart_of_account_tool"
    description = "Use this tool when you need account information and account meta data."

    # Schema fragment for the single string argument `message`.
    args = {
        "message": dict(
            type="string",
            description="The question to ask in relation to general ledger accounts and web search queries.",
        )
    }
    # Name of the one mandatory argument.
    required = "message"

    def _run(self, message: str):
        """Send `message` to `coa_agent_react` and return the text of its reply."""
        return coa_agent_react.chat(message).response

class IfrsTool(BaseTool):
    """Function-call adapter that forwards a question to `ifrs_agent`."""

    # Identifier and description advertised to the LLM in the function schema.
    name = "ifrs_tool"
    description = "Use this tool when you need accounting standard information from IFRS standards."

    # Schema fragment for the single string argument `message`.
    args = {
        "message": dict(
            type="string",
            description="The question to ask in relation IFRS documentation.",
        )
    }
    # Name of the one mandatory argument.
    required = "message"

    def _run(self, message: str):
        """Send `message` to `ifrs_agent` and return the text of its reply."""
        return ifrs_agent.chat(message).response




def generate_llm_config(tool):
    """Build a function-calling schema entry describing ``tool``.

    Args:
        tool: Object exposing
            * ``name`` (str) -- lower-cased, with spaces replaced by underscores,
              to form the function name;
            * ``description`` (str);
            * ``args`` -- mapping of parameter name to schema fragment, or ``None``;
            * ``required`` -- one parameter name, a list/tuple of parameter
              names, or ``None``.

    Returns:
        dict with the keys ``name``, ``description`` and ``parameters``, where
        ``parameters`` holds ``type``, ``properties`` and ``required``.
    """
    # Copy the mapping so later edits to the schema cannot leak back into the
    # tool's (typically class-level, shared) ``args`` attribute.
    properties = dict(tool.args) if tool.args is not None else {}

    # Accept a single name as well as a collection of names; appending a list
    # as one element would produce a nested, invalid ``required`` entry.
    required = tool.required
    if required is None:
        required_names = []
    elif isinstance(required, (list, tuple)):
        required_names = list(required)
    else:
        required_names = [required]

    return {
        "name": tool.name.lower().replace(" ", "_"),
        "description": tool.description,
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required_names,
        },
    }


# Instantiate the two tool adapters that are exposed to the AutoGen assistants.
coa_store_tool = CoaStoreTool()
ifrs_tool = IfrsTool()

# LLM settings for the function-calling assistants: same endpoints as `config_list`,
# plus one function schema per tool.
llm_config_assistant = {
    # "Seed" : 42,
    "temperature": 0,
    "functions": [
        generate_llm_config(coa_store_tool),
        generate_llm_config(ifrs_tool),
    ],
    "config_list": config_list,
    "timeout": 120,
}

# Display the config for inspection with the API key masked, so the secret is not
# written into the notebook's saved cell output. `llm_config_assistant` itself is
# left untouched.
{
    **llm_config_assistant,
    "config_list": [{**endpoint, "api_key": "***"} for endpoint in config_list],
}


{'temperature': 0,
 'functions': [{'name': 'chart_of_account_tool',
   'description': 'Use this tool when you need account information and account meta data.',
   'parameters': {'type': 'object',
    'properties': {'message': {'type': 'string',
      'description': 'The question to ask in relation to general ledger accounts and web search queries.'}},
    'required': ['message']}},
  {'name': 'ifrs_tool',
   'description': 'Use this tool when you need accounting standard information from IFRS standards.',
   'parameters': {'type': 'object',
    'properties': {'message': {'type': 'string',
      'description': 'The question to ask in relation IFRS documentation.'}},
    'required': ['message']}}],
 'config_list': [{'model': 'gpt-4-1106-preview',
   'api_key': '<REDACTED>'}],
 'timeout': 120}

## Build Crew

**0. UserProxyAgent**  
Description: Acts as the interface between the user and the other agents. It collects user input, such as specific requirements or modifications, and communicates this information to the appropriate agents.  
Responsibilities: Gather user requirements, initiate the chart-building process by communicating with the ChartBuilderAgent, and present the final validated chart of accounts to the user.

**1. AccountStoreAgent**  
Description: This agent has direct access to the account store that contains all possible accounts. Its primary role is to provide the necessary account data to other agents upon request.  
Responsibilities: Retrieve and send account data from the store to the ChartBuilderAgent based on specific queries or criteria.

**2. ChartBuilderAgent**  
Description: Responsible for constructing the chart of accounts. This agent receives account data from the AccountStoreAgent and uses it to build or modify the chart of accounts according to the user's specifications or business rules.  

Responsibilities: Build or update the chart of accounts using the data provided, apply business rules or customization options, and send the draft chart to the ValidationAgent for review.

**3. ValidationAgent**  
Description: Checks the integrity and correctness of the chart of accounts created by the ChartBuilderAgent. This includes validating the structure against regulatory and company-specific standards.  
Responsibilities: Validate the proposed chart of accounts, ensure compliance with standards, and provide feedback or approval back to the ChartBuilderAgent.

In [43]:
# Proxy agent that stands in for the user. It never asks for human input
# (human_input_mode="NEVER") and auto-replies at most 10 times in a row.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    # A message ends the chat when its content is non-empty and, ignoring trailing
    # whitespace, ends with "TERMINATE". Missing/empty content yields a falsy value.
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={
        "work_dir": "tmp",
        "use_docker": False,
    },  # Please set use_docker=True if docker is available to run the generated code. Using docker is safer than running the generated code directly.
)

# Register the tool callables on the proxy, keyed by the same names that are
# advertised in the function schemas (`chart_of_account_tool`, `ifrs_tool`).
user_proxy.register_function(
    function_map={
        coa_store_tool.name: coa_store_tool._run,
        ifrs_tool.name: ifrs_tool._run,
    }
)


# Account store assistant: system prompt instructing the agent to serve account data
# through the `chart_of_account_tool` function.
system_message_account_store_assistant = """
# Description: 
This agent has direct access to the account store using the function `chart_of_account_tool` that contains all possible accounts and its meta data. 
Your primary role is to provide the necessary account data to other agents upon request.

# Responsibilities: 
Retrieve and send account data from the function `chart_of_account_tool` to the ChartBuilderAgent based on specific queries or criteria. 


"""

# Configured with `llm_config_assistant`, i.e. the config that carries the function schemas.
account_store_agent = autogen.AssistantAgent(
            name="account_store_agent",
            llm_config=llm_config_assistant,
            system_message = system_message_account_store_assistant
            
        )

# Chart builder: system prompt for the agent that assembles the chart of accounts.
# The prompt asks the agent to finish with `TERMINATE`, the marker that
# `user_proxy`'s termination check looks for.
system_message_chart_builder = """
# Description: 
You are responsible for constructing a chart of accounts. You receive receives account data and account meta data from the AccountStoreAgent and uses it to build or modify the chart of accounts according to the user's specifications or business rules. 

# Responsibilities: 
Build or update the chart of accounts using the data provided, apply business rules or customization options, and send the draft chart to the ValidationAgent for review. 

Reply `TERMINATE` in the end when everything is done.
"""

# Configured with the plain `llm_config` (no function schemas).
chart_builder_agent = autogen.AssistantAgent(
            name="chart_builder_agent",
            llm_config=llm_config,
            system_message = system_message_chart_builder
            
        )


# Validation agent: system prompt instructing the agent to check the proposed chart
# of accounts against IFRS via the `ifrs_tool` function.
system_message_validation_agent = """
# Description:  
This agent has direct access to the accounting standard form IFRS using the function `ifrs_tool`. 
Use the function `ifrs_tool` with a plane text as input. Call the function with a 'IFRS' accounting standard request.

Checks the integrity and correctness of the chart of accounts created by the ChartBuilderAgent. 
This includes validating the structure against regulatory and company-specific standards.


# Responsibilities: 
Validate the proposed chart of accounts unsing the tool `ifrs_tool`, ensure compliance with standards, and provide feedback or approval back to the ChartBuilderAgent.  
"""

# Configured with `llm_config_assistant`, i.e. the config that carries the function schemas.
validation_agent= autogen.AssistantAgent(
            name="validation_agent",
            llm_config=llm_config_assistant,
            system_message = system_message_validation_agent
            
        )

# Group chat: all four agents share one conversation, capped at 12 rounds.
groupchat = autogen.GroupChat(agents=[user_proxy, account_store_agent, chart_builder_agent, validation_agent], messages=[], max_round=12)

# Workflow description handed to the group chat manager as its system message.
manager_system_message = """
    # Workflow

    1. UserProxyAgent to ChartBuilderAgent
    Task: UserProxyAgent collects initial requirements from the user for the custom chart of accounts and communicates these to the ChartBuilderAgent.

    2. ChartBuilderAgent to AccountStoreAgent
    Task: Once the requirements are understood, the ChartBuilderAgent requests specific account data from the AccountStoreAgent needed to build the chart.

    3. AccountStoreAgent to ChartBuilderAgent
    Task: AccountStoreAgent retrieves the necessary account data from the store and sends it back to the ChartBuilderAgent.

    4.ChartBuilderAgent to ValidationAgent
    Task: After constructing the initial chart of accounts, the ChartBuilderAgent submits it to the ValidationAgent for compliance checking and validation.

    5. ValidationAgent to ChartBuilderAgent
    Task: ValidationAgent returns feedback or approval to the ChartBuilderAgent. If modifications are required, the ChartBuilderAgent adjusts the chart accordingly.

    6. ChartBuilderAgent to UserProxyAgent
    Task: Once the chart is finalized and validated, the ChartBuilderAgent sends the completed chart back to the UserProxyAgent.

    7. UserProxyAgent to User
    Task: UserProxyAgent presents the final chart of accounts to the user and handles any further interactions or modifications requested by the user.

"""

# Backward-compatible alias for the original (misspelled) variable name.
sytsm_message = manager_system_message

# The original passed `system_message`, a name this cell never defines: on a fresh
# kernel that raises NameError, and on a stale kernel it silently picks up whatever
# value an earlier cell left behind. Pass the workflow prompt defined above instead.
manager = autogen.GroupChatManager(
    groupchat=groupchat,
    llm_config=llm_config,
    system_message=manager_system_message,
)



# Experiment

In [49]:
# Task sent to the group chat as the opening user message in the experiment below.
prompt = """

Build a chart of account for a company that produces products. 
Provide the result in with meta data information in a table structure.

"""

In [45]:
# prompt = """
#     Little Electrodes, Inc. is a retailer that sells electronics and computer parts. On January 1, Little Electrode, Inc. sells a computer monitor to a customer.
#     How to record a journal entry for the sales invoice below to a customer "called  "Software Client"? 

#     INVOICE:  
#     '''
#         Total: 381.21€
#         VAT 19%: 72.41€
#         Gross Amount incl VAT:453.53€  
#     '''
#     # Task  
#     Provide the full journal entry in a table with the columns account_name, debit, credit.
#     Reference the account description for the used accounts from the account store.
    
#     Additionally, explain how to book a the bank statement transaction of the invoice based on web search result. 
    
#     Finally, use the ifrs tool to reference the journal entries rules.

#     Always structue your response with headlines.



# """

**CoA Crew**

In [50]:
def reset_agents():
    """Reset all four agents and then the group chat, in a fixed order."""
    participants = (
        user_proxy,
        account_store_agent,
        chart_builder_agent,
        validation_agent,
        groupchat,
    )
    for participant in participants:
        participant.reset()

reset_agents()


In [51]:
# Start the group conversation: `user_proxy` sends `prompt` to the group chat manager.
# The call's return value is kept in `r`.
r = user_proxy.initiate_chat(
        manager, message= prompt
    )


[33muser_proxy[0m (to chat_manager):



Build a chart of account for a company that produces products. 
Provide the result in with meta data information in a table structure.



--------------------------------------------------------------------------------
[33maccount_store_agent[0m (to chat_manager):

[32m***** Suggested function Call: chart_of_account_tool *****[0m
Arguments: 
{"message":"chart of accounts for a manufacturing company"}
[32m**********************************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION chart_of_account_tool...[0m
[1;3;38;5;200mThought: I need to use a tool to help me answer the question.
Action: tool_web_search
Action Input: {'input': 'chart of accounts for a manufacturing company'}
[0mAdded user message to memory: chart of accounts for a manufacturing company
=== Calling Function ===
Calling function: google_search_tool with args: {"query":"