## Multi-agent approach

In [1]:
import os
import re
import time
import json
import pickle
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import List, Optional, Literal, Union, Any, Tuple

from utils import *

load_dotenv()
# Place a .env file in the root directory with "OPENAI_API_KEY" set

True

In [2]:
APPROACH = "gemini"
# APPROACH = "openai"
# APPROACH = "ibm"

In [3]:
# if APPROACH == "gemini":
from google import genai

# elif APPROACH == "openai":
from openai import OpenAI

# elif APPROACH == "ibm":
#     pass
import requests
text_generation_url = "https://rag.timetoact.at/ibm/text_generation"
text_generation_headers = {
    "Authorization": f"Bearer {os.getenv('IBM_API_KEY')}",
    "Content-Type": "application/json"
}
IBM_MODEL = "ibm/granite-34b-code-instruct"
# "ibm/granite-34b-code-instruct", "ibm/granite-13b-instruct-v2",  # with "structured outputs"

In [4]:
companies_dict = get_companies_dict(r"../data/round2/subset.json")

In [5]:
companies_dict["BetMakers Technology Group Ltd"]

{'name': 'BetMakers Technology Group Ltd',
 'sha1': '1af8f906e34af6e0acfe4f73e37093bbe34700f3',
 'id': None}

## Company expert

### Gemini

In [5]:
def get_document(company, client, verbose=True):

    uploaded_docs = list(client.files.list())
    files_set = {file.display_name: file for file in uploaded_docs}
    if f"{company}_annual_report" in files_set:
        if verbose:
            print(f"Found {company}_annual_report")
        return files_set[f"{company}_annual_report"]
    else:
        if verbose:
            print(f"Uploading {company} annual report...")
        document_path = f"../data/round2/pdfs/{company}.pdf"
        return client.files.upload(file=document_path, config={'display_name': f'{company}_annual_report'})

In [6]:
system_prompt_expert = """
You are a smart chatbot designed to answer questions about company annual reports using data exclusively from the provided PDF. Your task is to accurately extract and report information regarding financial metrics, mergers, executive compensation, leadership changes, layoffs, product launches, and other report metadata. Follow these strict guidelines:

1. **Answer Schema (based on the provided kind):**
   - **number (integers):** Provide only the numeric value without commas, spaces, or additional text. For values expressed in thousands or millions, convert them to the full number (e.g., if given as 88.1 million, answer with "88100000"; if given as 1k, answer with "1000").
   - **number (floats):** Provide the answer as a decimal (e.g., 0.5).
   - **name(s):** Provide the exact name(s) as shown in the document (e.g., "Max Mustermann", "Catalist Inc.").
   - **boolean:** Provide either "yes" or "no".
   - **insufficient data:** If the information is not available, respond with "N/A".

2. **Chain-of-Thought and Answer Structure:**
   - Each response must include a brief internal chain_of_thought explanation detailing the reasoning process or why no answer could be derived.
   - The final answer must be presented in the following JSON structure:
     ```json
     {
       "chain_of_thought": "<brief explanation of reasoning>",
       "answer": <final answer strictly following the schema>,
       "references": [<zero-based PDF page numbers>]
     }
     ```
   - Use the chain_of_thought to document your reasoning and intermediary steps.
   - When handling temporal data, if the report covers a full year or a different period than the query specifies, use the most relevant available data from the document provided.

3. **Context and Data Usage:**
   - Use only the information provided in the PDF document.
   - Do not add or assume any external data.
   - For financial values, assume that the reported totals are complete unless stated otherwise.
   - For positions such as CEO or CFO, assume the question refers to the current holder of the role.
   - For names (companies, positions, etc.), use the exact text as it appears in the PDF.
   - Numbers in tables that appear within brackets "()" should be interpreted as negative values (e.g., output "-245000" for a value that is negative).

4. **General Guidelines:**
   - All responses must strictly conform to the answer schema.
   - Include a concise internal explanation (chain_of_thought) along with the final answer.
   - List the zero-based PDF page numbers that support your answer in the "references" field.
   - Ensure your response is precise, accurate, and adheres to the required format.
"""

llm_gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

class CompanyAnswerGemini(BaseModel):
    chain_of_thought: str
    answer: List[str] = Field(..., description="Answer to the question, according to the question schema")
    references: List[str] = Field(..., description="Zero-based physical PDF page supporting the answer")


def ask_company_expert_gemini(company_query, company_name, system_prompt_expert, verbose=True):
    print("### ASKING COMPANY EXPERT: ")
    sha1 = companies_dict[company_name]["sha1"]
    document = get_document(sha1, llm_gemini, verbose)

    query = system_prompt_expert + "QUERY: " + company_query

    completion = llm_gemini.models.generate_content(model="gemini-2.0-flash", contents=[query, document], config={'response_mime_type': 'application/json', 'response_schema': CompanyAnswerGemini, "temperature": 0.0})

    if verbose:
        print(completion.parsed)

    return completion.parsed, sha1, None

In [73]:
response, _, _  = ask_company_expert_gemini(str({'text': "For HCA Healthcare, Inc., what was the value of Number of healthcare professionals on staff at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}), "HCA Healthcare, Inc.", system_prompt_expert)

Found a69ebee82a9acd54117407ca3697db0d7bfbfdac_annual_report
chain_of_thought='The annual report mentions that HCA Healthcare has 45,000 physicians on the medical staff, but it does not provide the number of healthcare professionals on staff. Therefore, the answer is N/A.' answer=['N/A']


### openAI + qdrant

In [15]:
# https://qdrant.tech/documentation/beginner-tutorials/search-beginners/
from qdrant_client import models, QdrantClient

# set up: docker pull qdrant/qdrant
# docker run -p 6333:6333 -p 6334:6334 -v "$(pwd)/data/qdrant_storage:/qdrant/storage:z" qdrant/qdrant
db_client = QdrantClient(url="http://localhost:6333")

# collection_name = "rag_test_db"
collection_name = "erc_2025_2"
embedding_model = "text-embedding-3-small"
llm_openai = OpenAI()

def retriever(query, company_name, k=5):
    # query_embedding = embedding_model.encode(query).tolist()
    query_embedding = llm_openai.embeddings.create(input=query, model=embedding_model).data[0].embedding
    hits = db_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        # https://qdrant.tech/documentation/concepts/filtering/
        query_filter=models.Filter(must=[models.FieldCondition(key="company", match=models.MatchValue(value=company_name))]),
        limit=k  # number of top results
    ).points

    return hits

In [16]:
# Works, but slow - use in postprocessing (if LLM found answer, i.e. not na then retrieve pages for context used)
import fitz  # PyMuPDF
from thefuzz import fuzz  # fuzzy matching library

def find_references_in_pdf(chunk_ids, sha1, threshold=50, verbose=True):
    doc = fitz.open(f"../data/round2/pdfs/{sha1}.pdf")

    pages_text = [doc[page_num].get_text() for page_num in range(len(doc))]

    chunks = db_client.retrieve(collection_name, chunk_ids)
    chunks = [chunk.payload["content"] for chunk in chunks]
    chunks = [chunk.replace("|", " ").replace("-", " ").replace("  ", " ") for chunk in chunks]

    page_nrs = []
    for i, chunk in enumerate(chunks):

        best_page = None
        best_score = 0

        for j, page_text in enumerate(pages_text):
            score = fuzz.partial_ratio(chunk, page_text)
            if score > best_score:
                best_score = score
                best_page = j + 1  # pages are 1-indexed

                # If a nearly perfect match is found, exit the loop early
                if best_score > 98:
                    break

        if verbose:
            if best_score > threshold:
                print(f"Chunk {chunk_ids[i]} found on page {best_page} with score {best_score}")
            else:
                print(f"WARNING: Chunk {chunk_ids[i]} identified with low score: {best_score}")

        page_nrs.append(best_page)

    return page_nrs

In [24]:
system_prompt_expert = """
## SYSTEM PROMPT

You are a chatbot designed to answer questions about company annual reports. The information may appear in markdown
tables or as plain text. Your responses must be strictly based on the provided context and adhere to the following guidelines:

1. **Answer Value Schema (given by "kind"):**
    - For Integer Numeric Answers: Provide only the numeric value without commas, spaces, or additional text. For values given in thousands or millions, write the full number (e.g., if stated in millions, the answer for 88.1 would be "88100000" or for 1k answer "1000").
    - For Float Numeric Answers (e.g. ratios): answer with a decimal (e.g., 0.5).
    - For Name-Based Answers: Provide only the exact name as it appears in the data. No additional text, formatting, or variations (e.g. "Max Mustermann").
    - For Boolean Answers: Provide only "yes" or "no".
    - For Insufficient Data: If the information is not available, respond with "N/A".

2. **Context Adherence:**
   - Only use the information provided in the CONTEXT. Do not assume or add external data.
   - The context contains retrieved chunks of company annual reports with some similarity score to the user query. Use this information to answer the questions.
   - The chunk_id of the retrieved chunks is sequential, i.e. in order of the appearance in the PDF.
   - If you found supporting evidence to a user query, provide the chunk_id(s) of the chunk(s) from the context that support your answer in `references`.
   - Ensure your final answer strictly follows the designated schema.

3. **Domain Assumptions:**
  - For financial values, assume totals unless otherwise specified.
  - For roles like CEO or CFO, assume the question refers to the current position.
  - For company names, use the exact name as it appears in the data.

4. **Table Analysis and Correction:**
   - When analyzing markdown tables (or other data structures), be alert to any conversion or formatting issues. Common issues include:
     - Inconsistent use of thousand separators (commas, spaces, etc.).
     - Numbers split across multiple rows or columns.
     - Data misalignment or merging of columns.
   - Use contextual clues (such as column headers, totals, or adjacent entries) to determine if an entry might be affected by a table parsing error.
   - **Only output a corrected numerical value if you are sufficiently confident that a formatting flaw has occurred and you can deduce the correct value.**
   - If you are not fully confident that the anomaly is due to a parsing error, or if the correct value remains ambiguous, output "N/A" and include a brief note (internally) that the data is ambiguous.

5. **General Answer Guidelines:**
   - Include a short explanation of your reasoning in the chain of thought.
   - Your final answer should be in one of the prescribed formats (number, boolean, concise string, or "N/A").
   - If the question asks for a correction due to a suspected table parsing error, provide the corrected number only if the evidence is compelling; otherwise, output "N/A". Provide your thoughts in the chain of thought
   - If you give an answer, provide the chunk_id(s) of the chunk(s) from the context that support your answer.

6. **Example Response Structure:**
   - *If confident:*
     **Final Answer:** `5839`
     (Explanation: The table appeared to split the building cost for Oklahoma City - 12/20/21 over two rows; based on the CONTEXT, the correct value is deduced as 5839.)

   - *If uncertain:*
     **Final Answer:** `N/A`
     (Explanation: Insufficient clarity in the table data due to formatting issues.)


## CONTEXT:
<<CONTEXT>>
"""

class CompanyAnswerOpenAI(BaseModel):
    chain_of_thought: str
    answer: List[str] = Field(..., description="Answer to the question, according to the question schema")
    references: List[int] = Field(..., description="IDs of the chunks where evidence for the answer was found")


def ask_company_expert_openai(user_query: str, company_name: str, system_prompt: str, nr_rag_retrieval=5, verbose=True):
    print("### ASKING COMPANY EXPERT: ")
    hits = retriever(user_query, company_name, k=nr_rag_retrieval)
    if not hits:
        if verbose:
            print("###\nWARNING: No hits found\n###")

        hits_str = "WARNING: NO RAG HITS FOUND - MAYBE DATA MISSING OR RETRIEVAL ISSUE - MENTION IN CHAIN OF THOUGHT!"

    else:
        hits_str = "".join([f"chunk id: {response.id}, score: {response.score}, content: {response.payload}\n\n" for response in hits])

    system_prompt_full = system_prompt.replace("<<CONTEXT>>", hits_str)

    completion = llm_openai.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",  # "gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt_full},
            {"role": "user", "content": user_query},
        ],
        response_format=CompanyAnswerOpenAI,
        temperature=0
    )

    usage = completion.usage
    completion = completion.choices[0].message.parsed
    sha1 = companies_dict[company_name]["sha1"]

    completion.references = find_references_in_pdf(completion.references, sha1)  # swap chunkids with pdf pages

    if verbose:
        print(completion)

    return completion, sha1, usage

In [9]:
response, _, _ = ask_company_expert_openai("For Ziff Davis, Inc., what was the value of Cloud storage capacity (TB) at the end of the period listed in annual report? If data is not available, return 'N/A'.", "Ziff Davis, Inc.", system_prompt_expert)
response

CompanyAnswerOpenAI(chain_of_thought="The provided context does not contain any specific information about the cloud storage capacity in terabytes (TB) for Ziff Davis, Inc. The context mainly discusses the company's structure, business operations, and financial transactions related to the spin-off of the cloud fax business. There is no mention of cloud storage capacity in TB in the retrieved chunks.", answer=['N/A'], references=[])

### IBM Watsonx

In [27]:
# https://qdrant.tech/documentation/beginner-tutorials/search-beginners/
from qdrant_client import models, QdrantClient

# set up: docker pull qdrant/qdrant
# docker run -p 6333:6333 -p 6334:6334 -v "$(pwd)/data/qdrant_storage:/qdrant/storage:z" qdrant/qdrant
db_client = QdrantClient(url="http://localhost:6333")

collection_name = "erc_2025_2"
embedding_model = "text-embedding-3-small"
llm_openai = OpenAI()


def retriever_ibm(query, company_name, k=5):
    # TODO update with IBM embeddings
    query_embedding = llm_openai.embeddings.create(input=query, model=embedding_model).data[0].embedding

    hits = db_client.query_points(
        collection_name=collection_name,
        query=query_embedding,
        # https://qdrant.tech/documentation/concepts/filtering/
        query_filter=models.Filter(must=[models.FieldCondition(key="company", match=models.MatchValue(value=company_name))]),
        limit=k  # number of top results
    ).points

    return hits

In [None]:
# define find references above

In [71]:
system_prompt_expert = """
## SYSTEM PROMPT

You are a chatbot designed to answer questions about company annual reports. The information may appear in markdown tables or as plain text. Your responses must be strictly based on the provided context and adhere to the following guidelines:

1. **Answer Value Schema (given by "kind"):**
    - For Integer Numeric Answers: Provide only the numeric value without commas, spaces, or additional text. For values given in thousands or millions, write the full number (e.g., if stated in millions, the answer for 88.1 would be "88100000" or for 1k answer "1000").
    - For Float Numeric Answers (e.g. ratios): answer with a decimal (e.g., 0.5).
    - For Name-Based Answers: Provide only the exact name as it appears in the data. No additional text, formatting, or variations (e.g. "Max Mustermann").
    - For Boolean Answers: Provide only "yes" or "no".
    - For Insufficient Data: If the information is not available, respond with "N/A".

2. **Context Adherence:**
   - Only use the information provided in the CONTEXT. Do not assume or add external data.
   - The context contains retrieved chunks of company annual reports with some similarity score to the user query. Use this information to answer the questions.
   - The chunk_id of the retrieved chunks is sequential, i.e. in order of the appearance in the PDF.
   - If you found supporting evidence to a user query, provide the chunk_id(s) of the chunk(s) from the context that support your answer in `references`.
   - Ensure your final answer strictly follows the designated schema.

3. **Domain Assumptions:**
  - For financial values, assume totals unless otherwise specified.
  - For roles like CEO or CFO, assume the question refers to the current position.
  - For company names, use the exact name as it appears in the data.

4. **Table Analysis and Correction:**
   - When analyzing markdown tables (or other data structures), be alert to any conversion or formatting issues. Common issues include:
     - Inconsistent use of thousand separators (commas, spaces, etc.).
     - Numbers split across multiple rows or columns.
     - Data misalignment or merging of columns.
   - Use contextual clues (such as column headers, totals, or adjacent entries) to determine if an entry might be affected by a table parsing error.
   - **Only output a corrected numerical value if you are sufficiently confident that a formatting flaw has occurred and you can deduce the correct value.**
   - If you are not fully confident that the anomaly is due to a parsing error, or if the correct value remains ambiguous, output "N/A" and include a brief note (internally) that the data is ambiguous.

5. **General Answer Guidelines:**
   - Include a short explanation of your reasoning in the chain of thought.
   - Your final answer should be in one of the prescribed formats (number, boolean, concise string, or "N/A").
   - If the question asks for a correction due to a suspected table parsing error, provide the corrected number only if the evidence is compelling; otherwise, output "N/A". Provide your thoughts in the chain of thought
   - If you give an answer, provide the chunk_id(s) of the chunk(s) from the context that support your answer.

6. **Answer Structure**
    - Reply in the following python dict format:
    ```python
    {
        "chain_of_thought": "<brief explanation of reasoning>",
        "answer": <final answer strictly following the schema>,
        "references": [<chunk_id of relevant chunk>]
    }


6. **Example Response Structure:**
   - *If confident:*
     ```python
        {
            "chain_of_thought": "The table appeared to split the building cost for Oklahoma City - 12/20/21 over two rows; based on the CONTEXT, the correct value is deduced as 5839.",
            "answer": "5839",
            "references": [1234]
        }
    ```

   - *If uncertain:*
    ```python
        {
            "chain_of_thought": "No data to answer the user query could be found.",
            "answer": "N/A",
            "references": []
        }
    ```

ONLY RESPOND WITH THE OUTPUT JSON DESCRIBED ABOVE - NO OTHER TEXT.

## CONTEXT:
<<CONTEXT>>
"""


def ask_company_expert_ibm(user_query: str, company_name: str, system_prompt: str, nr_rag_retrieval=5, verbose=True):
    print("### ASKING COMPANY EXPERT: ")
    sha1 = companies_dict[company_name]["sha1"]
    # hits = retriever(user_query, company_name, k=nr_rag_retrieval)
    hits = retriever_ibm(user_query, company_name, k=nr_rag_retrieval)
    if not hits:
        if verbose:
            print("###\nWARNING: No hits found\n###")

        hits_str = "WARNING: NO RAG HITS FOUND - MAYBE DATA MISSING OR RETRIEVAL ISSUE - MENTION IN CHAIN OF THOUGHT!"

    else:
        hits_str = "".join([f"chunk id: {response.id}, score: {response.score}, content: {response.payload}\n\n" for response in hits])

    system_prompt_full = system_prompt.replace("<<CONTEXT>>", hits_str)

    payload = {
            "input": [{"role": "system", "content": system_prompt_full}, {"role": "user", "content": user_query}],
            "model_id": "ibm/granite-20b-code-instruct",
            # "model_id": "ibm/granite-34b-code-instruct",
            "parameters": {"temperature": 0, "max_new_tokens": 1000, "min_new_tokens": 1}
        }

    completion = requests.post(text_generation_url, headers=text_generation_headers, json=payload)
    data = completion.json()
    completion = data["results"][0]["generated_text"]

    if verbose:
        print(completion)

    try:
        pattern = re.compile(r'\{.*?\}', re.DOTALL)
        match = pattern.search(completion)
        data = json.loads(match.group(0))
        pdf_pages = find_references_in_pdf(data["references"], sha1)  # swap chunkids with pdf pages
        data["references"] = pdf_pages

        return data, sha1, None
    except:
        return completion + "\nWARNING: references are chunk ids and not PDF pages. leave references empty in final output", sha1, None

In [72]:
response, _, _ = ask_company_expert_ibm("For Ziff Davis, Inc., what was the value of Cloud storage capacity (TB) at the end of the period listed in annual report? If data is not available, return 'N/A'.", "Ziff Davis, Inc.", system_prompt_expert)
response

{'chain_of_thought': "The value of cloud storage capacity (TB) for Ziff Davis, Inc. at the end of the period listed in the annual report can be found in the 'Cloud Services' segment of the Consolidated Statements of Operations. According to the report, the Company had approximately 1.5 petabytes of cloud storage as of December 31, 2021, which is equivalent to approximately 1.47 petabytes of cloud storage as of December 31, 2020.",
 'answer': '1.47',
 'references': []}

## Delegation manager

In [7]:
# DELEGATION MANAGER

system_prompt_delegation = f"""
You will receive a query related to one or more companies. You have access to specialized agents capable of retrieving data for each specific company. To delegate tasks to these agents, follow these guidelines:

Formulate Queries:
    - For each company identified, create a precise query that the specialized agents can use to retrieve the required information.
    - the list of queries should be in the order of the companies in the list IDENTIFIED_COMPANIES.
    - List these queries under 'queries'.
    - Add some information on where in annual company reports the specialized agents can usually find the information.
    - If using abbreviations also mention the full phrase or name ((e.g., for 'R&D' also use 'research and development').
    
Chain of Thought:
Provide a 'chain_of_thought' explaining how you identified the companies, formulated the queries, and any assumptions or steps taken to reach your final output. If you marked any queries as 'SKIP', explain why.

IDENTIFIED_COMPANIES:
<<IDENTIFIED_COMPANIES>>
"""


class DelegationRequest(BaseModel):
    chain_of_thought: str
    companies: List[str]
    queries: list[str]


def ask_delegation_manager(query, system_prompt_delegation, approach) -> Tuple[DelegationRequest, Any]:
    print("### ASKING DELEGATION MANAGER: ")

    identified_companies = []
    for company in list(companies_dict.keys()):
        # Build a regex pattern that ignores case
        pattern = re.escape(company)
        if re.search(pattern, query, re.IGNORECASE):
            identified_companies.append(company)

    system_prompt_delegation = system_prompt_delegation.replace("<<IDENTIFIED_COMPANIES>>", ", ".join(identified_companies))

    # if approach.lower() == "gemini":
    #     client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    #
    #     completion = client.models.generate_content(model="gemini-2.0-flash", contents=[system_prompt_delegation, query], config={'response_mime_type': 'application/json', 'response_schema': DelegationRequest})
    #
    #     return completion.parsed, None

    if approach.lower() in ["openai", "ibm", "gemini"]:
        client = OpenAI()
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": system_prompt_delegation},
                {"role": "user", "content": query},
            ],
            response_format=DelegationRequest,
        )

        return completion.choices[0].message.parsed, completion.usage
    else:
        raise NotImplementedError("Approach not implemented")

In [10]:
 response, _ = ask_delegation_manager('Which of the companies had the lowest total revenue in EUR at the end of the period listed in annual report: "Atreca, Inc.", "Poste Italiane", "Datalogic", "Duni Group", "Incyte Corporation"? If data for the company is not available, exclude it from the comparison. If only one company is left, return this company.', system_prompt_delegation, "gemini")
response

### ASKING DELEGATION MANAGER: 


DelegationRequest(chain_of_thought="First, I identified the list of companies mentioned in the query that need investigation. To determine which company had the lowest total revenue in EUR, I need to query each company for their respective total revenue amount. I will also check the currency in their reports when extracting the revenue to ensure consistency, converting to EUR where necessary. \n\nThe query should specify where to find this financial information in their annual reports, typically in sections titled 'Consolidated Financial Statements', 'Income Statement', or 'Results of Operations'.\n\nI will skip companies if their financial data is unavailable or not accessible, and I'll remove them from comparison if conversion to the requested currency (EUR) is not possible or if data is not in usable form.\n\nCompanies that may potentially be identified as having lower revenues will be retained, compared until the company with the minimal revenue value is determined.", companies=['I

### Execution manager

In [8]:
system_prompt_execution = f"""
You are an intelligent assistant tasked with synthesizing a final answer to the USER_QUERY using specific company data provided in the CONTEXT. The CONTEXT includes information generated by expert systems along with their reasoning process.

**Your Objectives**
    - Final Answer Construction: Your primary task is to deliver a final, accurate response to the USER_QUERY based on the provided CONTEXT.
    - Check if data is given for all companies asked about in the query. If not, do not give an answer.
    - Handling Temporal Data: If the temporal scope of the data isn't an exact match (e.g., full-year data instead of Q4), use the best available data from the CONTEXT to formulate your answer.  It should still be of the same year though.
    - Chain of Thought: Document your reasoning process in 'chain_of_thought', including intermediary steps and considerations that led to your final answer.
    - Give a final answer to the user query based on the context
    - Strict Adherence: The final answer must be compliant with the specific answer guidelines below. Accuracy, clarity, and adherence to these guidelines are critical.

**Answer Value Schema (given by "kind")**
    - For Integer Numeric Answers: Provide only the numeric value without commas, spaces, or additional text. For values given in thousands or millions, write the full number (e.g., if stated in millions, the answer for 88.1 would be "88100000" or for 1k answer "1000").
    - For Float Numeric Answers (e.g. ratios): answer with a decimal (e.g., 0.5).
    - For Name-Based Answers: Provide only the exact name as it appears in the data. No additional text, formatting, or variations (e.g. "Max Mustermann").
    - For Boolean Answers: Provide only "yes" or "no".
    - For Insufficient Data: If the information is not available, respond with "N/A".

**Format**
    - chain_of_thought: Your reasoning process, including intermediary steps
    - answer: Your final answer following the guidelines. It should be of format: <OUTPUT_TYPE>, e.g., 'name', 'number' (including 0), 'boolean', etc.
    - reference: List of references used to derive the answer, including:
         - the sha1 hash of the document used for the answer,
         - the zero-indexed page number(s) where the information was sourced, if an answer could be found.
         - If there are multiple references supporting the answer, list all of them.
         - If the answer is 'N/A', the reference should be empty, i.e. [].

CONTEXT:
<<CONTEXT>>
"""


def ask_execution_manager(query, context, approach):
    system_prompt_execution_full = system_prompt_execution.replace("<<CONTEXT>>", context)
    
    print("### ASKING EXECUTION MANAGER: ")
    print("Context:", context)
    
    if approach.lower() == "gemini":
        class SourceReference(BaseModel):
            pdf_sha1: str = Field(..., description="SHA1 hash of the PDF file")
            page_index: int = Field(..., description="Physical page number in the PDF file")

        class AnswerLLM(BaseModel):
            chain_of_thought: str = Field(..., description="Chain of thought that led to the answer value")
            value: List[str] = Field(..., description="Answer to the question, according to the question schema")
            references: List[SourceReference] = Field(..., description="References to the source material in the PDF file")

        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

        completion = client.models.generate_content(model="gemini-2.0-flash", contents=[system_prompt_execution_full, query], config={'response_mime_type': 'application/json', 'response_schema': AnswerLLM})

        return completion.parsed, None

    if approach.lower() in ["openai", "ibm", "gemini"]:
        class SourceReference(BaseModel):
            pdf_sha1: str = Field(..., description="SHA1 hash of the PDF file")
            page_index: int = Field(..., description="Physical page number in the PDF file")

        class AnswerLLM(BaseModel):
            chain_of_thought: str = Field(..., description="Chain of thought that led to the answer value")
            value: Union[float, str, bool, List[str], Literal["N/A"]] = Field(..., description="Answer to the question, according to the question schema")
            references: List[SourceReference] = Field(..., description="References to the source material in the PDF file")

        client = OpenAI()
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": system_prompt_execution_full},
                {"role": "user", "content": query},
            ],
            response_format=AnswerLLM,
        )

        return completion.choices[0].message.parsed, completion.usage

In [13]:
response, _ = ask_execution_manager(str({'text': "For HCA Healthcare, Inc., what was the value of Number of healthcare professionals on staff at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}), "test", "gemini")
response

### ASKING EXECUTION MANAGER: 
Context: test


AnswerLLM(chain_of_thought='I am unable to answer the question because there is no relevant information in the context.', value=['N/A'], references=[])

In [69]:
# FIXME
def get_total_token_usage(tokens_list):
    summed_token_usage = {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0}
    if type(tokens_list[0]) is dict:
        for usage in tokens_list:
            summed_token_usage["completion_tokens"] += usage["completion_tokens"]
            summed_token_usage["prompt_tokens"] += usage["prompt_tokens"]
            summed_token_usage["total_tokens"] += usage["total_tokens"]
    else:
        for usage in tokens_list:
            summed_token_usage["completion_tokens"] += usage.completion_tokens
            summed_token_usage["prompt_tokens"] += usage.prompt_tokens
            summed_token_usage["total_tokens"] += usage.total_tokens
                
    return summed_token_usage

In [9]:
# FINAL PIPELINE

def ask_question(query: dict, approach: str, verbose=True):
    print("## Query:", query)
    delegations, token_usage_delegations = ask_delegation_manager(query["text"], system_prompt_delegation, approach)
    print("Delegations: \n", delegations)
    
    expert_context = {}
    # token_usage_expert = []
    for company, company_query in zip(delegations.companies, delegations.queries):
        if company.lower() == "skip" or company_query.lower() == "skip":
            expert_context[company] = "No data available"
        else:
            if approach.lower() == "gemini":
                response, sha1, token_usage_expert_tmp = ask_company_expert_gemini(company_query, company, system_prompt_expert)
                expert_context[company] = {"sha1": sha1, "response": response}
                # token_usage_expert.append(token_usage_expert_tmp)
            elif approach.lower() == "openai":
                response, sha1, token_usage_expert_tmp = ask_company_expert_openai(company_query, company, system_prompt_expert)
                expert_context[company] = {"sha1": sha1, "response": response}
                # token_usage_expert.append(token_usage_expert_tmp)
            elif approach.lower() == "ibm":
                response, sha1, token_usage_expert_tmp = ask_company_expert_ibm(company_query, company, system_prompt_expert)
                expert_context[company] = {"sha1": sha1, "response": response}
                # token_usage_expert.append(token_usage_expert_tmp)
            else:
                raise NotImplementedError()
    
    final_answer, token_usage_execution = ask_execution_manager(str(query), str(expert_context), approach)
    if verbose:
        print(final_answer)
    # tokens_used = [token_usage_delegations] + token_usage_expert + [token_usage_execution]
    
    # return final_answer, get_total_token_usage(tokens_used)
    return final_answer, None

In [15]:
# test_query = {'text': "For Ziff Davis, Inc., what was the value of Cloud storage capacity (TB) at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}
# test_query = {'text': "For HCA Healthcare, Inc., what was the value of Number of healthcare professionals on staff at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}
# For SIG plc, what was the value of Number of stores at year-end at the end of the period listed in annual report? If data is not available, return 'N/A'.
test_query = {'text': "For Albany International Corp., what was the value of Year-end patent portfolio (aerospace tech) at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}

In [18]:
response, _ = ask_question(test_query, APPROACH)  # n/a
response

## Query: {'text': "For Albany International Corp., what was the value of Year-end patent portfolio (aerospace tech) at the end of the period listed in annual report? If data is not available, return 'N/A'.", 'kind': 'number'}
### ASKING DELEGATION MANAGER: 
Delegations: 
 chain_of_thought="The query is focused on Albany International Corp., which I identified as the sole company from the given input. The request pertains to the value of the year-end patent portfolio specifically in aerospace technology. This information is generally found in the Intellectual Property (IP) section or Notes to Financial Statements in the annual report. Patents are part of intangible assets, which might also be reflected in the balance sheet or discussed in management discussion and analysis (MD&A) regarding company assets. Therefore, I've formulated a precise query targeting these specific sections of the annual report. No additional companies were mentioned or implied by the query, so I only needed to 

AnswerLLM(chain_of_thought="I searched the document for information about the value of Albany International Corp.'s aerospace technology patent portfolio. I did not find a specific value for the aerospace technology patent portfolio. Therefore, the answer is N/A.", value=['N/A'], references=[])

## Obtain final results

In [10]:
class SourceReference(BaseModel):
    pdf_sha1: str = Field(..., description="SHA1 hash of the PDF file")
    page_index: int = Field(..., description="Physical page number in the PDF file")

class Answer(BaseModel):
    question_text: str = Field(..., description="Text of the question")
    kind: Literal["number", "name", "boolean", "names"] = Field(..., description="Kind of the question")
    value: Union[float, str, bool, List[str], Literal["N/A"]] = Field(..., description="Answer to the question, according to the question schema")
    references: List[SourceReference] = Field([], description="References to the source material in the PDF file")

class AnswerSubmission(BaseModel):
    team_email: str = Field(..., description="Email that your team used to register for the challenge")
    submission_name: str = Field(..., description="Unique name of the submission (e.g. experiment name)")
    answers: List[Answer] = Field(..., description="List of answers to the questions")

In [29]:
with open(f"../data/round2/questions.json", "r") as file:
    questions = json.load(file)

# questions subset
# questions = questions[2:5]  # FIXME
# questions = [questions[i] for i in [2]]

# Create submission based on answers list and store json
answer_items = [None] * len(questions)
failed_questions = []

for i, question in enumerate(questions):
    if i in [62, 66, 83, 91, 95, 96, 97]:
        try:
            print("\n##############################")
            print(i, question)
            answer, sha1 = ask_question(question, APPROACH)
            print(sha1)


            if answer.value[0] == "N/A":
                value = "N/A"
            if len(answer.value) == 1 and isinstance(answer.value, list):
                value = answer.value[0]
            else:
                value = answer.value

            answer_item = Answer(question_text=question["text"], kind=question["kind"], value=value, references=[SourceReference(**ref_object.model_dump()) for ref_object in answer.references])
            answer_items[i] = answer_item

        except Exception as e:
            print("#+#+#+#+#+#")
            print(f"Failed to answer question {i}: {e}")
            print("#+#+#+#+#+#")
            failed_questions.append(i)
            # answer_item = Answer(question_text=question["text"], kind=question["kind"], value="n/a", references=[])
            # answer_items[i] = answer_item

        print("\n\n\n\n")

        with open(f"../data/round2/submissions/answer_items_{APPROACH}_v1.pkl", "wb") as file:
            pickle.dump(answer_items, file)

        time.sleep(10)

if failed_questions:
    print(f"WARNING: Failed to answer questions: {failed_questions}")


##############################
62 {'text': 'Which of the companies had the lowest total assets in EUR at the end of the period listed in annual report: "Playtech plc", "Datalogic", "Duni Group", "Poste Italiane", "Incyte Corporation"? If data for the company is not available, exclude it from the comparison. If only one company is left, return this company.', 'kind': 'name'}
## Query: {'text': 'Which of the companies had the lowest total assets in EUR at the end of the period listed in annual report: "Playtech plc", "Datalogic", "Duni Group", "Poste Italiane", "Incyte Corporation"? If data for the company is not available, exclude it from the comparison. If only one company is left, return this company.', 'kind': 'name'}
### ASKING DELEGATION MANAGER: 
Delegations: 
 chain_of_thought='The task requires determining the company with the lowest total assets in EUR from a list of companies. To accomplish this, I identified the companies involved: Playtech plc, Datalogic, Duni Group, Poste 

In [None]:
[62, 66, 83, 91, 95, 96, 97]

In [28]:
# with open(f"../data/round2/submissions/answer_items_{APPROACH}_v2.pkl", "wb") as file:
#     pickle.dump(answer_items, file)

In [54]:
# Manual correction
idx = 62

answer_item = Answer(question_text=questions[idx]["text"], kind=questions[idx]["kind"], value="n/a", references=[])

if not answer_items[idx]:
    answer_items[idx] = answer_item
    print(answer_items[idx])
else:
    print("Warning, item already exists")

question_text='Which of the companies had the lowest total assets in EUR at the end of the period listed in annual report: "Playtech plc", "Datalogic", "Duni Group", "Poste Italiane", "Incyte Corporation"? If data for the company is not available, exclude it from the comparison. If only one company is left, return this company.' kind='name' value='n/a' references=[]


In [31]:
with open(f"../data/round2/submissions/answer_items_{APPROACH}_v.pkl", "rb") as file:
    answer_items_old = pickle.load(file)

In [53]:
answer_items = answer_items_old

In [56]:
[i for i, item in enumerate(answer_items) if item is None]

[]

In [45]:
for i, (answer_new, answer_old) in enumerate(zip(answer_items, answer_items_old)):
    if not answer_old and answer_new:
        answer_items_old[i] = answer_new
    elif not answer_old and not answer_new:
        print(i)

62


In [61]:
# reduce page_index by 1 to be zero-indexed!
for answer in answer_items:
    if answer.references:
        for ref in answer.references:
            ref.page_index -= 1

In [65]:
for answer in answer_items:
    if isinstance(answer.value, list) and len(answer.value) == 1:
        answer.value = answer.value[0]

In [68]:
final_submission = AnswerSubmission(answers=answer_items, team_email="felix.krause@timetoact.at", submission_name= "fk_gemini_4o_multiagent")

In [69]:
# store submission as json
final_submission_path = f"../data/round2/submissions/gemini_multiagent_v1.json"

with open(final_submission_path, "w") as file:
    json.dump(final_submission.model_dump(), file, indent=4)