In [24]:
import os
import PyPDF2
import markdown2
import json
import pickle
import pandas as pd

from docx import Document as DocxDocument
from pptx import Presentation

from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings.base import OpenAIEmbeddings

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Rinats well documented sample project
# https://github.com/trustbit/private-poc-fmw-content-generator/tree/main/backend-and-kb/src/fmw

from dotenv import load_dotenv

load_dotenv()

True

## Get Companies

In [25]:
df = pd.read_csv("data/dataset.csv") 
df

Unnamed: 0,sha1,date,name,size
0,ce9e5024041b2ece2bafa2a9d9516bb174ee8949,2022-10-31,"Anixa Biosciences, Inc.",3996701
1,f71415f9ca0cff70e5fa193616b6197f361130ed,2023-02-21,"Maravai LifeSciences Holdings, Inc.",4033642
2,4a9d2b853e05970776121a810460f0962a18c5a1,2022-XX,KLA Corporation,1181894
3,f973dd219c534accb0d4e72d8e12f51284d48d10,2023-01-01,"Ameresco, Inc.",10648267
4,4e27f4c3402c657d548760cb3a164b036cefaabb,2022-12-31,Battery Minerals Limited,3650701
...,...,...,...,...
7490,a2afcd8165a6dbd0058682680b65d3638a5800eb,2023-02-02,"Arrow Electronics, Inc",922750
7491,215df84494756bd4feebc973657835ef7f14ee16,2022-12-31,Synertec,10474497
7492,20fb970d8705289e835b408c575351295ac16f5f,2022-09-30,TE Connectivity,5753097
7493,c7f3a8c0a38c756438950ce3085076adb4241a32,2022-01-01,OTC_ADDDF,12603054


In [26]:
# Create sample for development
COMPANIES = ["Ethernity Networks Ltd", "Limbach Holdings, Inc.", "Accuray Incorporated"]

if COMPANIES:
    df_sample = df[df.name.isin(COMPANIES)]
else:
    df_sample = df.copy()

In [27]:
df_sample

Unnamed: 0,sha1,date,name,size
1183,99be213e4e689294ebae809bfa6a1b5024076286,2022-01-01,"Limbach Holdings, Inc.",2023552
5801,e51b7204b91cbe7709bd3218e7d2d0c2b8dbb438,2023-01-01,Ethernity Networks Ltd,1180978
6859,dd78f748262b8ffa62de6484143ff55b38af24c7,2022-06-30,Accuray Incorporated,3317389


## Get company name from user query

TODO could be more sophisticated

In [28]:
import re
from fuzzywuzzy import fuzz

def normalize_name(name):
    # Normalize by removing extra spaces and converting to lowercase
    return re.sub(r'\s+', ' ', name.strip().lower())

def fuzzy_match(companies, input_string, threshold=85):
    matched_companies = []
    norm_input = normalize_name(input_string)
    
    for company in companies:
        norm_company = normalize_name(company)
        prev_token = ""
        
        # Fuzzy match each token
        for curr_token in norm_input.split():
            token = prev_token + " " + curr_token
            
            match_score = fuzz.ratio(norm_company, token)
            prev_token = curr_token
            
            if match_score >= threshold:
                matched_companies.append(company)
                break  # Stop once a match is found for this company
            
    return matched_companies


def find_exact_matches(companies, input_string):
    # normalized_input = normalize_name(input_string)
    
    matched_companies = []
    
    for company in companies:
        if company in input_string:
            matched_companies.append(company)
    
    return matched_companies


def find_companies(input_string):
    # Wrapper to use later and allow easy function change
    return find_exact_matches(df.name, input_string)


input_string = ("limbach holding inc and ethernity networks buys the accuracy incorporated, ESEA, Five9, Inc.")

matches = find_exact_matches(df.name, input_string)
print(matches)

['EA', 'ESEA', 'Five9, Inc.']


## Load LLM data

In [29]:
FOLDER_PATH = r"C:\Users\felix.krause\code\trustbit\enterprise-rag-challenge\samples"

# embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2" # Hugging Face model

In [30]:
from llama_parse import LlamaParse

parser = LlamaParse(
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True
)

# Function to load text from different file types
def load_text_from_file(file_path):
    _, file_extension = os.path.splitext(file_path)
    text = ""
    
    if file_extension == ".pdf":
        # TODO https://www.reddit.com/r/LangChain/comments/18yxacm/extracting_data_from_pdf_containing_complex_tables/
        with open(file_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f, strict=False)
            for page in reader.pages:
                text += page.extract_text() + "\n"
    else:
        pass
    
    return text


# Function to load documents from a folder
def load_documents_from_folder(folder_path, companies=[]):
    documents = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file[:-4] in companies: #or len(companies) == 0:
                print("Loading", file)
                file_path = os.path.join(root, file)

                text = load_text_from_file(file_path)
                if text:
                    documents.append(Document(page_content=text, metadata={"source": file_path}))
    return documents

In [40]:
# Could try LLama approach
# TODO https://medium.com/the-ai-forum/rag-on-complex-pdf-using-llamaparse-langchain-and-groq-5b132bd1f9f3
# doc = parser.load_data(os.path.join(FOLDER_PATH, "e51b7204b91cbe7709bd3218e7d2d0c2b8dbb438.pdf"))
# index = VectorStoreIndex.from_documents(doc)

## Try to detect tables from pdf

In [8]:
"""
import camelot

# Extract tables from a PDF file
tables = camelot.read_pdf(os.path.join(FOLDER_PATH, "e51b7204b91cbe7709bd3218e7d2d0c2b8dbb438.pdf"), pages="all")

# Convert the first table to a DataFrame
df = tables[0].df

# Save the DataFrame as a CSV file
tables[0]
"""



<Table shape=(1, 1)>

In [47]:
# tables[40].df

Unnamed: 0,0,1,2,3
0,STRATEGIC REPORT\nCORPORATE GOVERNANCE,,FINANCIAL STATEMENTS\nFINANCIAL STATEMENTS,73\n73


## Build retriever per company

In [31]:
def get_store_name(string):
    string = string.lower()
    return string.replace(" ", "_").replace(",", "").replace(".", "").replace("'", "")

In [84]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200) # TODO more sophisticated
sha1_dict = df_sample.set_index("name").to_dict()["sha1"]

for company in df_sample.name.to_list():
    company_docs = load_documents_from_folder(FOLDER_PATH, [sha1_dict[company]])
    
    if not company_docs:
        raise ValueError(f"No documents found for company: {company}")
    
    texts = text_splitter.split_documents(company_docs)
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(texts, embeddings)
    
    with open(f"dbs-llmx/{get_store_name(company)}.db", "wb") as f:
        pickle.dump(db.serialize_to_bytes(), f)

FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead


Loading 99be213e4e689294ebae809bfa6a1b5024076286.pdf


FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead
FloatObject (b'0.00-40') invalid; use 0.0 instead


Loading e51b7204b91cbe7709bd3218e7d2d0c2b8dbb438.pdf
Loading dd78f748262b8ffa62de6484143ff55b38af24c7.pdf


In [15]:
def get_retriever_dict(companies):
    retriever_dict = {}
    
    for company in companies:
        with open(f"dbs-llmx/{get_store_name(company)}.db", "rb") as f:
            db_bytes = pickle.load(f)
        
        db_temp = FAISS.deserialize_from_bytes(db_bytes, OpenAIEmbeddings(),
                                      allow_dangerous_deserialization=True)
        
        retriever_dict[company] = db_temp.as_retriever()
    
    return retriever_dict

In [16]:
retriever_dict = get_retriever_dict(df_sample.name.to_list())

In [19]:
def get_context(query, retriever_dict):
    companies = find_companies(query)
    
    if not companies:
        raise ValueError("No company found in the query")
    
    context = {}
    print(companies)
    for company in companies:
        context[company] = retriever_dict[company].get_relevant_documents(query)
        # TODO add sophisticated retriever for tables?
        
    return context

## Structured inference

In [50]:
MODEL = "gpt-4o-mini-2024-07-18"

# load system prompt from .md
with open("instructions/answer_guidelines.md") as f:
    ANSWER_GUIDELINES = f.read()
    
with open("instructions/fin_info.md") as f:
    FIN_INFO = f.read()



system_prompt = (
    f"You are an intelligent assistant tasked with answering questions based on specific company data provided in the context. "
    
    f"Please provide a chain of thought on how you arrived at your final answer in 'chain_of_thought', providing intermediary results. \n"
    f"Then give a final answer in 'answer' where you strictly adhere to the following guidelines: \n {ANSWER_GUIDELINES} \n "
    f"Ensure that your answers are strictly compliant with these guidelines. Accuracy and adherence to the format are crucial. \n"
    
    # f"Your primary task is to accurately identify and extract specific financial metrics, ratios, and counts, even when synonymous "
    # f"or contextually similar terms are used. Here is some useful information about these: \n {FIN_INFO}"
)

In [72]:
from pydantic import BaseModel
from openai import OpenAI

client = OpenAI()

class AnswerEvent(BaseModel):
    chain_of_thought: str
    answer: int | str


def ask_company_expert(query):
    print("# ASKING COMPANY EXPERT")
    context = get_context(query, retriever_dict)
    completion = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt + f"Context: \n {context}"},
            {"role": "user", "content": query},
        ],
        response_format=AnswerEvent,
    )
    
    return completion.choices[0].message.parsed

In [43]:
ask_company_expert("How many assets does 'Accuray Incorporated' have in 2022 in dollars?")

['Accuray Incorporated']


AnswerEvent(chain_of_thought="The total assets for 'Accuray Incorporated' in 2022 are stated in the consolidated balance sheets provided. The relevant value for total assets in 2022 is given as $472,849 (in thousands). To convert this into dollars, we multiply this figure by 1,000. Thus, the total assets for 'Accuray Incorporated' in 2022 amount to 472849000 dollars.", answer=472849000)

In [96]:
ask_company_expert("How many assets does 'Ethernity Networks Ltd' have in 2022 in dollars?")

['Ethernity Networks Ltd']


AnswerEvent(chain_of_thought='The provided context does not contain specific information regarding the total assets of Ethernity Networks Ltd for the year 2022. Therefore, I cannot provide an answer as the necessary data is absent.', answer='n/a')

In [69]:
ask_company_expert("What are the total R&D expenses of 'Ethernity Networks Ltd' in 2021?")

['Ethernity Networks Ltd']


AnswerEvent(chain_of_thought='In the provided context, it is mentioned that the total R&D expenses for Ethernity Networks Ltd in 2021 were 5,550,912.', answer=5550912)

In [71]:
ask_company_expert("Has 'Ethernity Networks Ltd' more assets than 'Accuray Incorporated' in 2022?")

['Ethernity Networks Ltd', 'Accuray Incorporated']


AnswerEvent(chain_of_thought="The provided context only contains detailed financial information about 'Ethernity Networks Ltd', including revenue and operating loss figures for 2022, but it lacks specific data regarding the total assets of both 'Ethernity Networks Ltd' and 'Accuray Incorporated'. Without these asset figures, it's not possible to compare the two companies' assets directly. Hence, I cannot determine whether 'Ethernity Networks Ltd' had more assets than 'Accuray Incorporated' in 2022.", answer='n/a')

## Multiple agents

In [73]:
# DELEGATION MANAGER

INVOLVED_COMPANIES = df_sample.name.to_list()

system_prompt_delegation = (
    f"You will receive questions about data from one or more companies. "
    f"You have access to specialised agents in providing data for each company. "
    f"To access them, you need to provide the company name in a list of 'companies' and the respective query in a list of 'queries'. \n"
    f"For example, if the question is about if company X has more assets than company Y in 2022, you would provide the following: \n"
    f"companies: ['X', 'Y'], queries: ['How many assets does 'X' have in 2022 in dollars?', 'How many assets does 'Y' have in 2022 in dollars?'] \n"
    f"The company name has to be exactly one of these: {INVOLVED_COMPANIES}. \n"
    f"Also provide a chain of thought on how you arrived at your final answer in 'chain_of_thought'. \n"
)


class DelegationRequest(BaseModel):
    chain_of_thought: str
    companies: list[str]
    queries: list[str]


def ask_delegation_manager(query):    
    print("# ASKING DELEGATION MANAGER: ")
    completion = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt_delegation},
            {"role": "user", "content": query},
        ],
        response_format=DelegationRequest,
    )
    
    return completion.choices[0].message.parsed

In [46]:
ask_delegation_manager("Has 'Ethernity Networks Ltd' more liabilities than 'Accuray Incorporated' in 2022?")

ManagerRequest(chain_of_thought="To determine if 'Ethernity Networks Ltd' has more assets than 'Accuray Incorporated' in 2022, I need to compare their total assets for that year. Therefore, I will first query the total assets of 'Ethernity Networks Ltd' for 2022 and then do the same for 'Accuray Incorporated'. This will allow me to compare the two values directly.", companies=['Ethernity Networks Ltd', 'Accuray Incorporated'], queries=["How many assets does 'Ethernity Networks Ltd' have in 2022 in dollars?", "How many assets does 'Accuray Incorporated' have in 2022 in dollars?"])

In [45]:
ask_delegation_manager("How many assets does 'Ethernity Networks Ltd' have in 2022 in dollars?")

ManagerRequest(chain_of_thought="To answer the question regarding the assets of 'Ethernity Networks Ltd' in 2022, I need to access the data provider concerning this specific company and ask for the relevant financial information. Herein, the query will be about the total assets it holds in 2022 in dollar value, which is a direct question about the company's financial stature in that year.", companies=['Ethernity Networks Ltd'], queries=["How many assets does 'Ethernity Networks Ltd' have in 2022 in dollars?"])

In [74]:
# EXECUTION MANAGER

system_prompt_execution = (
    f"You are an intelligent assistant tasked with answering questions based on specific company data provided in the context. "
    "Also the chain of thought of the expert providing the data is given. "
    "Your task is to provide a final answer to the USER_QUERY based on the data provided (CONTEXT). \n"
    
    f"Please provide a chain of thought on how you arrived at your final answer in 'chain_of_thought', providing intermediary results. \n"
    f"Then give a final answer in 'answer' where you strictly adhere to the following guidelines: \n {ANSWER_GUIDELINES} \n "
    f"Ensure that your answers are strictly compliant with these guidelines. Accuracy and adherence to the format are crucial. \n"
    
    "USER_QUERY: {QUERY} \n"
    
    "CONTEXT: {CONTEXT} \n"
)

class ExecutionRequest(BaseModel):
    chain_of_thought: str
    answer: int | str

def ask_execution_manager(query, context):
    system_prompt_execution_full = system_prompt_execution.replace("{QUERY}", query).replace("{CONTEXT}", context)
    
    print("# ASKING EXECUTION MANAGER: ")
    print("Query:", query)
    print("Context:", context)
    
    completion = client.beta.chat.completions.parse(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt_execution_full},
            {"role": "user", "content": query},
        ],
        response_format=ExecutionRequest,
    )
    
    return completion.choices[0].message.parsed

In [75]:
# FINAL PIPELINE

def ask_question(query):
    delegations = ask_delegation_manager(query)
    print("Delegations: \n", delegations)
    expert_context = {}
    
    for company, company_query in zip(delegations.companies, delegations.queries):
        expert_context[company] = ask_company_expert(company + ": " + company_query)
    
    final_answer = ask_execution_manager(query, str(expert_context))
    
    return final_answer

In [76]:
ask_question("Who is the CEO of Ethernity Networks?") # David Levi

# ASKING DELEGATION MANAGER: 
Delegations: 
 chain_of_thought='To answer the question about the CEO of Ethernity Networks, I need to access data specifically about Ethernity Networks Ltd. Thus, I will formulate a request that includes only this company. The query will focus on retrieving the current CEO of Ethernity Networks.' companies=['Ethernity Networks Ltd'] queries=['Who is the CEO of Ethernity Networks Ltd?']
# ASKING COMPANY EXPERT
['Ethernity Networks Ltd']
# ASKING EXECUTION MANAGER: 
Query: Who is the CEO of Ethernity Networks?
Context: {'Ethernity Networks Ltd': AnswerEvent(chain_of_thought='The context specifically lists the Directors of Ethernity Networks Ltd, and it clearly identifies David Levi as the Chief Executive Officer. This provides a direct answer to the question about who the CEO is.', answer='David Levi')}


ExecutionRequest(chain_of_thought='The context specifically lists the Directors of Ethernity Networks Ltd, and it clearly identifies David Levi as the Chief Executive Officer. This provides a direct answer to the question about who the CEO is.', answer='David Levi')

In [71]:
ask_question("Has 'Ethernity Networks Ltd' more liabilities than 'Accuray Incorporated' in 2022?") # Accuray: 419,660,000$, Ethernity: 12,257,291$ -> no is correct answer

ASKING DELEGATION MANAGER: 
Delegations: 
 chain_of_thought="To determine if 'Ethernity Networks Ltd' has more liabilities than 'Accuray Incorporated' in 2022, I need to gather specific financial data for both companies for that year. This involves two queries: one for the liabilities of 'Ethernity Networks Ltd' and another for 'Accuray Incorporated'. I will then compare the two values to analyze which company has higher liabilities." companies=['Ethernity Networks Ltd', 'Accuray Incorporated'] queries=["How many liabilities does 'Ethernity Networks Ltd' have in 2022 in dollars?", "How many liabilities does 'Accuray Incorporated' have in 2022 in dollars?"]
ASKING COMPANY EXPERT
['Ethernity Networks Ltd']
ASKING COMPANY EXPERT
['Accuray Incorporated']
ASKING EXECUTION MANAGER: 
Query: Has 'Ethernity Networks Ltd' more liabilities than 'Accuray Incorporated' in 2022?
Context: {'Ethernity Networks Ltd': AnswerEvent(chain_of_thought='To find the total liabilities for Ethernity Networks Ltd

ExecutionRequest(chain_of_thought='I need to compare the total liabilities for both companies in 2022. From the data, Ethernity Networks Ltd has total liabilities of 1,121,909 dollars. For Accuray Incorporated, the total liabilities calculated from thousands is 419,660,000 dollars. Now, comparing the two figures: 1,121,909 is significantly less than 419,660,000. Therefore, Ethernity Networks Ltd does not have more liabilities than Accuray Incorporated.', answer='no')

## Inference with chain (old)

In [7]:
# Create vector store
# https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/vectorstore/
# Load documents from the specified folder
documents = load_documents_from_folder(FOLDER_PATH, df_sample.sha1.to_list())

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) # TODO more sophisticated
texts = text_splitter.split_documents(documents)

if len(texts) == 0:
    raise ValueError("No text found in the specified folder")

In [8]:
embeddings = OpenAIEmbeddings() # deprecated?
db = FAISS.from_documents(texts, embeddings)
retriever = db.as_retriever()

In [54]:
# Store vector database
# with open("data/first_sample_db_3", "wb") as f:
#     pickle.dump(db.serialize_to_bytes(), f)

In [8]:
# Load vector database
# with open("data/vector_db_uni", "rb") as f:
#     db_bytes = pickle.load(f)
#     db = FAISS.deserialize_from_bytes(db_bytes, OpenAIEmbeddings(),
#                                       allow_dangerous_deserialization=True)
#     retriever = db.as_retriever()

In [31]:
llm = ChatOpenAI(temperature=0)
# llm = llm.bind(logprobs=True, top_logprobs=3)
# could try local models as well

system_prompt_2 = (
    f"You are an intelligent assistant tasked with answering questions based on specific company data provided in the context. "
    f"Please provide a chain of thought on how you arrived at your final answer in 'chain_of_thought'. \n"
    f"Then give a final answer in 'answer' where you strictly adhere to the following guidelines: \n {ANSWER_GUIDELINES} \n "
    f"Ensure that your answers are strictly compliant with these guidelines. Accuracy and adherence to the format are crucial. \n"
    
    # f"Your primary task is to accurately identify and extract specific financial metrics, ratios, and counts, even when synonymous "
    # f"or contextually similar terms are used. Here is some useful information about these: \n {fin_info}"
    
    "Context: \n {context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt_2),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)


# Function to ask a question
def ask_question(query):
    # retriever = get_retriever(query)
    
    chain = create_retrieval_chain(retriever, question_answer_chain)
    
    response = chain.invoke({"input": query})
    answer = response["answer"]
    print(f"Question: {query}\nAnswer: {answer}")

```python
    ('What was the {fin_metric} of "{company}" in {time_frame}?', "number"),
    ('How much did "{company}" spend on {focus_area} in {time_frame}?', "number"),
    ('What was the {ratio_or_metric} of "{company}" in {time_frame}?', "number"),
    ('How many {count_metric} did "{company}" have in {time_frame}?', "number"),
    ('Which company had a higher {fin_metric}: "{company1}", "{company2}" or "{company3}", in {time_frame}?', "name"),
    ('Did "{company1}" have a greater {ratio_or_metric} than "{company2}" in {time_frame}?', "boolean"),
    ('How much more did "{company1}" spend on {focus_area} compared to "{company2}" in {time_frame}?', "number"),
    ('Who is the {role} in the company "{company}"?', "name"),
```

In [None]:
# multiple agents
# retriever per company
# -> first check if we need several processes to answer the questions (e.g. check revenues of 3 companies, then use these values with a final agent)
# chain of though: output json with chain of thought and final answer

In [28]:
# Ask a question
ask_question("How many assets does 'Accuray Incorporated' have in 2022 in dollars?") # 472.849 

Question: How many assets does 'Accuray Incorporated' have in 2022 in dollars?
Answer: chain_of_thought: 
1. Look for the Consolidated Balance Sheets section to find the total assets for Accuray Incorporated in 2022.
2. Identify the value listed under "Total current assets" and any other asset categories to calculate the total assets for the company in 2022.

answer: 352890


In [29]:
ask_question("How many assets does 'Accuray Incorporated' have in 2021 in dollars?") # 480.098

Question: How many assets does 'Accuray Incorporated' have in 2021 in dollars?
Answer: Chain of Thought:
1. Look for the Consolidated Balance Sheets section.
2. Find the total assets value for 'Accuray Incorporated' for June 30, 2021.

Answer:
352773


In [32]:
ask_question("How many liabilities does 'Accuray Incorporated' have in 2021 in dollars?") # 411.258

Question: How many liabilities does 'Accuray Incorporated' have in 2021 in dollars?
Answer: Chain of Thought:
1. Liabilities can be calculated by subtracting total assets from total equity, as liabilities = assets - equity.
2. The total assets for 2021 are not provided directly but can be calculated by summing up the current assets and other assets like property, equipment, etc.
3. The total equity for 2021 is not directly given but can be calculated by adding common stock, additional paid-in capital, accumulated other comprehensive income (loss), and accumulated deficit from the stockholders' equity statement.
4. Once total assets and total equity are calculated, liabilities can be determined.

Answer:
$378,000

Chain of Thought:
- Total assets for 2021 are calculated as $350,890 (current assets) + $12,685 (property and equipment) + $13,879 (investment in joint venture) + $16,798 (operating lease right-of-use assets) + $57,840 (goodwill) + $250 (intangible assets) = $452,342.
- Total 

In [78]:
ask_question("How many stores did 'Strike Energy Limited' have in the end of fiscal year 2021?") # n/a

Question: How many stores did 'Strike Energy Limited' have in the end of fiscal year 2021?
Answer: n/a


In [83]:
# Total R&D Expenses of Ethernity Networks
ask_question("What are the total R&D expenses of 'Ethernity Networks' in 2021?") # 5 550 912

ask_question("What are the total R&D expenses of 'Ethernity Networks' in 2022?") # 6 618 795

Question: What are the total R&D expenses of 'Ethernity Networks' in 2021?
Answer: 5550912
Question: What are the total R&D expenses of 'Ethernity Networks' in 2022?
Answer: 6618795
