In [None]:
import os
import sys
import glob
import openai
from tqdm import tqdm
from dotenv import load_dotenv
# from sentence_transformers import SentenceTransformer
# import faiss
# import numpy as np
import json

In [None]:
# Pull variables from a local .env file into the process environment, then
# hand the OpenAI key to the client library.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
# Never echo the key itself: cell outputs are stored inside the notebook file
# and end up in version control / shared copies. Report only whether it was found.
print(f"OPENAI_API_KEY loaded: {bool(openai.api_key)}")

In [None]:
# Load the financial documents that back the RAG retriever.
from llama_index.core import GPTVectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
# In the package layout that provides `llama_index.core`, the HuggingFace
# embedding is exposed from its own integration module rather than from
# `llama_index.embeddings` directly.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Local sentence-embedding model used both to build and to query the index.
embed_model = HuggingFaceEmbedding(model_name="all-MiniLM-L6-v2")

# Built from components so the path works on every OS (resolved relative to
# the notebook's working directory).
financial_docs_directory = os.path.join("..", "..", "data", "financial_docs")
documents = SimpleDirectoryReader(financial_docs_directory).load_data()
print(f"Loaded {len(documents)} documents from {financial_docs_directory}")

In [None]:
# Build the vector store index from the loaded documents, embedding them with
# the local HuggingFace model instead of the library's default embedder.
index = GPTVectorStoreIndex.from_documents(documents, embed_model=embed_model)
storage_context = index.storage_context
# Persist to the exact directory the loading step reads from ("./storage"),
# rather than relying on the library default happening to match.
storage_context.persist(persist_dir="./storage")
print("Vector store index has been built.")

In [None]:
# Imported here so this cell carries everything it needs to build the LLM object.
from llama_index.llms.openai import OpenAI

# Load the persisted index from storage.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
# Re-attach the embedding model the index was built with; without it the
# reloaded index falls back to the globally configured embedder, whose vectors
# are not comparable with the stored ones.
index = load_index_from_storage(storage_context, embed_model=embed_model)

# Build the query engine. The LLM must be passed as an LLM object: a bare model
# name string is not an accepted value for `llm`.
query_engine = index.as_query_engine(llm=OpenAI(model="gpt-4o-mini"), similarity_top_k=2)

In [None]:
# Smoke-test the RAG query engine with a single question.
response = query_engine.query("Explain return on equity")

# Print the final LLM-generated response
print("🔍 LLM Response:\n", response.response)

# Token-usage entries are printed only when present: indexing the metadata
# unconditionally raises KeyError (or TypeError if metadata is None) whenever
# the engine does not populate these keys.
response_metadata = response.metadata or {}
for label, key in (("Input tokens", "input_tokens"), ("Output tokens", "output_tokens")):
    if key in response_metadata:
        print(f"{label}: {response_metadata[key]}")
if "usage" in response_metadata:
    print(response_metadata["usage"])

# Print the retrieved context chunks (aka source nodes)
print("\n📚 Retrieved Chunks:")
for chunk_number, source_node in enumerate(response.source_nodes, start=1):
    print(f"\n--- Chunk #{chunk_number} ---")
    print(source_node.node.text)

In [None]:
# Make the project's `agents` and `utils` directories importable from this
# notebook. Paths are assembled with os.path.join so they resolve on every OS
# (a literal backslash path only works on Windows); on Windows the resulting
# strings are identical to the previous hard-coded ones.
agents_path = os.path.join("..", "..", "agents")
utils_path = os.path.join("..", "..", "utils")

for module_dir in (agents_path, utils_path):
    # Guard against duplicates so re-running the cell keeps sys.path stable.
    if module_dir not in sys.path:
        sys.path.append(module_dir)

In [None]:
from base_agent import Agent
from initial_generator import InitialGeneratorAgent
from reviewer import ReviewerAgent
from challenger import ChallengerAgent
from refiner import RefinerAgent
# from decider import DeciderAgent

from utils import load_dataset, save_results, calculate_score, load_config, get_profile

In [None]:
# Collect every question sheet (CSV) to run through the agent pipeline.
# The directory is assembled portably instead of with Windows-only backslashes.
datasheets_directory = os.path.join("..", "..", "data", "question_sheets")
# glob returns files in arbitrary, filesystem-dependent order; sort so the
# processing order (and therefore "the first sheet") is reproducible.
datasheets_files = sorted(glob.glob(os.path.join(datasheets_directory, "*.csv")))

In [None]:
# ---------------------------------------------------------------------------
# 1. Initial Generation
#
# Intended pipeline for each question:
#   Question -> Initial: {answer, reasoning} -> Reviewer: {answer, reasoning}
#            -> Challenger {answer, reasoning} -> Refiner {answer, reasoning}
#            -> Decider {answer, reasoning}
# Only the Initial, Reviewer and Challenger stages are invoked below.
# ---------------------------------------------------------------------------

model_name = "gpt-4o-mini"
initialAgent = InitialGeneratorAgent(model=model_name)

# One scored DataFrame per processed sheet.
initial_generator_agent_results = []

# Paths are built from components: the previous non-raw "..\..\x" literals
# contained invalid escape sequences and only resolved on Windows.
config_directory = os.path.join("..", "..", "config")
topic_roles_path = os.path.join(config_directory, "topic_roles.json")
results_directory = os.path.join("..", "..", "data", "results")
os.makedirs(results_directory, exist_ok=True)

for sheet in tqdm(datasheets_files, desc="Processing CSV files..."):
    print(f"Processing file: {sheet}")

    base_name = os.path.basename(sheet)
    sheet_name, ext = os.path.splitext(base_name)

    # Bind the instance to its own name. Assigning it back to `ReviewerAgent`
    # replaced the class with an instance, so the next construction attempt
    # (next sheet, or re-running this cell) tried to call an instance.
    reviewer_agent = ReviewerAgent(topic=sheet_name, model=model_name, topic_roles_json=topic_roles_path)

    dataset = load_dataset(sheet)

    initial_guesses = []
    reviewer_guesses = []
    # `_` instead of `index`: the row label is unused, and naming it `index`
    # overwrote the vector-store index held in the notebook's `index` variable.
    for _, row in tqdm(dataset.iterrows(), total=len(dataset), desc="Processing rows...", leave=False):
        question = (
            f"{row['question']}\n"
            f"A: {row['A']}\n"
            f"B: {row['B']}\n"
            f"C: {row['C']}\n"
            f"D: {row['D']}\n"
        )

        print(question)

        initial_response = initialAgent.process(question)
        print(f"initial response: {initial_response}")
        initial_guesses.append(initial_response)

        print(f"initial guess: {initial_response}")
        reviewer_response = reviewer_agent.process(question, initial_response)
        reviewer_guesses.append(reviewer_response)
        print(f"Reveiewer response: {reviewer_response}")

        # Same class-shadowing fix as for the reviewer: previously the second
        # row failed because `ChallengerAgent` had become an instance.
        challenger_agent = ChallengerAgent(topic="Finance", model=model_name, query_engine=query_engine, topic_roles_json=topic_roles_path)
        # NOTE(review): process() is called without arguments and its result is
        # not used yet; it presumably needs the question and reviewer response
        # like ReviewerAgent.process does - confirm against ChallengerAgent.
        challenger_response = challenger_agent.process()

    # Column names must match the ones passed to calculate_score below; the
    # earlier spellings ("reveiwer_guess" / "inital_guess") did not line up.
    dataset["initial_guess"] = initial_guesses
    dataset["reviewer_guess"] = reviewer_guesses

    # Check correctness of each stage against the reference answers.
    initial_accuracy = calculate_score(dataset, answer_column="answer", guess_column="initial_guess")
    print(f"Initial accuracy for {os.path.basename(sheet)}: {initial_accuracy:.2f}%")

    reviewer_accuracy = calculate_score(dataset, answer_column="answer", guess_column="reviewer_guess")
    # Report the reviewer's own score (this line used to repeat the initial one).
    print(f"Reviewer accuracy for {os.path.basename(sheet)}: {reviewer_accuracy:.2f}%")

    output_filename = f"{sheet_name}_{model_name}{ext}"

    output_file = os.path.join(results_directory, output_filename)
    save_results(dataset, output_file)

    initial_generator_agent_results.append(dataset)

    # NOTE(review): stops after the first sheet - looks like a debugging
    # limiter; remove it to process every file in datasheets_files.
    break



