In [None]:
import os
import time

from dotenv import load_dotenv

load_dotenv()

from load_pdfs import load_pdfs
from preprocessing import preprocess_documents
from retrievers import build_retrievers, get_retriever_for_query
from llm_model import get_llm
from unified_ask import ask_unified
from printers import print_sources

# Build everything the query cells below rely on: the loaded documents, `db`,
# the retrievers, and the LLM. These names are read as globals by later cells.
print("Starting pipeline...")
# Load the source PDFs, then preprocess them into `db` and `split_docs`.
docs = load_pdfs()
db, split_docs = preprocess_documents(docs)

# Three retrievers are returned; the query cells shown in this notebook pass only
# `db`, `bm25` and `hybrid` to get_retriever_for_query, so `vec` is not referenced
# by any of the cells visible here.
bm25, vec, hybrid = build_retrievers(db, split_docs)
llm = get_llm()

In [None]:
# UC 1: locate a specific document (the 2024 CAPP final report).
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "Find the CAPP final report from 2024."
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\nResponse:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")

In [None]:
# UC 2: summarize a particular document (the 2024 CAPP final report).
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "Summarize the CAPP final report from 2024"
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\n--- UC2: Summarize Particular Documents ---\n")
print("Response:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")

In [None]:
# UC 3: find documents by what they contain rather than by title.
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "Find documents related to system executive policies on AI"
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\n--- UC3: Find Documents by Contents ---\n")
print("Response:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")

In [None]:
# UC 4: ask for a specific piece of information (dates of AI policy votes).
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "When were votes on AI policies conducted?"
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\n--- UC4: Finding Particular Information ---\n")
print("Response:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")

In [None]:
# UC 5: gather related information across documents (history of GE resolutions).
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "Show me the history of resolutions on GE"
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\n--- UC5: Finding Related Information ---\n")
print("Response:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")

In [None]:
# UC 6: follow-up question that refines an earlier answer.
# NOTE(review): the question refers to "the history of resolutions you showed me",
# but this cell passes only the question text, `llm` and a freshly selected
# retriever to ask_unified. Confirm that ask_unified has access to the earlier
# exchange; nothing in this cell hands it over.
# The timer starts before retriever selection, so the reported time covers
# retriever selection, the ask_unified call, and the printing below.
start = time.perf_counter()
question = "In the history of resolutions you showed me, which ones are supportive or opposing GE reforms?"
retr = get_retriever_for_query(question, db, bm25, hybrid)
result = ask_unified(question, llm, retr)

# Fall back to an empty string when the result has no "answer" key.
answer = result.get("answer", "")
print("\n--- UC6: Refinement of Found Information ---\n")
print("Response:\n", answer)
print_sources(result)

elapsed = time.perf_counter() - start
print(f"UC: {elapsed:,.1f}s")