In [12]:
# ---[ 1. Imports ]---
import os
import faiss
import numpy as np
import sqlite3
import ollama
from sentence_transformers import SentenceTransformer
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, Annotated, Optional
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.store.memory import InMemoryStore
import tqdm



In [2]:

# ---[ 2. Config ]---
# --- Retrieval artifacts (relative to the notebook's working directory) ---
# EPUB_DIR is not referenced by any cell shown in this notebook section.
EPUB_DIR = "cfa_epub"
# Text dump that load_faiss_retriever splits on "\n" into individual passages.
TEXT_DUMP_PATH = "cfa_text_dump.txt"
# Serialized FAISS index read with faiss.read_index.
INDEX_PATH = "cfa_vector.index"

# --- Embedding model name passed to SentenceTransformer ---
MODEL = "all-MiniLM-L6-v2"

# --- SQLite file opened for the checkpointer; resolved to an absolute path ---
DB_PATH = os.path.abspath("cfa_agent_memory.db")

In [3]:

# ---[ 5. FAISS Retriever ]---
def load_faiss_retriever():
    """Load the FAISS index, text dump and embedding model; return a search closure.

    NOTE: a later cell in this notebook defines ``load_faiss_retriever`` again
    with ``top_k=1`` as the default. After a top-to-bottom run that later
    definition is the one bound to this name.

    Reads ``INDEX_PATH`` (FAISS index) and ``TEXT_DUMP_PATH`` (one passage per
    ``"\\n"``-separated line), and instantiates ``SentenceTransformer(MODEL)``.

    Returns:
        A function ``retrieve(query, top_k=5)`` that returns a list of the
        passages whose row ids FAISS reports as nearest to ``query``.
    """
    index = faiss.read_index(INDEX_PATH)
    with open(TEXT_DUMP_PATH, "r", encoding="utf-8") as f:
        # Row i of the index is looked up as line i of the dump.
        documents = f.read().split("\n")
    model = SentenceTransformer(MODEL)

    def retrieve(query, top_k=5):
        """Return up to ``top_k`` passages for ``query``."""
        # FAISS searches over float32 vectors; cast defensively.
        q_emb = np.asarray(model.encode([query]), dtype="float32")
        _, ids = index.search(q_emb, top_k)
        # FAISS fills missing neighbours with -1. A bare ``i < len(documents)``
        # check would let -1 through and silently return the *last* passage,
        # so the lower bound must be checked as well.
        return [documents[i] for i in ids[0] if 0 <= i < len(documents)]

    return retrieve

In [4]:
# ---[ 5. FAISS Retriever ]---
def load_faiss_retriever():
    """Load the FAISS index, text dump and embedding model; return a search closure.

    This definition replaces the one from the previous cell (which defaulted
    to ``top_k=5``); it is the version ``retrieve_node`` ends up calling after
    a top-to-bottom run.

    Reads ``INDEX_PATH`` (FAISS index) and ``TEXT_DUMP_PATH`` (one passage per
    ``"\\n"``-separated line), and instantiates ``SentenceTransformer(MODEL)``.

    Returns:
        A function ``retrieve(query, top_k=1)`` that returns a list of the
        passages whose row ids FAISS reports as nearest to ``query``.
    """
    index = faiss.read_index(INDEX_PATH)
    with open(TEXT_DUMP_PATH, "r", encoding="utf-8") as f:
        # Row i of the index is looked up as line i of the dump.
        documents = f.read().split("\n")
    model = SentenceTransformer(MODEL)

    def retrieve(query, top_k=1):
        """Return up to ``top_k`` passages for ``query``."""
        # FAISS searches over float32 vectors; cast defensively.
        q_emb = np.asarray(model.encode([query]), dtype="float32")
        _, ids = index.search(q_emb, top_k)
        # FAISS fills missing neighbours with -1. A bare ``i < len(documents)``
        # check would let -1 through and silently return the *last* passage,
        # so the lower bound must be checked as well.
        return [documents[i] for i in ids[0] if 0 <= i < len(documents)]

    return retrieve

# ---[ 6. LangGraph RAG Flow ]---
def _concat_docs(existing, incoming):
    """Reducer for ``retrieved_docs``: concatenate the new list onto the old one."""
    return existing + incoming


class State(TypedDict):
    """Shared state passed between the graph nodes."""

    # The user's question; read by both retrieve_node and generate_node.
    query: str
    # Passages written by retrieve_node. Updates are merged with _concat_docs
    # rather than overwritten.
    # NOTE(review): with a checkpointer and a reused thread_id this presumably
    # appends to passages saved by earlier runs -- confirm that is intended.
    retrieved_docs: Annotated[list[str], _concat_docs]
    # The model's reply, written by generate_node.
    answer: str

def retrieve_node(state: State) -> dict:
    """Graph node: look up passages for ``state["query"]``.

    The retriever is built on the first call and cached on the function
    object, so the FAISS index, text dump and embedding model are not
    re-read from disk on every graph invocation. Re-run this cell (or delete
    ``retrieve_node._retriever``) to pick up a rebuilt index or a changed
    ``load_faiss_retriever``.

    Args:
        state: Current graph state; only ``"query"`` is read.

    Returns:
        A partial state update ``{"retrieved_docs": [...]}``.
    """
    retriever = getattr(retrieve_node, "_retriever", None)
    if retriever is None:
        retriever = load_faiss_retriever()
        retrieve_node._retriever = retriever
    return {"retrieved_docs": retriever(state["query"])}

def generate_node(state: State) -> dict:
    """Graph node: ask the local Ollama model to answer from the retrieved passages.

    Args:
        state: Current graph state; ``"retrieved_docs"`` and ``"query"`` are read.

    Returns:
        A partial state update ``{"answer": <model reply text>}``.
    """
    # Passages are separated by a blank line inside the prompt.
    material = "\n\n".join(state["retrieved_docs"])
    question = state["query"]
    user_message = {
        "role": "user",
        "content": (
            "Answer based only on the following CFA material:\n\n"
            f"{material}\n\nQuestion: {question}"
        ),
    }
    reply = ollama.chat(model='deepseek-r1:1.5b', messages=[user_message])
    return {"answer": reply["message"]["content"]}



In [5]:
%%time

# Build a retriever here to time the index + text dump + embedding-model load.
# NOTE: retrieve_node does not use `fun`; it calls load_faiss_retriever() itself.
fun = load_faiss_retriever()

CPU times: user 276 ms, sys: 328 ms, total: 604 ms
Wall time: 1.98 s


In [6]:
# Smoke-test the retriever closure directly, outside the graph.
res = fun("Lognormal PDF and Tax")
# Bare last expression so the notebook displays the returned list.
res

['2.1. The Lognormal Distribution']

In [7]:
# ---[ 7. Graph Setup ]---
# One SQLite connection backs the checkpointer. check_same_thread=False lets
# the connection be used from a thread other than the one that opened it.
sqlite_connection = sqlite3.connect(DB_PATH, uri=True, check_same_thread=False)
checkpointer = SqliteSaver(sqlite_connection)

# In-memory store handed to the compiled graph alongside the checkpointer.
store = InMemoryStore()

# Linear pipeline: START -> retrieve -> generate -> END.
workflow = StateGraph(State)
workflow.add_node("retrieve", retrieve_node)
workflow.add_node("generate", generate_node)
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

graph = workflow.compile(checkpointer=checkpointer, store=store)

In [8]:
# Practice question pasted from the curriculum text. PDF line-break hyphens
# ("Com-/panies", "sub-/sequently") and the "B." label that had been fused onto
# the "A." line are repaired so the model sees the four intended sub-questions.
user_query = """
2. You have developed a set of criteria for evaluating distressed credits.
Companies that do not receive a passing score are classed as likely to go bankrupt
within 12 months. You gathered the following information when validating
the criteria:
■ Forty percent of the companies to which the test is administered will
go bankrupt within 12 months: P(non-survivor) = 0.40.
■ Fifty-five percent of the companies to which the test is administered
pass it: P(pass test) = 0.55.
■ The probability that a company will pass the test given that it will
subsequently survive 12 months, is 0.85: P(pass test | survivor) = 0.85.
Using the information validating your criteria, calculate the following:
A. What is P(pass test | non-survivor)?
B. Using Bayes’ formula, calculate the probability that a company is a
survivor, given that it passes the test; that is, calculate P(survivor | pass
test).
C. What is the probability that a company is a non-survivor, given that it
fails the test?
D. Is the test effective?
"""

In [9]:
%%time


# Run the retrieve -> generate pipeline for the question above.
# thread_id and user_id are passed to the graph through the "configurable" config.
# NOTE(review): presumably the checkpointer keys saved state by thread_id, so
# re-running with the same id resumes that thread -- confirm.
result = graph.invoke({"query": user_query}, config={"configurable": {"thread_id": "cfa_session_01", "user_id": "daniel"}})

CPU times: user 228 ms, sys: 322 ms, total: 550 ms
Wall time: 18.7 s


In [10]:
# Show the model's reply. The recorded output below begins with the model's
# <think> preamble, which is part of the returned text.
print("\nAnswer:\n", result["answer"])


Answer:
 <think>
Okay, let's see. I need to solve this question about calculating probabilities related to the distressed credit criteria using Bayes' formula. Hmm, I remember Bayes' theorem from my studies, but I should probably take it step by step.

First, the problem gives some information:

- 40% of companies go bankrupt within a year (P(non-survivor) = 0.40).
- 55% pass the test (P(pass test) = 0.55).
- The probability that a company will pass given it survives is 85% (P(pass test | survivor) = 0.85).

The questions are A, B, C, and D.

Starting with question A: What's P(pass test | non-survivor)? So this is the probability of passing the test given that the company doesn't survive. I don't have direct information for this. Maybe it can be derived from other probabilities?

I know that the total probability of passing is 0.55, which includes both survivors and non-surpliers. Let me define some variables to clarify.

Let S = Survive (survivor), not S = doesn't survive (non-surviv

In [11]:
# Scratch cell: a bare literal that nothing else in the notebook reads.
2

2