I want to have a more intuitive way to search through my notes.
Some facts about my notes:
- I have a daily note whose name is formatted like `2025-11-25`. Each daily note includes my todo list, work log, thinking, meeting notes, media, and a list of the notes that were created/modified that day.
- 


The tool needs to support keyword search (or something similar) to find a starting point, and then run follow-up searches for each next step.


Give a ReAct agent access to a database of markdown notes so it can look up the information in them.

In [5]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Set the vault location for this session; this overwrites any VAULT_PATH already set.
# NOTE(review): "~" is stored literally here -- presumably the code that reads
# VAULT_PATH expands it (os.path.expanduser); confirm in tools/md_files.py.
os.environ["VAULT_PATH"] = "~/Obsidian/Notes Vault"

import dspy

# Make Azure-hosted GPT-4.1 the default language model for every DSPy module.
azure_lm = dspy.LM(
    model="azure/gpt-4.1",  # plain string: the f-prefix had no placeholders
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version="2023-03-15-preview",
)
dspy.configure(lm=azure_lm)

In [None]:
# Proof of concept: embed a tiny toy corpus before pointing this at real notes.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# `sentences` is what gets embedded; `sentences_meta` holds the label shown
# for each hit (same order, one label per sentence).
sentences = [
    "Paris is in France",
    "Houston is in Texas",
    "Berlin is in Germany",
    "Tokyo is in Japan",
]
sentences_meta = [
    "France",
    "Texas",
    "Germany",
    "Japan",
]

# One embedding row per sentence, returned as a NumPy array.
embeddings = model.encode(sentences, convert_to_numpy=True)



In [None]:
# Create the index: a flat L2 FAISS index over the toy embeddings, saved to
# disk together with the metadata needed to map result rows back to labels.
import os
import faiss
import pickle

dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

# Neither faiss.write_index nor open() creates missing parent directories,
# so make sure the output folder exists on a fresh checkout.
os.makedirs("data/faiss", exist_ok=True)

faiss.write_index(index, "data/faiss/capital_index.faiss")
with open("data/faiss/capital_meta.pkl", "wb") as f:
    pickle.dump(sentences_meta, f)

In [None]:
# Read the saved index and metadata back from disk and run a sample query.
import faiss
import pickle

index = faiss.read_index('data/faiss/capital_index.faiss')
with open("data/faiss/capital_meta.pkl", "rb") as f:
    sentences_meta = pickle.load(f)

query = "Startups"
query_vec = model.encode([query], convert_to_numpy=True)
top_k = 2

# Search once (the original ran the identical search twice and discarded the
# first result). D holds distances, I holds row indices into the index.
D, I = index.search(query_vec, top_k)
for idx in I[0]:
    # FAISS pads with -1 when fewer than top_k vectors are available; skip
    # those so -1 is not misread as "last element of the list".
    if idx != -1:
        print(sentences_meta[idx])


In [None]:
# Now apply the same approach to the real notes: collect one text blob per note.
from tools.md_files import get_notes_list, get_note_content

notes_list = get_notes_list()
note_content = []
for note_name in notes_list:
    print(note_name)  # progress output: one line per note processed
    body = get_note_content(note_name)
    # Put the note name in front of its body so the name is embedded too.
    note_content.append("\n\n".join((note_name, body)))


In [None]:
# Embed every note blob; row i of `embeddings` corresponds to notes_list[i].
embeddings = model.encode(note_content, convert_to_numpy=True)

# The index dimensionality is the embedding width (second axis).
_, dim = embeddings.shape
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

# Persist the index plus the note names needed to translate hits back to notes.
faiss.write_index(index, "data/faiss/small_notes_index.faiss")
with open("data/faiss/small_notes_meta.pkl", "wb") as meta_file:
    pickle.dump(notes_list, meta_file)

In [None]:
# Sample semantic query against the in-memory notes index.
query = "reinforcement learning"
query_vec = model.encode([query], convert_to_numpy=True)
top_k = 5

# Search once (the original ran the identical search twice and discarded the
# first result). D holds distances, I holds row indices into notes_list.
D, I = index.search(query_vec, top_k)
for idx in I[0]:
    # FAISS pads with -1 when the index holds fewer than top_k vectors.
    if idx != -1:
        print(notes_list[idx])

Now let's turn this into functions.

In [None]:
def reindex_notes():
    """Rebuild the FAISS index over all notes and save it under ``data/faiss/``.

    Each note returned by ``get_notes_list()`` is embedded as its name, a
    blank line, and its content, using the ``all-MiniLM-L6-v2`` model. The
    vectors go into a flat L2 index written to
    ``data/faiss/notes_index.faiss``; the note list is pickled to
    ``data/faiss/notes_meta.pkl`` so that row ``i`` of the index maps back to
    ``notes_list[i]``.
    """
    from sentence_transformers import SentenceTransformer
    import faiss
    import os
    import pickle
    from tools.md_files import get_notes_list, get_note_content
    model = SentenceTransformer("all-MiniLM-L6-v2")

    notes_list = get_notes_list()
    # Prefix each body with the note name so the name is part of the embedding.
    note_content = [
        note + '\n\n' + get_note_content(note)
        for note in notes_list
    ]

    embeddings = model.encode(note_content, convert_to_numpy=True)

    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)

    # Neither faiss.write_index nor open() creates missing parent directories.
    os.makedirs("data/faiss", exist_ok=True)

    faiss.write_index(index, "data/faiss/notes_index.faiss")
    with open("data/faiss/notes_meta.pkl", "wb") as f:
        pickle.dump(notes_list, f)

def search_notes(query: str, top_k: int = 5):
    """Return the names of the notes whose embeddings are closest to ``query``.

    Loads the index from ``data/faiss/notes_index.faiss`` and the matching
    note list from ``data/faiss/notes_meta.pkl``, embeds ``query`` with the
    ``all-MiniLM-L6-v2`` model, and looks up the nearest vectors.

    Args:
        query: Free-text search string.
        top_k: Maximum number of notes to return.

    Returns:
        A list of at most ``top_k`` entries from the saved note list, in the
        order FAISS returned them. Fewer entries are returned when the index
        holds fewer than ``top_k`` vectors.
    """
    from sentence_transformers import SentenceTransformer
    import faiss
    import pickle
    model = SentenceTransformer("all-MiniLM-L6-v2")

    index = faiss.read_index('data/faiss/notes_index.faiss')
    with open("data/faiss/notes_meta.pkl", "rb") as f:
        notes_list = pickle.load(f)
    query_vec = model.encode([query], convert_to_numpy=True)
    # Search once; the original ran the same search twice and discarded the
    # first result.
    _distances, indices = index.search(query_vec, top_k)
    # FAISS pads missing results with -1; without this filter a -1 would be
    # silently resolved to notes_list[-1], i.e. the wrong note.
    return [notes_list[idx] for idx in indices[0] if idx != -1]
# Quick manual check of search_notes with a sample query.
search_notes("reinforcement learning")


# Done: the functions above were moved to tools/md_files.py.

In [2]:
from tools.md_files import get_note_content, search_notes


class NoteResearcher(dspy.Signature):
    """Read through notes to answer a question"""

    # The docstring above and the field descriptions below are passed to DSPy
    # as part of the signature, so treat them as runtime text, not comments.
    question: str = dspy.InputField(
        description="The question that needs to be answered.",
    )
    answer: str = dspy.OutputField(
        description="The answer to the question.",
    )


# ReAct agent that may call the two note tools while working towards an answer.
agent = dspy.ReAct(NoteResearcher, tools=[search_notes, get_note_content])

In [6]:
# Conceptual question: a topic-style lookup rather than a date or name lookup.
result = agent(question="What is the difference between no-interaction and late-interaction retrieval models?")

In [10]:
# Check what kind of object the agent call returned.
type(result)

dspy.primitives.prediction.Prediction

In [9]:
# Date-specific question, to see how retrieval copes with an exact date.
result = agent(question="What was I working on June 26th?")
# Bare expression so the notebook displays the returned object.
result

Prediction(
    trajectory={'thought_0': 'To answer what you were working on June 26th, I should search for notes that mention "June 26th" or are dated around that time. I\'ll start by searching for notes with "June 26" as the query.', 'tool_name_0': 'search_notes', 'tool_args_0': {'query': 'June 26', 'top_k': 5}, 'observation_0': ['2025-05-04', '2025-06-21', 'Template/Template, Testing', 'Untitled 14', 'Untitled 13'], 'thought_1': 'None of the note titles directly reference June 26th, but the note titled "2025-06-21" is close in date and may contain information about work done around June 26th. I will check the content of "2025-06-21" to see if it mentions ongoing or upcoming work for June 26th.', 'tool_name_1': 'get_note_content', 'tool_args_1': {'note_name': '2025-06-21'}, 'observation_1': '[[2025-06-20]] - [[2025-06-22]]', 'thought_2': 'The note "2025-06-21" references the dates June 20th to June 22nd, but does not provide specific content about June 26th. I should try searching fo

In [None]:
# Print the agent's trajectory one step per line, shortening long observations.
traj = result.trajectory
for step_key, step_value in traj.items():
    if "observation" in step_key:
        # Observations are raw tool outputs and are not always strings (e.g.
        # search_notes returns a list). Measure and truncate the text form so
        # a long list neither raises TypeError on `list + "..."` nor gets
        # measured by item count instead of by characters.
        step_text = str(step_value)
        if len(step_text) > 300:
            step_value = step_text[:300] + "..."
    print(f"{step_key}: {step_value}")

thought_0: To answer what you were working on June 26th, I should search for notes that mention "June 26th" or are dated around that time. I'll start by searching for notes with "June 26" as the query.
tool_name_0: search_notes
tool_args_0: {'query': 'June 26', 'top_k': 5}
observation_0: ['2025-05-04', '2025-06-21', 'Template/Template, Testing', 'Untitled 14', 'Untitled 13']
thought_1: None of the note titles directly reference June 26th, but the note titled "2025-06-21" is close in date and may contain information about work done around June 26th. I will check the content of "2025-06-21" to see if it mentions ongoing or upcoming work for June 26th.
tool_name_1: get_note_content
tool_args_1: {'note_name': '2025-06-21'}
observation_1: [[2025-06-20]] - [[2025-06-22]]
thought_2: The note "2025-06-21" references the dates June 20th to June 22nd, but does not provide specific content about June 26th. I should try searching for notes with the query "June" to see if there are any notes from l

This second example shows exactly the problem I was looking at earlier: vector search doesn't work very well for exact-match ("dictionary") mentions like dates and names, but it's very good at picking up the *vibe*.

So the next step in this process is improving retrieval. How can I do this efficiently?