In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local .py files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2    

# Setup

In [2]:
# Database path passed as `db_name` to every memory store created in this notebook.
DB_NAME = "./memories.db"

In [3]:
from broai.prompt_management.core import PromptGenerator
from broai.prompt_management.interface import Persona, Instructions, Examples, Example
from pydantic import BaseModel, Field
from typing import List
from broai.experiments.bro_agent import BroAgent
import json
from broai.interface import Context, Contexts
from broai.experiments.vector_store import DuckVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from package.jargon_store import JargonStore, JargonRecord

In [5]:
from broai.experiments.cross_encoder import ReRanker
# Reranker instance; handed to KnowledgeFlow as `reranker` further down.
rr = ReRanker()

  rr = ReRanker()


In [6]:
from broai.experiments.huggingface_embedding import BAAIEmbedding, EmbeddingDimension
# Embedding object passed as `embedding` to each DuckVectorStore created below.
baai_em = BAAIEmbedding()

  baai_em = BAAIEmbedding()
Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 157090.04it/s]


In [7]:
# The three vector stores differ only in table name; they share the same
# db_name and embedding object, and are built in the order listed.
raw_memory, enrich_memory, longterm_memory = (
    DuckVectorStore(db_name=DB_NAME, table=table_name, embedding=baai_em)
    for table_name in ("raw_memory", "enrich_memory", "longterm_memory")
)

# The jargon store uses the same db_name but takes no embedding argument.
jargon_memory = JargonStore(db_name=DB_NAME, table="jargon_memory")

  raw_memory = DuckVectorStore(db_name=DB_NAME, table="raw_memory", embedding=baai_em)
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  enrich_memory = DuckVectorStore(db_name=DB_NAME, table="enrich_memory", embedding=baai_em)
  longterm_memory = DuckVectorStore(db_name=DB_NAME, table="longterm_memory", embedding=baai_em)


# Agent Flows
- JargonDetector
- JargonEditor
- QueryDecomposer
- KBOracle (AIOracle is the commented-out alternative)

In [61]:
from package.flow import OnlineFlow, JargonFlow, KnowledgeFlow, Payload
from agents.jargon_detector import JargonDetector
from agents.jargon_editor import JargonEditor
from agents.query_decomposer import QueryDecomposer
from agents.ai_oracle import AIOracle
from agents.kb_oracle import KBOracle

In [34]:
# Example request payload.
# NOTE(review): `payload` is not referenced by any later cell shown here;
# `olf.run(...)` is called with `message=` directly. Confirm whether this
# cell is still needed.
payload = Payload(message="What does STORM do in the study?")

In [72]:
# Jargon sub-flow: the jargon store plus a detector agent and an editor agent.
jargon_flow = JargonFlow(
    jargon_memory=jargon_memory,
    jargon_detector=JargonDetector(),
    jargon_editor=JargonEditor(),
)

# Knowledge sub-flow: the long-term memory store plus the reranker.
knowledge_flow = KnowledgeFlow(
    longterm_memory=longterm_memory,
    reranker=rr,
)

# Alternative wiring that uses AIOracle and only the jargon flow:
# olf = OnlineFlow(oracle=AIOracle(), jargon_flow=jargon_flow)

# Active wiring: KBOracle with both sub-flows and a query decomposer.
olf = OnlineFlow(
    oracle=KBOracle(),
    knowledge_flow=knowledge_flow,
    jargon_flow=jargon_flow,
    query_decomposer=QueryDecomposer(),
)

In [88]:
# Earlier prompts, kept commented out for quick switching:
# olf.run(message="What is the dataset used in the study?")
# olf.run(message="What does STORM do in the study?")
# olf.run(message="What does STORM stand for?")
# olf.run(message="What does STORM stand for and how does it work? Explain it in step-by-step manner.")

# Active prompt. The adjacent literals concatenate into a single message string.
olf.run(
    message=(
        "What does STORM stand for and how does it work? "
        "Could you please explain it like I'm a five years old? "
        "I'm really new to this thing."
    )
)

In [89]:
# Print the `answer` field of the flow's payload (presumably populated by
# the `olf.run(...)` call above; confirm against OnlineFlow).
print(olf.payload.answer)

Let me explain STORM in a super simple way.

Imagine you want to write a big article about a topic, like dinosaurs. But, you don't know where to start. That's where STORM comes in. STORM is like a special tool that helps you write a great article by doing some of the hard work for you.

STORM stands for Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking. It's a big phrase, but don't worry, I'll break it down for you.

"Synthesis" means combining different things together to make something new. In this case, STORM combines different ideas and information to help you write a great article.

"Topic Outlines" means creating a plan or a map of what you want to write about. STORM helps you create a good plan by asking you questions and finding information about your topic.

"Retrieval" means finding information from different places, like books or the internet. STORM uses this information to help you write a great article.

"Multi-perspective Question Asking"

In [90]:
# Print the `sub_queries` field of the flow's payload.
print(olf.payload.sub_queries)

['Meaning of STORM acronym', 'How STORM works', 'Explanation of STORM in simple terms']


In [91]:
# Print the `jargon_contexts` field of the flow's payload.
# NOTE: this prints the full repr of every JargonRecord, so the output is long.
print(olf.payload.jargon_contexts)

[JargonRecord(id='8a50dbfa-a635-42d8-82de-aae082439bc9', jargon='STORM', evidence='we propose the STORM paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking', explanation='STORM is short for Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking', metadata={'section': '1 Introduction', 'source': '.docs/test1/storm.md', 'type': 'document', 'sequence': 3}, created_at=Timestamp('2025-04-28 19:03:04.082071')), JargonRecord(id='07824b51-5ae3-42e1-9c7c-356b6ab6fe6c', jargon='STORM', evidence='we propose the STORM paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking', explanation='STORM is short for Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking', metadata={'section': '1 Introduction', 'source': '.docs/test1/storm.md', 'type': 'document', 'sequence': 2}, created_at=Timestamp('2025-04-28 19:03:02.162489')), JargonRecord(id='d6b4696b-6

In [84]:
# Print the `jargon_knowledge` field of the flow's payload.
# NOTE(review): this cell's saved execution count (84) is lower than the
# `olf.run(...)` cell's (88), so the saved output may come from an earlier
# run. Re-run this cell after `olf.run(...)` to refresh it.
print(olf.payload.jargon_knowledge)

1: STORM
Evidence: we propose the STORM paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking
Explanation: STORM is short for Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking

2: STORM
Evidence: we propose the STORM paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking
Explanation: STORM is short for Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking

3: STORM
Evidence: STORM discovers different perspectives by surveying existing articles from similar topics
Explanation: STORM is a system or method for discovering perspectives and controlling question asking process

4: STORM
Evidence: We present STORM to automate the pre-writing stage
Explanation: STORM is a system that automates the pre-writing stage

5: STORM
Evidence: We propose STORM, a writing system for the Synthesis of Topic Outlines through Retrieval and Multi-perspec

In [87]:
# Number of entries in the payload's `reranked_contexts`.
# NOTE(review): this cell's saved execution count (87) is lower than the
# `olf.run(...)` cell's (88), so the saved value may come from an earlier
# run. Re-run this cell after `olf.run(...)` to refresh it.
len(olf.payload.reranked_contexts)

10