In [1]:
%load_ext autoreload
%autoreload 2    

# Setup

In [2]:
# Database file passed as `db_name` to every memory store created in this notebook.
DB_NAME = "./memories.db"

In [3]:
from broai.prompt_management.core import PromptGenerator
from broai.prompt_management.interface import Persona, Instructions, Examples, Example
from pydantic import BaseModel, Field
from typing import List
from broai.experiments.bro_agent import BroAgent
import json
from broai.interface import Context, Contexts
from broai.experiments.vector_store import DuckVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from package.jargon_store import JargonStore, JargonRecord

In [55]:
from broai.experiments.cross_encoder import ReRanker
# Re-ranker instance; `batch_conversation` calls `rr.run(...)` to order the
# retrieved contexts against the (jargon-expanded) query and keep the top ones.
rr = ReRanker()

  rr = ReRanker()


In [6]:
from broai.experiments.huggingface_embedding import BAAIEmbedding, EmbeddingDimension
# Single embedding instance, passed as `embedding=` to each DuckVectorStore below.
baai_em = BAAIEmbedding()

  baai_em = BAAIEmbedding()
Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 172132.86it/s]


In [7]:
# Three vector-store tables in the same database file, all sharing one
# embedding instance. They are created in the order raw -> enrich -> longterm.
raw_memory, enrich_memory, longterm_memory = (
    DuckVectorStore(db_name=DB_NAME, table=table_name, embedding=baai_em)
    for table_name in ("raw_memory", "enrich_memory", "longterm_memory")
)

# Jargon entries are kept in a JargonStore table of the same database file;
# it is queried with `fulltext_search` and takes no embedding argument.
jargon_memory = JargonStore(db_name=DB_NAME, table="jargon_memory")

  raw_memory = DuckVectorStore(db_name=DB_NAME, table="raw_memory", embedding=baai_em)
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  enrich_memory = DuckVectorStore(db_name=DB_NAME, table="enrich_memory", embedding=baai_em)
  longterm_memory = DuckVectorStore(db_name=DB_NAME, table="longterm_memory", embedding=baai_em)


# Agent Flows:
- JargonDetector
- JargonEditor
- QueryDecomposer
- Oracle

In [36]:
from agents.jargon_detector import JargonDetector, InputMessage
from agents.jargon_editor import JargonEditor, InputEditMessage
# NOTE(review): the import below rebinds `InputMessage`, replacing the class
# imported from agents.jargon_detector above. Every later `InputMessage(...)`
# in this notebook, including the one passed to `JargonDetector.run`, is
# therefore agents.query_decomposer.InputMessage. Presumably both classes
# accept `message=`; consider aliasing one of them to make the intent explicit.
from agents.query_decomposer import QueryDecomposer, InputMessage
from agents.oracle import Oracle, InputOracle

In [68]:
def batch_conversation(
    original_message,
    model_name="us.meta.llama3-2-11b-instruct-v1:0",
    min_confidence=0.5,
    search_limit=10,
    top_n=10,
):
    """Run the whole jargon-aware question-answering flow for one message.

    Pipeline:
      1. ``JargonDetector`` proposes jargon terms; terms whose confidence is
         strictly greater than ``min_confidence`` are kept.
      2. Each kept term is looked up in ``jargon_memory`` (full-text search).
      3. ``JargonEditor`` rewrites the message using the collected knowledge.
      4. ``QueryDecomposer`` splits the rewritten message into sub-queries.
      5. Each sub-query is vector-searched in ``longterm_memory``; hits are
         de-duplicated by ``id``, keeping first-seen order.
      6. ``rr`` re-ranks the hits against the rewritten message.
      7. ``Oracle`` answers, with the re-ranked contexts as prior knowledge.

    Relies on the notebook-level objects ``jargon_memory``, ``longterm_memory``
    and ``rr`` plus the imported agent classes.

    Args:
        original_message: The user's question as plain text.
        model_name: Model id assigned to ``Oracle.model.model_name``.
        min_confidence: Exclusive lower bound on a detected jargon's confidence.
        search_limit: ``limit`` passed to each ``longterm_memory.vector_search``.
        top_n: ``top_n`` passed to ``rr.run``.

    Returns:
        The ``answer`` attribute of the Oracle's response.

    Side effects:
        ``Oracle.model.model_name`` is overwritten and stays set after the call.
    """
    potential_jargons = JargonDetector.run(request=InputMessage(message=original_message))
    detected_jargons = [j for j in potential_jargons.jargons if j.confidence > min_confidence]

    # Look up each detected term itself. The search string used to be the
    # hard-coded literal "STORM", which injected STORM knowledge into every
    # question regardless of what jargon was actually detected.
    jargon_knowledges = []
    for j in detected_jargons:
        jargon_knowledges.extend(jargon_memory.fulltext_search(search_query=j.jargon))
    jargon_knowledges_str = "\n\n".join(
        f"{enum+1}: {j.jargon}\nEvidence: {j.evidence}\nExplanation: {j.explanation}"
        for enum, j in enumerate(jargon_knowledges)
    )

    edited_message = JargonEditor.run(InputEditMessage(knowledge=jargon_knowledges_str, message=original_message))
    sub_queries = QueryDecomposer.run(InputMessage(message=edited_message.edited_message))

    # Retrieve per sub-query and drop repeated contexts as they arrive.
    # A set gives O(1) membership checks; assumes context ids are hashable.
    seen_ids = set()
    deduplicated_contexts = []
    for sq in sub_queries.sub_queries:
        for c in longterm_memory.vector_search(search_query=sq, limit=search_limit):
            if c.id not in seen_ids:
                seen_ids.add(c.id)
                deduplicated_contexts.append(c)

    # rr.run also returns the scores; they are not needed here.
    reranked_contexts, _ = rr.run(
        search_query=edited_message.edited_message,
        contexts=deduplicated_contexts,
        top_n=top_n,
    )
    prior_knowledge = "\n\n".join(f"{c.context}" for c in reranked_contexts)

    Oracle.model.model_name = model_name
    # The Oracle is asked the comma-joined sub-queries, not the rewritten message.
    answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=", ".join(sub_queries.sub_queries)))
    return answer.answer

In [16]:
# Step 1: ask the detector for candidate jargon terms in the question and
# keep only the candidates whose confidence is above 0.5.
original_message = "What does STORM do in the research study?"

potential_jargons = JargonDetector.run(request=InputMessage(message=original_message))
detected_jargons = list(filter(lambda candidate: candidate.confidence > 0.5, potential_jargons.jargons))
detected_jargons

[PotentialJargon(jargon='STORM', confidence=0.8)]

In [17]:
# Step 2: collect the jargon-store entries for every detected term.
jargon_knowledges = []
for j in detected_jargons:
    # Search for the detected term itself. The query used to be the hard-coded
    # literal "STORM", which only worked because STORM was the sole jargon in
    # the example question.
    jk = jargon_memory.fulltext_search(search_query=j.jargon)
    jargon_knowledges.extend(jk)

In [18]:
# Render each jargon entry as a numbered "term / Evidence / Explanation"
# paragraph; paragraphs are separated by a blank line.
jargon_knowledges_str = "\n\n".join(
    f"{enum+1}: {j.jargon}\nEvidence: {j.evidence}\nExplanation: {j.explanation}"
    for enum, j in enumerate(jargon_knowledges)
)
print(jargon_knowledges_str)

1: STORM
Evidence: We present STORM to automate the pre-writing stage
Explanation: STORM is a system that automates the pre-writing stage

2: STORM
Evidence: we propose the STORM paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking
Explanation: STORM is a paradigm for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking

3: STORM
Evidence: STORM simulates a conversation between a Wikipedia writer and a topic expert
Explanation: STORM is a system that simulates conversations

4: STORM
Evidence: STORM discovers different perspectives by surveying existing articles from similar topics
Explanation: STORM is a system or method for discovering perspectives and controlling question asking process

5: STORM
Evidence: We propose STORM, a writing system for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking
Explanation: STORM is a writing system for the Synthesis of Topic Outli

In [20]:
# Step 3: have JargonEditor rewrite the question, given the jargon knowledge
# text built above; the rewritten text is read from `edited_message.edited_message`.
edited_message = JargonEditor.run(InputEditMessage(knowledge=jargon_knowledges_str, message=original_message))

What does STORM, a system that automates the pre-writing stage, simulates conversations, discovers different perspectives, and is a writing system for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking, do in the research study?


In [21]:
# Show the original question and the jargon-expanded rewrite, one per line,
# with a ruler line between them.
print(original_message, "="*10, edited_message.edited_message, sep="\n")

What does STORM do in the research study?
What does STORM, a system that automates the pre-writing stage, simulates conversations, discovers different perspectives, and is a writing system for the Synthesis of Topic Outlines through Retrieval and Multi-perspective Question Asking, do in the research study?


In [24]:
# Baseline for comparison: decompose the *original* question, i.e. without
# the jargon expansion that the next cell uses.
QueryDecomposer.run(InputMessage(message=original_message))

DecomposedQueries(sub_queries=['What does STORM do in the research study', 'STORM in the research study'])

In [27]:
# Step 4: decompose the jargon-expanded question; the resulting sub-queries
# drive the vector search in the retrieval cell below.
sub_queries = QueryDecomposer.run(InputMessage(message=edited_message.edited_message))
sub_queries.sub_queries

['What does STORM do',
 'STORM system',
 'Automated pre-writing stage',
 'Simulates conversations',
 'Discovers different perspectives',
 'Writing system for Synthesis of Topic Outlines',
 'Retrieval and Multi-perspective Question Asking']

In [28]:
# Step 5: run one vector search (up to 10 hits) per sub-query and flatten all
# hits into a single list, in sub-query order. Duplicates are removed later.
retreived_contexts = [
    hit
    for sub_query in sub_queries.sub_queries
    for hit in longterm_memory.vector_search(search_query=sub_query, limit=10)
]

In [29]:
# Total hits before de-duplication (one `limit=10` search per sub-query).
len(retreived_contexts)

70

In [33]:
# Step 6: keep the first occurrence of every context id, preserving the
# retrieval order; `id_list` records the ids that were already kept.
id_list, deduplicated_contexts = [], []
for c in retreived_contexts:
    if c.id in id_list:
        continue
    id_list.append(c.id)
    deduplicated_contexts.append(c)

In [34]:
# Number of hits before vs. after de-duplicating by id.
len(retreived_contexts), len(deduplicated_contexts)

(70, 28)

In [42]:
# Join the text of all de-duplicated contexts, separated by blank lines, into
# the Oracle's prior knowledge.
# NOTE(review): unlike `batch_conversation`, this manual flow does not call
# `rr.run`, so nothing is re-ranked or cut to a top-n here.
prior_knowledge = "\n\n".join([f"{c.context}" for c in deduplicated_contexts])

In [39]:
# Answer with the llama3-2-3b model. The assignment changes the model name on
# the imported Oracle object itself, so it stays in effect until reassigned.
# NOTE(review): the Oracle is asked the edited message here, whereas
# `batch_conversation` asks it the comma-joined sub-queries.
Oracle.model.model_name = "us.meta.llama3-2-3b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM is a system that automates the pre-writing stage by researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. It discovers different perspectives by surveying existing articles from similar topics and uses these perspectives to control the question asking process. STORM prompts an LLM to generate a list of related topics and subsequently extracts the tables of contents from their corresponding Wikipedia articles, if such articles can be obtained through Wikipedia API. These tables of contents are concatenated to create a context to prompt the LLM to identify N perspectives P = {p1, ..., p<sup>N</sup> } that can collectively contribute to a comprehensive article on t. STORM creates an outline for an article by generating a draft outline from a topic and refining it with simulated conversations and LLM knowledge.


In [40]:
# Same question and prior knowledge, answered with the llama3-2-11b model.
Oracle.model.model_name = "us.meta.llama3-2-11b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM simulates conversations between a Wikipedia writer and a topic expert, discovers different perspectives by surveying articles, and uses these perspectives to control question asking, prompting an LLM to generate a list of related topics and extract tables of contents to identify N perspectives that contribute to a comprehensive article.


In [41]:
# Same question and prior knowledge, answered with the llama3-3-70b model.
Oracle.model.model_name = "us.meta.llama3-3-70b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM simulates conversations between a Wikipedia writer and a topic expert, discovers different perspectives by surveying articles, and creates an outline for an article by generating a draft outline from a topic and refining it with simulated conversations and LLM knowledge.


In [69]:
# Model ids compared in the batch runs below; each one is passed as
# `model_name` to `batch_conversation`.
model_list = [
    "us.meta.llama3-2-3b-instruct-v1:0",
    "us.meta.llama3-2-11b-instruct-v1:0",
    "us.meta.llama3-3-70b-instruct-v1:0"
]

In [70]:
# Run the full pipeline once per model on the same question and print each
# answer under its model id, followed by a ruler line.
original_message = "What does STORM do in the research study?"
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a writing system that automates the pre-writing stage by researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. It discovers different perspectives by surveying articles, uses them to control question asking, and prompts an LLM to generate a list of related topics and extract tables of contents to identify N perspectives that contribute to a comprehensive article. STORM simulates a conversation between a Wikipedia writer and a topic expert to generate questions and answers, using LLM and trusted sources to ensure factual information. It creates an outline for an article by generating a draft outline from a topic and refining it with simulated conversations and LLM knowledge.
model: us.meta.llama3-2-11b-instruct-v1:0
STORM is a writing system that automates the pre-writing stage by discovering different perspectives, simulating conversations, and creating a

In [71]:
# Per-model comparison on a question about the dataset.
# NOTE(review): the question is ungrammatical ("What does the dataset used ...");
# the text is kept as-is so the recorded answers still match their input.
original_message = "What does the dataset used in the study?"
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM uses a dataset of recent Wikipedia articles, specifically the FreshWiki dataset, to discover different perspectives on a given topic. It then simulates conversations between a writer and an expert grounded on trustworthy online sources to generate a comprehensive outline. The outline is refined using the LLM's intrinsic knowledge and the gathered conversations from different perspectives. STORM is a framework that automates the pre-writing stage by discovering perspectives, simulating information-seeking conversations, and creating a comprehensive outline for long-form articles.
model: us.meta.llama3-2-11b-instruct-v1:0
The dataset used in the STORM study is called FreshWiki, which is a dataset of recent high-quality Wikipedia articles.
model: us.meta.llama3-3-70b-instruct-v1:0
FreshWiki, a dataset of recent high-quality Wikipedia articles.


In [72]:
# Per-model comparison on a "how does it work" phrasing of the question.
original_message = "How does STORM work in the study?"
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a framework that automates the pre-writing stage by discovering different perspectives, simulating information-seeking conversations, and creating a comprehensive outline. It uses a pseudo code that includes steps such as researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. STORM uses a large language model to generate questions and answers, and it also uses trusted sources from the internet to ground the answer to each query. The framework is designed to assist the creation of grounded, long-form articles, and it has been evaluated in a study that shows it outperforms other approaches in terms of heading soft recall, entity recall, and full-length article quality.
model: us.meta.llama3-2-11b-instruct-v1:0
STORM is a framework that automates the pre-writing stage by discovering perspectives, simulating conversations, and creating outlines for long-form a

In [73]:
# Per-model comparison on an imperative ("Explain ...") phrasing.
original_message = "Explain how STORM works in the study."
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a novel system that automates the pre-writing stage by researching the topic and creating an outline using LLMs to ask incisive questions and retrieving trusted information from the Internet. It discovers different perspectives by surveying existing articles from similar topics and uses these perspectives to control the question asking process. STORM simulates a conversation between a Wikipedia writer and a topic expert to generate questions and answers, using LLM and trusted sources to ensure factual information. The system generates an outline and references for a given topic, considering multiple perspectives and simulated conversations.
model: us.meta.llama3-2-11b-instruct-v1:0
STORM works by discovering different perspectives through article surveys, simulating conversations between Wikipedia writers and topic experts, and automating the pre-writing stage. It uses LLMs to ask incisive questions, retrieve trusted information from th

In [74]:
# Per-model comparison when the question also asks for plain English.
original_message = "Explain how STORM works in the study in plain English."
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a framework that automates the pre-writing stage by discovering different perspectives, simulating information-seeking conversations, and creating comprehensive outlines for long-form articles. It uses large language models to ask incisive questions and retrieve trusted information from the Internet, and then creates an outline that can be expanded into a full-length article. STORM has been evaluated and found to outperform other approaches in terms of outline and article quality, and has been shown to be effective in generating grounded and organized long-form articles.
model: us.meta.llama3-2-11b-instruct-v1:0
STORM is a framework that automates the pre-writing stage by discovering perspectives, simulating conversations, and creating outlines for long-form articles. It uses large language models to ask incisive questions, retrieve trusted information from the Internet, and generate a comprehensive outline. STORM's pre-writing stage in

In [75]:
# Per-model comparison when the question asks for a five-year-old-level answer.
# NOTE(review): in the recorded run the JargonDetector and QueryDecomposer
# outputs failed to parse for this prompt (see the errors printed below) and
# the answers drift off-topic. The wording "a five years old" is kept as-is so
# the recorded outputs still match their input.
original_message = "Explain how STORM works in the study to me like I'm a five years old."
for m in model_list:
    answer = batch_conversation(model_name=m, original_message=original_message)
    print("model:", m)
    print(answer, "="*20, sep="\n")

  return self.content_extractor(text)


[91mBoth parse_structured_output and content_extractor failed:
1 validation error for PotentialJargons
  Invalid JSON: trailing characters at line 41 column 1 [type=json_invalid, input_value='{\n    "$defs": {\n     ....8\n        }\n    ]\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
[91mBoth parse_structured_output and content_extractor failed:
1 validation error for DecomposedQueries
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value="**Simple Explanation of ...ike cells or molecules.", input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
model: us.meta.llama3-2-3b-instruct-v1:0
The STORM implementation uses zero-shot prompting with the DSPy framework, achieving better results in automatic artic

  return self.content_extractor(text)


[91mBoth parse_structured_output and content_extractor failed:
1 validation error for PotentialJargons
  Invalid JSON: trailing characters at line 41 column 1 [type=json_invalid, input_value='{\n    "$defs": {\n     ....8\n        }\n    ]\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
[91mBoth parse_structured_output and content_extractor failed:
1 validation error for DecomposedQueries
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value="**Simple Explanation of ...ike cells or molecules.", input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
model: us.meta.llama3-2-11b-instruct-v1:0
The STORM system evaluates outline quality using GPT-3.5 and GPT-4 models, achieving significant improvements in outl

  return self.content_extractor(text)


[91mBoth parse_structured_output and content_extractor failed:
1 validation error for PotentialJargons
  Invalid JSON: trailing characters at line 41 column 1 [type=json_invalid, input_value='{\n    "$defs": {\n     ....8\n        }\n    ]\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
[91mBoth parse_structured_output and content_extractor failed:
1 validation error for DecomposedQueries
  Invalid JSON: expected value at line 1 column 1 [type=json_invalid, input_value="**Simple Explanation of ...ike cells or molecules.", input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/json_invalid
BroAgent.parse_structured_output() got multiple values for argument 'text'[0m
model: us.meta.llama3-3-70b-instruct-v1:0
The study examines error types in generated text, including improper inferential linking, inaccurate paraphrasing, an