In [2]:
%load_ext autoreload
%autoreload 2    

# Setup

In [1]:
# Database path passed as `db_name` to every memory store created below.
DB_NAME = "./memories.db"

In [2]:
from broai.prompt_management.core import PromptGenerator
from broai.prompt_management.interface import Persona, Instructions, Examples, Example
from pydantic import BaseModel, Field
from typing import List
from broai.experiments.bro_agent import BroAgent
import json
from broai.interface import Context, Contexts
from broai.experiments.vector_store import DuckVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from package.jargon_store import JargonStore, JargonRecord

In [4]:
from broai.experiments.cross_encoder import ReRanker
# Shared re-ranker instance; `batch_conversation` calls `rr.run(...)` on the retrieved contexts.
rr = ReRanker()

  rr = ReRanker()


In [5]:
from broai.experiments.huggingface_embedding import BAAIEmbedding, EmbeddingDimension
# Embedding model instance handed to each DuckVectorStore below via `embedding=`.
baai_em = BAAIEmbedding()

  baai_em = BAAIEmbedding()
Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 190650.18it/s]


In [6]:
# Three vector-store tables and one jargon table, all opened on the same DB_NAME.
# In the cells visible in this notebook only `longterm_memory` (vector search) and
# `jargon_memory` (full-text search) are actually queried.
raw_memory = DuckVectorStore(db_name=DB_NAME, table="raw_memory", embedding=baai_em)
enrich_memory = DuckVectorStore(db_name=DB_NAME, table="enrich_memory", embedding=baai_em)
longterm_memory = DuckVectorStore(db_name=DB_NAME, table="longterm_memory", embedding=baai_em)
jargon_memory = JargonStore(db_name=DB_NAME, table="jargon_memory")

  raw_memory = DuckVectorStore(db_name=DB_NAME, table="raw_memory", embedding=baai_em)
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  enrich_memory = DuckVectorStore(db_name=DB_NAME, table="enrich_memory", embedding=baai_em)
  longterm_memory = DuckVectorStore(db_name=DB_NAME, table="longterm_memory", embedding=baai_em)


# Agent Flows
- JargonDetector
- JargonEditor
- QueryDecomposer
- Oracle

In [7]:
from agents.jargon_detector import JargonDetector, InputMessage
from agents.jargon_editor import JargonEditor, InputEditMessage
# NOTE(review): `InputMessage` is imported a second time on the next line, so that
# import rebinds the name and the jargon_detector class is no longer reachable as
# `InputMessage`. Confirm the two classes are interchangeable, or alias one of them.
from agents.query_decomposer import QueryDecomposer, InputMessage
from agents.oracle import Oracle, InputOracle

In [8]:
def batch_conversation(
    original_message: str,
    model_name: str = "us.meta.llama3-2-11b-instruct-v1:0",
    min_confidence: float = 0.5,
    search_limit: int = 10,
    top_n: int = 10,
):
    """Answer ``original_message`` with the jargon-aware retrieval pipeline.

    Pipeline:
      1. ``JargonDetector`` proposes jargon terms; those whose confidence is
         strictly above ``min_confidence`` are kept.
      2. Each kept term is looked up in ``jargon_memory`` with a full-text
         search. If anything is found, ``JargonEditor`` rewrites the message
         using that knowledge; otherwise the original message is used as-is.
      3. ``QueryDecomposer`` splits the (possibly rewritten) message into
         sub-queries.
      4. Every sub-query is vector-searched in ``longterm_memory``; hits are
         de-duplicated by ``id`` (first occurrence wins) and re-ranked by
         ``rr`` against the (possibly rewritten) message.
      5. ``Oracle`` is asked the joined sub-queries, with the re-ranked context
         texts as prior knowledge.

    Args:
        original_message: The user's question.
        model_name: Model identifier assigned to ``Oracle.model.model_name``.
        min_confidence: Threshold a detected jargon must exceed to be kept.
        search_limit: ``limit`` passed to each ``longterm_memory.vector_search``.
        top_n: ``top_n`` passed to ``rr.run``.

    Returns:
        Whatever ``Oracle.run`` returns for the request.

    Side effects:
        Sets ``Oracle.model.model_name`` on the shared ``Oracle`` object, so the
        chosen model stays selected after this call returns.
    """
    potential_jargons = JargonDetector.run(request=InputMessage(message=original_message))
    detected_jargons = [j for j in potential_jargons.jargons if j.confidence > min_confidence]

    jargon_knowledges = []
    for j in detected_jargons:
        # Search for the detected term itself (this used to be hard-coded to "STORM").
        jargon_knowledges.extend(jargon_memory.fulltext_search(search_query=j.jargon))

    proxy_message = original_message
    if jargon_knowledges:
        # Only rewrite when the store returned something: with empty knowledge
        # the editor has nothing to ground the jargon's meaning on.
        jargon_knowledges_str = "\n\n".join(
            f"{enum+1}: {j.jargon}\nEvidence: {j.evidence}\nExplanation: {j.explanation}"
            for enum, j in enumerate(jargon_knowledges)
        )
        edited_message = JargonEditor.run(
            InputEditMessage(knowledge=jargon_knowledges_str, message=original_message)
        )
        proxy_message = edited_message.edited_message

    sub_queries = QueryDecomposer.run(InputMessage(message=proxy_message))

    # dict preserves insertion order, so the first hit seen for each id is kept
    # in retrieval order without a quadratic membership scan.
    unique_contexts = {}
    for sq in sub_queries.sub_queries:
        for c in longterm_memory.vector_search(search_query=sq, limit=search_limit):
            unique_contexts.setdefault(c.id, c)
    deduplicated_contexts = list(unique_contexts.values())

    reranked_contexts, _scores = rr.run(
        search_query=proxy_message, contexts=deduplicated_contexts, top_n=top_n
    )
    prior_knowledge = "\n\n".join([f"{c.context}" for c in reranked_contexts])

    Oracle.model.model_name = model_name
    # The Oracle is prompted with the joined sub-queries rather than the
    # rewritten message itself.
    answer = Oracle.run(
        InputOracle(prior_knowledge=prior_knowledge, message=", ".join(sub_queries.sub_queries))
    )
    return answer

In [10]:
# Step 1: run jargon detection on a sample question and keep only the
# candidates scored above 0.5.
# Alternative probe: "What does STORM do in the research study?"
original_message = "Explain how STORM works in the study to me like I'm a five years old."
potential_jargons = JargonDetector.run(request=InputMessage(message=original_message))
detected_jargons = [
    candidate
    for candidate in potential_jargons.jargons
    if candidate.confidence > .5
]
detected_jargons

[PotentialJargon(jargon='STORM', confidence=0.8)]

In [28]:
# Step 2: look up every detected jargon term in the jargon store.
# The query is the detected term itself rather than a hard-coded "STORM", so the
# cell keeps working when `original_message` mentions different jargon.
jargon_knowledges = []
for j in detected_jargons:
    jk = jargon_memory.fulltext_search(search_query=j.jargon)
    jargon_knowledges.extend(jk)

In [29]:
# Render the retrieved jargon records as a numbered, blank-line-separated text
# block (term, evidence, explanation) to feed to the JargonEditor.
jargon_knowledges_str = "\n\n".join(
    f"{rank}: {record.jargon}\nEvidence: {record.evidence}\nExplanation: {record.explanation}"
    for rank, record in enumerate(jargon_knowledges, start=1)
)
print(jargon_knowledges_str)




In [30]:
# Step 3: rewrite the question using the jargon knowledge gathered above.
# NOTE(review): the recorded output of the previous cell is blank, which suggests
# the knowledge string was empty on this run; in that case the editor has nothing
# to ground its rewrite on. TODO confirm.
edited_message = JargonEditor.run(InputEditMessage(knowledge=jargon_knowledges_str, message=original_message))

In [31]:
# Show the question before and after jargon editing, separated by a rule.
print(original_message, "=" * 10, edited_message.edited_message, sep="\n")

Explain how STORM works in the study to me like I'm a five years old.
Explain how STORM (Stochastic Optical Reconstruction Microscopy), a type of super-resolution microscopy, works in the study in a simple way, like I'm a five years old.


In [24]:
# Decompose the unedited question, for comparison with the edited one in the next cell.
QueryDecomposer.run(InputMessage(message=original_message))

DecomposedQueries(sub_queries=['What does STORM do in the research study', 'STORM in the research study'])

In [27]:
# Step 4: decompose the jargon-edited question; these sub-queries drive retrieval below.
sub_queries = QueryDecomposer.run(InputMessage(message=edited_message.edited_message))
sub_queries.sub_queries

['What does STORM do',
 'STORM system',
 'Automated pre-writing stage',
 'Simulates conversations',
 'Discovers different perspectives',
 'Writing system for Synthesis of Topic Outlines',
 'Retrieval and Multi-perspective Question Asking']

In [28]:
# Step 5: vector-search long-term memory once per sub-query (up to 10 hits each)
# and flatten all hits into one list, in sub-query order.
retreived_contexts = [
    hit
    for sq in sub_queries.sub_queries
    for hit in longterm_memory.vector_search(search_query=sq, limit=10)
]

In [29]:
# Total number of hits across all sub-queries (duplicates included).
len(retreived_contexts)

70

In [33]:
# Step 6: drop repeated hits. The same context can be returned for several
# sub-queries; keep the first occurrence of each id, preserving retrieval order.
id_list = []
deduplicated_contexts = []
for c in retreived_contexts:
    if c.id in id_list:
        continue  # already kept an earlier hit with this id
    id_list.append(c.id)
    deduplicated_contexts.append(c)

In [34]:
# Hit counts before and after de-duplication.
len(retreived_contexts), len(deduplicated_contexts)

(70, 28)

In [42]:
# Step 7: join the context texts, blank-line separated, as the Oracle's prior knowledge.
# NOTE: this step-by-step path uses every de-duplicated context as-is; unlike
# `batch_conversation`, it does not re-rank with `rr` first.
prior_knowledge = "\n\n".join([f"{c.context}" for c in deduplicated_contexts])

In [39]:
# Ask the Oracle with the 3B model. Assigning `model_name` changes the shared
# `Oracle` object, so the selection persists into later cells.
Oracle.model.model_name = "us.meta.llama3-2-3b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM is a system that automates the pre-writing stage by researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. It discovers different perspectives by surveying existing articles from similar topics and uses these perspectives to control the question asking process. STORM prompts an LLM to generate a list of related topics and subsequently extracts the tables of contents from their corresponding Wikipedia articles, if such articles can be obtained through Wikipedia API. These tables of contents are concatenated to create a context to prompt the LLM to identify N perspectives P = {p1, ..., p<sup>N</sup> } that can collectively contribute to a comprehensive article on t. STORM creates an outline for an article by generating a draft outline from a topic and refining it with simulated conversations and LLM knowledge.


In [40]:
# Same request with the 11B model, for side-by-side comparison.
Oracle.model.model_name = "us.meta.llama3-2-11b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM simulates conversations between a Wikipedia writer and a topic expert, discovers different perspectives by surveying articles, and uses these perspectives to control question asking, prompting an LLM to generate a list of related topics and extract tables of contents to identify N perspectives that contribute to a comprehensive article.


In [41]:
# Same request with the 70B model, for side-by-side comparison.
Oracle.model.model_name = "us.meta.llama3-3-70b-instruct-v1:0"
answer = Oracle.run(InputOracle(prior_knowledge=prior_knowledge, message=edited_message.edited_message))
print(answer.answer)

STORM simulates conversations between a Wikipedia writer and a topic expert, discovers different perspectives by surveying articles, and creates an outline for an article by generating a draft outline from a topic and refining it with simulated conversations and LLM knowledge.


# Test model

In [11]:
# Model identifiers compared below; each one is passed to `batch_conversation`
# as `model_name`.
model_list = [
    "us.meta.llama3-2-3b-instruct-v1:0",
    "us.meta.llama3-2-11b-instruct-v1:0",
    "us.meta.llama3-3-70b-instruct-v1:0"
]

In [19]:
# Put the same question to every model in `model_list` and print each answer
# under its model name, followed by a separator rule.
original_message = "What does STORM do in the research study?"
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a writing system that automates the pre-writing stage for creating Wikipedia-like articles from scratch. It discovers different perspectives by surveying existing articles from similar topics and uses these perspectives to control the question asking process. STORM simulates a conversation between a Wikipedia writer and a topic expert to generate questions and answers, using LLM and trusted sources to ensure factual information.

The system consists of three main stages:

1. **Discovering different perspectives**: STORM discovers different perspectives by surveying existing articles from similar topics and extracting tables of contents from their corresponding Wikipedia articles. These tables of contents are concatenated to create a context to prompt the LLM to identify N perspectives that can collectively contribute to a comprehensive article on the given topic.

2. **Simulating conversations**: STORM simulates a conversation between a

In [20]:
# Put the dataset question to every model in `model_list` and print each
# answer under its model name, followed by a separator rule.
original_message = "What does the dataset used in the study?"
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
The study used the FreshWiki dataset, which consists of 100 samples of human-written articles under 3000 words. This dataset was selected for comparison with the proposed STORM system.
model: us.meta.llama3-2-11b-instruct-v1:0
The dataset used in the STORM study is called FreshWiki, which is a dataset of recent high-quality Wikipedia articles.
model: us.meta.llama3-3-70b-instruct-v1:0
The dataset used in the STORM study is called FreshWiki, which is a collection of recent high-quality Wikipedia articles. This dataset was curated for the purpose of evaluating the effectiveness of the STORM system in generating outlines and full-length articles.


In [21]:
# Put the "how does it work" question to every model in `model_list` and print
# each answer under its model name, followed by a separator rule.
original_message = "How does STORM work in the study?"
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
The STORM writing system is a framework that automates the pre-writing stage for creating Wikipedia-like articles from scratch. It works by discovering different perspectives on a given topic, simulating conversations between a Wikipedia writer and a topic expert, and creating a comprehensive outline.

Here's a step-by-step explanation of how STORM works:

1. **Discovering perspectives**: STORM identifies different perspectives on a given topic by surveying existing articles from similar topics. It uses these perspectives to control the question asking process.

2. **Simulating conversations**: STORM simulates a conversation between a Wikipedia writer and a topic expert. In each round of the conversation, the LLM-powered Wikipedia writer generates a single question based on the topic, its assigned perspective, and the conversation history. The conversation history enables the LLM to update its understanding of the topic and ask follow-up questio

In [22]:
# Imperative phrasing of the same question: run it through every model in
# `model_list` and print each answer with a separator rule.
original_message = "Explain how STORM works in the study."
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
Based on the provided prior knowledge, here's a summary of how STORM works:

STORM is a writing system that automates the pre-writing stage for creating Wikipedia-like articles from scratch. It consists of three main stages:

1. **Discovering perspectives**: STORM discovers different perspectives on a given topic by surveying existing Wikipedia articles from similar topics. It uses these perspectives to control the question asking process.
2. **Simulating conversations**: STORM simulates a conversation between a Wikipedia writer and a topic expert. The writer generates questions based on the topic, its assigned perspective, and the conversation history. The expert's answers are grounded on trusted sources from the Internet.
3. **Creating the outline**: STORM creates an outline by generating a draft outline from the topic and refining it with simulated conversations and LLM knowledge.

The algorithm for STORM is as follows:

1. Discover perspecti

In [12]:
# Same question with a "plain English" style instruction: run it through every
# model in `model_list` and print each answer with a separator rule.
original_message = "Explain how STORM works in the study in plain English."
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
Here's a simplified explanation of how STORM works:

**What is STORM?**
STORM is a system that automates the pre-writing stage for creating Wikipedia-like articles from scratch. It uses large language models (LLMs) to research a topic, create an outline, and simulate conversations between a writer and a topic expert.

**How does STORM work?**

1. **Discovering perspectives**: STORM starts by surveying existing articles from similar topics to discover different perspectives on the given topic.
2. **Simulating conversations**: STORM simulates a conversation between a Wikipedia writer and a topic expert. The writer asks questions based on the topic and its assigned perspective, and the expert provides answers grounded on trustworthy online sources.
3. **Creating an outline**: After the conversations, STORM creates an outline by generating a draft outline from the topic and refining it with the simulated conversations and LLM knowledge.
4. **Refinin

In [13]:
# Same question with an "explain like I'm five" instruction: run it through
# every model in `model_list` and print each answer with a separator rule.
original_message = "Explain how STORM works in the study to me like I'm a five years old."
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
STORM is a system that automates the pre-writing stage by researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. Here's how it works:

1. **Researching the topic**: STORM starts by researching the given topic via effective question asking. This involves generating a list of related topics and extracting tables of contents from corresponding Wikipedia articles.
2. **Discovering perspectives**: The system discovers different perspectives by surveying existing articles from similar topics. These perspectives are used to control the question asking process.
3. **Simulating conversations**: STORM simulates multi-turn conversations between a writer and an expert grounded on trusted online sources. This involves generating questions, answering them, and refining the outline based on the collected information.
4. **Creating the outline**: After researching the topic and si

In [26]:
# Same question with an "in detail" instruction: run it through every model in
# `model_list` and print each answer with a separator rule.
original_message = "Explain how STORM works in the study to me in detail."
for m in model_list:
    answer = batch_conversation(original_message, model_name=m)
    print("model:", m)
    print(answer, "=" * 20, sep="\n")

model: us.meta.llama3-2-3b-instruct-v1:0
Based on the provided prior knowledge, STORM is a system that automates the pre-writing stage by researching a topic, creating an outline, and simulating conversations between a writer and an expert to generate a full-length article. 

Here's a summary of how STORM works:

1. **Perspective-Guided Question Asking**: STORM discovers different perspectives by surveying existing articles from similar topics and uses these perspectives to control the question asking process. It prompts an LLM to generate a list of related topics and subsequently extracts the tables of contents from their corresponding Wikipedia articles. These tables of contents are concatenated to create a context to prompt the LLM to identify N perspectives that can collectively contribute to a comprehensive article on the given topic.

2. **Simulated Conversations**: STORM simulates multi-turn conversations between a Wikipedia writer and a topic expert grounded on trusted Internet