In [1]:
import sys

# broai supports python3.11
print(sys.version)

3.11.12 (main, Apr  9 2025, 04:04:00) [Clang 20.1.0 ]


# Start

In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell is executed so source edits take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Setup for the Test

In [2]:
from broai.prompt_management.core import Persona, Instructions, Example, Examples
from broai.llm_management.ollama import BedrockOllamaChat
from broai.prompt_management.core import PromptGenerator
from broai.agent_management.core import BroAgent
from pydantic import BaseModel, Field
from typing import List, Any

In [3]:
# Chat model client, constructed with no arguments; every BroAgent in this
# notebook receives it as `model=`.
bedrock_model = BedrockOllamaChat()

In [4]:
# A single joke with its two text fields.
# Documented with comments rather than docstrings on purpose: pydantic copies a
# class docstring into the generated JSON schema, which would change the schema.
class Joke(BaseModel):
    setup: str = Field(description="this is a setup for the joke")
    punchline: str = Field(description="this is a punchline of the joke")


# A list of Joke items; passed as `structured_output` in the cells below.
class Jokes(BaseModel):
    jokes: List[Joke]

In [5]:
# Request payload with a single `message` field; used for example inputs and
# for the typed request in the happy-path cell.
class InputFormat(BaseModel):
    message: str = Field(description="The user message")

# BroAgent with full Framework: Happy Path

In [7]:
%%time
# Full-framework prompt: persona, instruction list, Jokes as the output schema,
# one example pair, and a Jokes-typed fallback value.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions=Instructions(instructions=["tell some jokes based on message"]),
    structured_output=Jokes,
    examples=Examples(
        examples=[
            Example(
                setting="Funny Andy",
                input=InputFormat(message="Gimme three jokes"),
                output=Jokes(
                    jokes=[
                        Joke(
                            setup="the setup of the joke to build curiosity",
                            punchline="the punchline is to complete the joke",
                        )
                    ]
                ),
            )
        ]
    ),
    fallback=Jokes(jokes=[Joke(setup="error", punchline="error")]),
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# Typed request; the recorded run printed a Jokes object holding ten jokes.
response = bro.run(request=InputFormat(message="Tell me ten jokes."))
print(response)

jokes=[Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything'), Joke(setup="Why don't eggs tell jokes?", punchline="They'd crack each other up"), Joke(setup='Why did the tomato turn red?', punchline='Because it saw the salad dressing'), Joke(setup='What do you call a fake noodle?', punchline='An impasta'), Joke(setup='Why did the scarecrow win an award?', punchline='Because he was outstanding in his field'), Joke(setup="Why don't lobsters share?", punchline="Because they're shellfish"), Joke(setup="What do you call a can opener that doesn't work?", punchline="A can't opener"), Joke(setup='I told my wife she was drawing her eyebrows too high.', punchline='She looked surprised'), Joke(setup="Why don't some couples go to the gym?", punchline="Because some relationships don't work out"), Joke(setup='Why did the bicycle fall over?', punchline='Because it was two-tired')]
CPU times: user 84.2 ms, sys: 15.8 ms, total: 100 ms
Wall time: 3.01 s


# BroAgent Full Framework: Pydantic Fallback

In [8]:
# Schema whose fields have nothing to do with jokes. The fallback sections
# below pass it as `structured_output` while the examples still show Jokes.
# NOTE(review): presumably the mismatch is intentional, to force the fallback
# path — confirm.
class Decoy(BaseModel):
    a: str
    b: str
    c: str
    e: int
    f: float

In [9]:
%%time
# Same prompt as the happy path except `structured_output` is Decoy; the
# example output and the fallback are still Jokes.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions=Instructions(instructions=["tell some jokes based on message"]),
    structured_output=Decoy,
    examples=Examples(
        examples=[
            Example(
                setting="Funny Andy",
                input=InputFormat(message="Gimme three jokes"),
                output=Jokes(
                    jokes=[
                        Joke(
                            setup="the setup of the joke to build curiosity",
                            punchline="the punchline is to complete the joke",
                        )
                    ]
                ),
            )
        ]
    ),
    fallback=Jokes(jokes=[Joke(setup="error", punchline="error")]),
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# Plain-string request. The recorded run printed the Jokes fallback object.
# NOTE(review): presumably because the reply could not be parsed as Decoy — confirm.
response = bro.run(request="Tell me ten jokes.")
print(response)

jokes=[Joke(setup='error', punchline='error')]
CPU times: user 121 ms, sys: 0 ns, total: 121 ms
Wall time: 12.3 s


# BroAgent Full Framework: Default Fallback

In [10]:
%%time
# Decoy output schema again, but this time no `fallback` argument is passed.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions=Instructions(instructions=["tell some jokes based on message"]),
    structured_output=Decoy,
    examples=Examples(
        examples=[
            Example(
                setting="Funny Andy",
                input=InputFormat(message="Gimme three jokes"),
                output=Jokes(
                    jokes=[
                        Joke(
                            setup="the setup of the joke to build curiosity",
                            punchline="the punchline is to complete the joke",
                        )
                    ]
                ),
            )
        ]
    ),
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# With no fallback configured, the recorded run printed "unknown error".
response = bro.run(request="Tell me ten jokes.")
print(response)

unknown error
CPU times: user 120 ms, sys: 588 μs, total: 121 ms
Wall time: 12.4 s


# BroAgent Full Framework: Custom Fallback (string)

In [11]:
%%time
# Decoy output schema with a plain string supplied as the fallback value.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions=Instructions(instructions=["tell some jokes based on message"]),
    structured_output=Decoy,
    examples=Examples(
        examples=[
            Example(
                setting="Funny Andy",
                input=InputFormat(message="Gimme three jokes"),
                output=Jokes(
                    jokes=[
                        Joke(
                            setup="the setup of the joke to build curiosity",
                            punchline="the punchline is to complete the joke",
                        )
                    ]
                ),
            )
        ]
    ),
    fallback="This is a custom fallback in string",
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# The recorded run printed the fallback string unchanged.
response = bro.run(request="Tell me ten jokes.")
print(response)

This is a custom fallback in string
CPU times: user 121 ms, sys: 0 ns, total: 121 ms
Wall time: 12.5 s


# BroAgent Full Framework: Custom Fallback (non-string)

In [12]:
%%time
# Decoy output schema with a dict supplied as the fallback value.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions=Instructions(instructions=["tell some jokes based on message"]),
    structured_output=Decoy,
    examples=Examples(
        examples=[
            Example(
                setting="Funny Andy",
                input=InputFormat(message="Gimme three jokes"),
                output=Jokes(
                    jokes=[
                        Joke(
                            setup="the setup of the joke to build curiosity",
                            punchline="the punchline is to complete the joke",
                        )
                    ]
                ),
            )
        ]
    ),
    fallback={"error": "This is another custom fallback"},
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# The recorded run printed the fallback dict unchanged.
response = bro.run(request="Tell me ten jokes.")
print(response)

{'error': 'This is another custom fallback'}
CPU times: user 120 ms, sys: 396 μs, total: 120 ms
Wall time: 12.4 s


# BroAgent with String: Happy Path

In [13]:
%%time
# String-only configuration: instructions and the output format are given as
# plain strings instead of Instructions / a pydantic model, with no examples
# and no fallback.
pg = PromptGenerator(
    persona="You are the good bro Andy.",
    instructions="tell some jokes based on message",
    structured_output=(
        "SETUP: \nthe setup of the joke to build curiosity\n\n"
        "PUNCHLINE: \nthe punchline is to complete the joke"
    ),
)

bro = BroAgent(prompt_generator=pg, model=bedrock_model)

# The recorded run printed free text (a numbered list of jokes).
response = bro.run(request="Tell me ten jokes.")
print(response)

I gotcha! Here are ten jokes for ya:

1. Setup: I told my wife she was drawing her eyebrows too high.
Punchline: She looked surprised.

2. Setup: Why don't scientists trust atoms?
Punchline: Because they make up everything.

3. Setup: Why don't eggs tell jokes?
Punchline: They'd crack each other up.

4. Setup: What do you call a fake noodle?
Punchline: An impasta.

5. Setup: Why did the scarecrow win an award?
Punchline: Because he was outstanding in his field.

6. Setup: What do you call a can opener that doesn't work?
Punchline: A can't opener.

7. Setup: I'm reading a book about anti-gravity.
Punchline: It's impossible to put down.

8. Setup: Why did the bicycle fall over?
Punchline: Because it was two-tired.

9. Setup: What do you call a bear with no socks on?
Punchline: Barefoot.

10. Setup: Why did the banana go to the doctor?
Punchline: Because he wasn't peeling well.
CPU times: user 39.4 ms, sys: 164 μs, total: 39.6 ms
Wall time: 2.41 s


In [14]:
# Keep the free-text jokes from the previous cell under their own name;
# `response` is reassigned by the extraction cell that follows.
joke_in_string = response

# Bonus: Extract from normal string

In [33]:
%%time

# Request schema for the extraction step. It has its own name so that it does
# not shadow the earlier `InputFormat` model (which has a `message` field);
# redefining `InputFormat` here made the earlier cells fail when re-run.
class ExtractionInput(BaseModel):
    content: str

# Extractor prompt: turn the free-text jokes into the Jokes schema.
pg = PromptGenerator(
    persona="You are a content extractor.",
    instructions=Instructions(
        instructions=[
            # Spelling fixed ("sepcified JSON formant") so the model receives a clean instruction.
            "Extract the content into the specified JSON format.",
        ],
    ),
    structured_output=Jokes,
    examples=Examples(examples=[
        Example(
            setting="Joke Extraction",
            input=ExtractionInput(content=joke_in_string),
            output=Jokes(jokes=[
                Joke(setup="the setup of the joke to build curiosity", punchline="the punchline is to complete the joke")
            ]),
        )
    ]),
    fallback=Jokes(jokes=[Joke(setup="error", punchline="error")])
)

bro = BroAgent(
    prompt_generator=pg,
    model=bedrock_model
)

# `joke_in_string` is the free-text output captured from the string happy path.
response = bro.run(request=ExtractionInput(content=joke_in_string))
print(response)

jokes=[Joke(setup='I told my wife she was drawing her eyebrows too high.', punchline='She looked surprised.'), Joke(setup="Why don't scientists trust atoms?", punchline='Because they make up everything.'), Joke(setup="Why don't eggs tell jokes?", punchline="They'd crack each other up."), Joke(setup='What do you call a fake noodle?', punchline='An impasta.'), Joke(setup='Why did the scarecrow win an award?', punchline='Because he was outstanding in his field.'), Joke(setup="What do you call a can opener that doesn't work?", punchline="A can't opener."), Joke(setup="I'm reading a book about anti-gravity.", punchline="It's impossible to put down."), Joke(setup='Why did the bicycle fall over?', punchline='Because it was two-tired.'), Joke(setup='What do you call a bear with no socks on?', punchline='Barefoot.'), Joke(setup='Why did the banana go to the doctor?', punchline="Because he wasn't peeling well.")]
CPU times: user 40.8 ms, sys: 2.83 ms, total: 43.7 ms
Wall time: 3.02 s


# DuckStore

In [7]:
from broai.duckdb_management.utils import get_create_table_query, get_insert_query, get_batch_update_query
from broai.duckdb_management.interface import DuckStoreInterface

In [8]:
# Column name -> DuckDB type for the demo table.
schemas = dict(doc_id="VARCHAR", content="VARCHAR", data="JSON")

# Store handle for table `sessionmemory` in the local file ./duckmemory.db.
sm = DuckStoreInterface(
    db="./duckmemory.db",
    table="sessionmemory",
    schemas=schemas,
)

In [9]:
# Create the table; the query in the next cell shows it empty with the
# doc_id / content / data columns.
sm.create_table()

In [10]:
# Run a raw SQL query against the store and display the result as a table.
sm.sql_df(query="SELECT * FROM sessionmemory;")

Unnamed: 0,doc_id,content,data


In [11]:
# Display the column -> type mapping; the recorded output matches `schemas`.
sm.show_schemas()

{'doc_id': 'VARCHAR', 'content': 'VARCHAR', 'data': 'JSON'}

In [12]:
# Two (doc_id, content) rows to insert.
_data = [["0", "a"], ["1", "b"]]

# `add` takes the rows as one SQL VALUES string: ('0', 'a'), ('1', 'b')
data = ", ".join(f"('{doc_id}', '{content}')" for doc_id, content in _data)
sm.add(fields=["doc_id", "content"], data=data)

# Read everything back; `data` is left empty because only two fields were inserted.
sm.read(fields=["*"])

Unnamed: 0,doc_id,content,data
0,0,a,
1,1,b,


In [13]:
# New content values for the two existing doc_ids.
_data = [["0", "aa"], ["1", "bb"]]

# Same VALUES-string format as for `add`: ('0', 'aa'), ('1', 'bb')
data = ", ".join(f"('{doc_id}', '{content}')" for doc_id, content in _data)

# Rows are matched on `doc_id`; the recorded output shows `content` replaced.
sm.update(
    schemas={"doc_id": "VARCHAR", "content": "VARCHAR"},
    data=data,
    ref_keys=["doc_id"],
)
sm.read()

Unnamed: 0,doc_id,content,data
0,0,aa,
1,1,bb,


In [14]:
# Delete rows whose doc_id is '1' or '2'. Only '1' exists, so the recorded
# output shows just the doc_id '0' row remaining.
sm.delete(where_condition="WHERE doc_id IN ('1', '2')")
sm.read()

Unnamed: 0,doc_id,content,data
0,0,aa,


In [15]:
# NOTE(review): presumably removes all rows while keeping the table (it is
# dropped separately in the next cell) — confirm against DuckStoreInterface.
sm.delete_table()

In [16]:
# NOTE(review): presumably drops the `sessionmemory` table itself — confirm.
sm.drop_table()

In [17]:
# Clean up the demo database. The call requires an explicit confirmation string.
# NOTE(review): presumably deletes ./duckmemory.db from disk — confirm.
sm.remove_database(confirm="remove database")

# Utility

In [18]:
from broai.interface import Context, TaskStatus

In [19]:
# Only `context` is supplied; the recorded repr shows `id` and `created_at`
# filled in automatically, `metadata=None` and `type='document'`.
Context(context="Test")

Context(id='472d878e-2b0b-4f43-9876-3045455b0855', context='Test', metadata=None, type='document', created_at='2025-04-20 16:26:08.193622')

In [20]:
# Enum member; its value is the string 'not_started'.
TaskStatus.NOT_STARTED

<TaskStatus.NOT_STARTED: 'not_started'>

In [21]:
# Enum member; its value is the string 'pending'.
TaskStatus.PENDING

<TaskStatus.PENDING: 'pending'>

In [22]:
# Enum member; its value is the string 'done'.
TaskStatus.DONE

<TaskStatus.DONE: 'done'>

# Experiment

## pdf_to_markdown

In [7]:
from broai.experiments.pdf_to_markdown import pdf_to_markdown

# Convert the PDF into markdown text plus a second `images` result (not used in
# the cells shown here). The recorded log shows layout, OCR and table models
# being loaded on CUDA, so this cell is slow and expects a GPU environment.
markdown_text, images = pdf_to_markdown("./docs/test1/storm.pdf")

  markdown_text, images = pdf_to_markdown("./docs/test1/storm.pdf")


Loaded layout model s3://layout/2025_02_18 on device cuda with dtype torch.float16
Loaded texify model s3://texify/2025_02_18 on device cuda with dtype torch.float16
Loaded recognition model s3://text_recognition/2025_02_18 on device cuda with dtype torch.float16
Loaded table recognition model s3://table_recognition/2025_02_18 on device cuda with dtype torch.float16
Loaded detection model s3://text_detection/2025_02_28 on device cuda with dtype torch.float16
Loaded detection model s3://inline_math_detection/2025_02_24 on device cuda with dtype torch.float16


Recognizing layout: 100%|██████████| 5/5 [00:03<00:00,  1.33it/s]
Running OCR Error Detection: 100%|██████████| 7/7 [00:00<00:00, 65.98it/s]
Detecting bboxes: 0it [00:00, ?it/s]
Texify inference: 100%|██████████| 1/1 [00:01<00:00,  1.44s/it]
Detecting bboxes: 0it [00:00, ?it/s]
Recognizing tables: 100%|██████████| 2/2 [00:01<00:00,  1.44it/s]


In [8]:
# Persist the converted markdown next to the source PDF.
# The encoding is pinned to UTF-8 so the OCR'd text is written identically on
# every platform instead of depending on the locale's default encoding.
with open("./docs/test1/storm.md", "w", encoding="utf-8") as f:
    f.write(markdown_text)

## Embedding: BAAI/bge-m3

In [7]:
from FlagEmbedding import BGEM3FlagModel

# Setting use_fp16 to True speeds up computation with a slight performance degradation
model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)

# Two short queries and two longer passages. The spelling of the second query
# is kept exactly as-is: changing the text would change its embedding.
sentences_1 = [
    "What is BGE M3?",
    "Defination of BM25",
]
sentences_2 = [
    "BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.",
    "BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document",
]

# If you don't need such a long max_length, a smaller value speeds up encoding.
embeddings_1 = model.encode(sentences_1, batch_size=12, max_length=8192)["dense_vecs"]
embeddings_2 = model.encode(sentences_2)["dense_vecs"]

# Query-by-passage similarity matrix from the dense vectors.
similarity = embeddings_1 @ embeddings_2.T
print(similarity)
# [[0.6265, 0.3477], [0.3499, 0.678 ]]


Fetching 30 files: 100%|██████████| 30/30 [00:11<00:00,  2.51it/s]
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


[[0.626  0.3477]
 [0.3499 0.678 ]]


In [8]:
%%time
# Time a repeat encode of the two short queries with the same arguments as the
# first call (a smaller max_length would speed up encoding if not needed).
embeddings_1 = model.encode(sentences_1, batch_size=12, max_length=8192)["dense_vecs"]

CPU times: user 39.2 ms, sys: 0 ns, total: 39.2 ms
Wall time: 38.8 ms


In [9]:
%%time
# Time encoding of the two longer passages with default encode arguments.
embeddings_2 = model.encode(sentences_2)['dense_vecs']

CPU times: user 38.3 ms, sys: 334 μs, total: 38.6 ms
Wall time: 38.3 ms


In [10]:
%%time
# Encode eight inputs, alternating the two sentence lists.
# NOTE(review): each input is itself a two-sentence list, and the recorded
# result has shape (8, 1024) — one vector per inner list, not per sentence.
# Presumably each inner list is treated as a single (pair) input; flatten the
# lists first if sixteen per-sentence embeddings were intended.
embeddings_3 = model.encode([sentences_1, sentences_2] * 4)["dense_vecs"]

CPU times: user 39.5 ms, sys: 0 ns, total: 39.5 ms
Wall time: 39.1 ms


In [11]:
# Inspect the result: a float16 array of shape (8, 1024) whose rows alternate
# between two distinct vectors.
embeddings_3

array([[-0.02412 , -0.05185 , -0.01099 , ...,  0.0371  ,  0.02528 ,
        -0.01452 ],
       [ 0.01265 , -0.0691  ,  0.002552, ...,  0.02354 ,  0.001125,
         0.00848 ],
       [-0.02412 , -0.05185 , -0.01099 , ...,  0.0371  ,  0.02528 ,
        -0.01452 ],
       ...,
       [ 0.01265 , -0.0691  ,  0.002552, ...,  0.02354 ,  0.001125,
         0.00848 ],
       [-0.02412 , -0.05185 , -0.01099 , ...,  0.0371  ,  0.02528 ,
        -0.01452 ],
       [ 0.01265 , -0.0691  ,  0.002552, ...,  0.02354 ,  0.001125,
         0.00848 ]], shape=(8, 1024), dtype=float16)

## CrossEncoder: cross-encoder/ms-marco-MiniLM-L6-v2

In [1]:
%%time
from sentence_transformers.cross_encoder import CrossEncoder

# NOTE: this rebinds `model`, which the embedding section above bound to a
# BGEM3FlagModel; re-run that section before reusing its cells.
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


CPU times: user 5.75 s, sys: 692 ms, total: 6.45 s
Wall time: 5.96 s


In [2]:
# Candidate texts to score. Despite its name, `query` holds the candidates;
# the text they are compared against is the fixed string "pandas is good".
query = [
    "pandas is goose",
    "pandas is good",
    "pandas is great",
    "pandas is goat",
    "pandas is gang",
]

# One cross-encoder score per [reference, candidate] pair.
scores = model.predict([["pandas is good", candidate] for candidate in query])

## rerank_contexts

In [8]:
from broai.experiments.rerank import rerank_contexts
from broai.interface import Context

In [5]:
# Keep the three best-scoring candidates. The recorded result is a tuple of
# (texts, scores) ordered from highest to lowest score.
reranked_contexts = rerank_contexts(query, scores, top_n=3)
reranked_contexts

  reranked_contexts = rerank_contexts(query, scores, top_n=3)


(['pandas is good', 'pandas is great', 'pandas is goat'],
 [8.630863189697266, 7.362998962402344, 0.6360796689987183])

## chunk

In [1]:
# Load the markdown produced by the pdf_to_markdown section.
# The encoding is pinned to UTF-8 so non-ASCII characters decode the same way
# on every platform instead of depending on the locale's default encoding.
with open("./docs/test1/storm.md", "r", encoding="utf-8") as f:
    markdown_text = f.read()

In [3]:
from broai.experiments.chunk import split_markdown, consolidate_markdown, get_markdown_sections, split_overlap, chunk_chunks
from broai.interface import Context

In [4]:
# Split the document into chunks; the call also prints the maximum markdown
# heading depth it found ("max(4)" in the recorded run).
chunks = split_markdown(markdown_text)

Markdown headings: max(4)


  chunks = split_markdown(markdown_text)


In [5]:
# Number of chunks returned by split_markdown (54 in the recorded run).
len(chunks)

54

In [6]:
# Consolidation reduces the chunk count (54 -> 50 in the recorded run).
consolidated_chunks = consolidate_markdown(chunks)
len(consolidated_chunks)

  consolidated_chunks = consolidate_markdown(chunks)


50

In [7]:
# One section entry per consolidated chunk (50 of each in the recorded run);
# the next cell pairs them up by position.
sections = get_markdown_sections(consolidated_chunks)
len(sections)

  sections = get_markdown_sections(consolidated_chunks)


50

In [8]:
# Wrap each consolidated chunk in a Context tagged with its section and source.
contexts = []
# Path of the markdown file the chunks were read from. It was previously
# ".docs/..." (misplaced dot), which did not match the file actually read.
source = "./docs/test1/storm.md"
# strict=True raises ValueError instead of silently dropping items if the
# number of sections and chunks ever differs.
for section, chunk in zip(sections, consolidated_chunks, strict=True):
    contexts.append(Context(context=chunk, metadata={"section": section, "source": source, "type": "document"}))
len(contexts)

50

In [10]:
# Re-split the contexts; the count grows (50 -> 85 in the recorded run).
# Each returned item exposes `.context`, which the next cell reads.
new_contexts = split_overlap(contexts)
len(new_contexts)

  new_contexts = split_overlap(contexts)


85

In [11]:
# Pass the raw text of every context; the call prints one line per chunk with
# its index, token count and character count.
chunk_chunks([c.context for c in new_contexts])

[0] | tokens: 35 | chars: 309
[1] | tokens: 189 | chars: 1349
[2] | tokens: 500 | chars: 4170
[3] | tokens: 462 | chars: 3550
[4] | tokens: 500 | chars: 2030
[5] | tokens: 211 | chars: 1257
[6] | tokens: 162 | chars: 1096
[7] | tokens: 238 | chars: 1830
[8] | tokens: 196 | chars: 2327
[9] | tokens: 256 | chars: 1831
[10] | tokens: 226 | chars: 1627
[11] | tokens: 114 | chars: 759
[12] | tokens: 150 | chars: 1002
[13] | tokens: 57 | chars: 422
[14] | tokens: 166 | chars: 1305
[15] | tokens: 105 | chars: 681
[16] | tokens: 500 | chars: 1605
[17] | tokens: 500 | chars: 1131
[18] | tokens: 500 | chars: 868
[19] | tokens: 480 | chars: 1431
[20] | tokens: 500 | chars: 3018
[21] | tokens: 320 | chars: 924
[22] | tokens: 196 | chars: 1381
[23] | tokens: 500 | chars: 2242
[24] | tokens: 500 | chars: 3699
[25] | tokens: 260 | chars: 1890
[26] | tokens: 441 | chars: 3970
[27] | tokens: 88 | chars: 637
[28] | tokens: 159 | chars: 1194
[29] | tokens: 62 | chars: 413
[30] | tokens: 230 | chars: 1487