### Init and imports

In [None]:
import duckdb
from pydantic import BaseModel

import autogen
from autogen.agentchat.contrib.learning.knowledge_code_gen_swarm_agent import KnowledgeCodeGenSwarmAgent
from autogen.agentchat.contrib.learning.knowledge_function_swarm_agent import KnowledgeFunctionSwarmAgent
from autogen.coding.jupyter.local_jupyter_server import LocalJupyterServer

# Presumably maps a model name to the environment variable holding its API key.
# NOTE(review): key_map is not referenced anywhere else in the visible part of this file — confirm it is still needed.
key_map = {"gpt-4o-mini": "OPENAI_API_KEY"}

# Model configurations are read from the OAI_CONFIG_LIST file (or environment variable) at this absolute path.
config_list = autogen.config_list_from_json(env_or_file="/workspaces/ag2/OAI_CONFIG_LIST")

# Shared LLM settings; the agents below reuse this dict as both their researcher
# and their result-validator configuration.
llm_config = {
    "cache_seed": 42,  # Change the cache_seed for different trials
    "temperature": 1,
    "config_list": config_list,
    "timeout": 120,
}

### Call a function for me

The first feature to show is creating an in-code agent that calls a function for you.

`run_query`: This function is very simple. One interesting thing (that needs to be documented) is that
the function you provide can return a `Tuple[Any, str]`, where the `Any` is passed through as the final result
and the string is what is used and interpreted by the LLM. This lets us truncate the data that the LLM sees
while still returning the whole thing to the user.

`queryer` This sets up an agent which is responsible for using the function.

That's all the setup! Then it's ready to go with some simple queries. It doesn't really know anything yet.


In [None]:
def run_query(context_variables: dict, query: str):
    """
    Executes a DuckDB query and returns the results, including a markdown table preview.

    Args:
        context_variables (dict): Contextual variables (unused in this function).
        query (str): The DuckDB-compatible SQL query to execute.

    Returns:
        tuple: ``(df, text)``. ``df`` is the complete query result as a DataFrame.
        ``text`` is a markdown table of at most the first 50 rows, preceded by a
        truncation notice when the query returned more rows than that.
    """
    # Single source of truth for the preview size, so the notice and the slice cannot drift apart.
    max_preview_rows = 50

    with duckdb.connect("/workspaces/ag2/autogen/agentchat/contrib/learning/animal_crossing.duckdb") as conn:
        df = conn.execute(query).fetchdf()

    # The result has already been fetched into a DataFrame, so the preview is built
    # after the connection has been released.
    response_lines = []
    if len(df) > max_preview_rows:
        response_lines.append(
            f"Truncated Results: Query returned {len(df)} rows. "
            f"Only {max_preview_rows} rows will be visible. This is not an error"
        )
    response_lines.append(df.head(max_preview_rows).to_markdown())
    return (df, "\n".join(response_lines))


# Agent whose only registered function is run_query; the same llm_config is used for
# both its researcher and its result validator.
queryer = KnowledgeFunctionSwarmAgent(
    name="AnimalCrossingQueryer",
    researcher_llm_config=llm_config,
    result_validator_llm_config=llm_config,
    agent_system_message="You are responsible for querying a database containing information about the game animal crossing. It's fine if the result you see is truncated.",
    max_rounds=50,
    functions=[run_query],
)

# First request: have the agent work out how to list the tables in the database.
list_tables_result = queryer.auto_gen_func("List the available tables. Only use valid DuckDB syntax.")

# .result is later treated as a DataFrame (it is iterated with itertuples further down).
print(list_tables_result.result)

# Explicitly tell the agent to remember this response
queryer.remember(list_tables_result)

### It needs a friend!
To show off agents sharing memories with one another, here is another agent, `table_summarizer`.
This function defined here should be able to be replaced with a way to configure structured output, but that's not super relevant.

This agent is responsible for summarizing tables. Since it's implemented as a function, these can be saved to memories!

This helps make the super rough example possible:
- The results of one agent aren't great to be stored in memories. They're either long or ugly
- So you instead pass the results to another agent to summarize, then you use the memories of that agent instead!

In [None]:
# Summary of a single column; used as the element type of TableSummaryModel.columns.
class ColumnSummary(BaseModel):
    # Name of the column.
    column_name: str
    # Type of the column, given as text.
    column_type: str
    # Text summary of the values in the column.
    values_summary: str


# Structured summary of one table: its name, a short text summary and one ColumnSummary per column.
class TableSummaryModel(BaseModel):
    table_name: str
    table_summary: str
    columns: list[ColumnSummary]

    def __str__(self) -> str:
        # The text form covers only the table name and the summary; the per-column entries are left out.
        name_line = f"table_name: {self.table_name}"
        summary_body = f"{self.table_summary}"
        return "\n".join((name_line, "table_summary:", summary_body))


def summarize_table(context_variables: dict, table_summary: TableSummaryModel):
    """
    Allows you to submit a summary of the provided text.

    Args:
        context_variables (dict): Contextual variables (unused in this function).
        table_summary (TableSummaryModel): The informational summary of the table.
    Returns:
        tuple: The summary exactly as it was passed in, paired with its string form.
    """
    summary_text = str(table_summary)
    return table_summary, summary_text


# Agent whose only registered function is summarize_table; the same llm_config is used
# for both its researcher and its result validator.
table_summarizer = KnowledgeFunctionSwarmAgent(
    name="TableSummarizer",
    researcher_llm_config=llm_config,
    result_validator_llm_config=llm_config,
    # The prompt names the function exactly as it is registered (summarize_table) and lists the
    # column fields as nested ColumnSummary entries, so it agrees with TableSummaryModel.
    agent_system_message="""Provide a summary of the provided table using the summarize_table function.
Ensure you fill out the entire TableSummaryModel schema:
table_name: str
table_summary: str The summary of the purpose and content of the table. 3 sentences max.
columns: list[ColumnSummary], one entry per column, each with:
    column_name: str
    column_type: str
    values_summary: str""",
    max_rounds=5,
    functions=[summarize_table],
)

# Fetch a small sample of fish rows to try the summarizer on.
ten_fish_res = queryer.auto_gen_func("Show me 10 fish")
# Note, not saving the memory because this is just for an example
# The sample rows are rendered as a markdown table and embedded in the request text.
summary_res = table_summarizer.auto_gen_func(f"""Summarize the following table: fish
{ten_fish_res.result.to_markdown()}""")
# TODO there's some bug here where the function is being passed a dict and not the data type
# (which is why the result is indexed with a key below instead of using attribute access)
print("\n\n")
print(str(summary_res.result["table_summary"]))

### Putting the two of them together
Putting them together, this code will query 10 example rows, then pass in those example rows to be used to summarize the table.

Information about the previously queried or summarized tables is not very useful inside the loop, so we only `remember` the
summaries afterwards; this saves on context and keeps the loop fast.

In [None]:
# Summarizer results collected during the loop; they are remembered in bulk afterwards.
res_funcs = []
# Result of the earlier "list the available tables" request, iterated row by row.
tables_df = list_tables_result.result
for row in tables_df.itertuples(index=True):
    # TODO remove, just a hack to make it go faster for now.
    # NOTE(review): because the check is `> 10` and runs before the append, 11 tables are
    # summarized before the loop stops — confirm whether 10 was intended.
    if len(res_funcs) > 10:
        break
    # NOTE(review): assumes the table listing has a column called `name`; the listing query
    # is produced by the agent, so verify this holds.
    table_name = row.name

    # TODO some way to cache the function here so it doesn't have to figure it out every time with LLM calls.
    example_row_res = queryer.auto_gen_func(f"List 10 example rows in the table {table_name}")

    # Hand the example rows (as a markdown table) to the summarizer agent.
    summary_res = table_summarizer.auto_gen_func(f"""Summarize the following table: {table_name}
{example_row_res.result.to_markdown()}""")

    # Save the memories so they can be remembered after the loop
    res_funcs.append(summary_res)

# Go through and remember all the summaries
for res_func in res_funcs:
    table_summarizer.remember(res_func)

# Now that the table_summarizer is populated with summaries of all the tables, the queryer can use it!
queryer.add_knowledge_source(table_summarizer)

# Report how many memories the queryer now exposes as messages.
print(f"Queryer has {len(queryer.get_function_memories_as_messages())} memories")

### So now let's show off the impact of memory

First, we ask a question that is probably too hard for a model like 4o-mini to one-shot with its current knowledge.
It struggles a lot more here, because it first has to figure out what "big" means.

In [None]:
# First attempt at the question, asked before the fish sizes have been looked up and remembered.
big_fish_res = queryer.auto_gen_func("How many big fish are there?")

print(big_fish_res.result)

#### Let's have the agent examine the sizes

In [None]:
# Have the agent look up which size values exist for fish.
fish_sizes_res = queryer.auto_gen_func("What are the different possible sizes of fish?")
print(fish_sizes_res.result)

# Keep this result as a memory so later requests to the queryer can draw on it.
queryer.remember(fish_sizes_res)

#### And now it should be able to do a better job. It got it in one go!


In [None]:
# Same question as before, asked again now that the fish sizes result has been remembered.
big_fish_res = queryer.auto_gen_func("How many big fish are there?")

print(big_fish_res.result)

### Code Generation

Now onto code generation. The first example here is showing code generation without 
leveraging the memory of previous agents. The answer isn't quite right. It just tries
to 


In [None]:
from typing import Tuple

import pandas

# Fetch the full fish table; its DataFrame is the sample input for the generated function.
all_fish_res = queryer.auto_gen_func("Get me all fish")
all_fish_df = all_fish_res.result

# The code-generation agent is given a LocalJupyterServer that lives for the duration of this block.
with LocalJupyterServer() as server:
    # No knowledge source is added here, so this generator does not draw on the queryer's memories.
    func_generator = KnowledgeCodeGenSwarmAgent(
        name="FunctionGenerator",
        docker_server=server,
        researcher_llm_config=llm_config,
        result_validator_llm_config=llm_config,
        max_rounds=50,
    )
    # Each params entry appears to be (example value, type) keyed by parameter name — TODO confirm.
    # NOTE(review): function_name "column_analysis_func" does not describe the requested
    # random-fish picker — confirm the name is intentional.
    rand_fish_func_res = func_generator.generate_function(
        """Generate me a function which picks a random fish from the input fish dataframe input_df, but the choice is decided based on their size.
The bigger the fish the more common it is based on the multiplier parameter. Return (Name, Size). The result is random, its fine if it returns something small. """,
        function_name="column_analysis_func",
        params={"input_df": (all_fish_df, pandas.DataFrame), "multiplier": (5, int)},
        return_type=Tuple[str, str],
    )
    # The result is used as a callable taking (input_df, multiplier).
    rand_fish_func = rand_fish_func_res.result

    print(rand_fish_func(all_fish_df, 10))

#### Now here we add `func_generator.add_knowledge_source(queryer)` to give the memories of the queryer to the function generator. It should help it know more about what the columns are.

In [None]:
from typing import Tuple

import pandas

# Fetch the full fish table; its DataFrame is the sample input for the generated function.
all_fish_res = queryer.auto_gen_func("Get me all fish")
all_fish_df = all_fish_res.result
# The code-generation agent is given a LocalJupyterServer that lives for the duration of this block.
with LocalJupyterServer() as server:
    func_generator = KnowledgeCodeGenSwarmAgent(
        name="FunctionGenerator",
        docker_server=server,
        researcher_llm_config=llm_config,
        result_validator_llm_config=llm_config,
        max_rounds=50,
    )
    # Share the queryer's memories with the generator before asking it for the function.
    func_generator.add_knowledge_source(queryer)
    # Each params entry appears to be (example value, type) keyed by parameter name — TODO confirm.
    # NOTE(review): function_name "column_analysis_func" does not describe the requested
    # random-fish picker — confirm the name is intentional.
    rand_fish_func_res = func_generator.generate_function(
        """Generate me a function which picks a random fish from the input fish dataframe input_df, but the choice is decided based on their size.
The bigger the fish the more common it is based on the multiplier parameter. Return (Name, Size). The result is random, its fine if it returns something small. """,
        function_name="column_analysis_func",
        params={"input_df": (all_fish_df, pandas.DataFrame), "multiplier": (5, int)},
        return_type=Tuple[str, str],
    )
    rand_fish_func = rand_fish_func_res.result

# NOTE(review): the generated function is called here after the server context has exited,
# whereas the earlier cell calls it inside the `with` block — confirm it does not need the server.
# Ten draws with a multiplier of 10.
for i in range(10):
    print(rand_fish_func(all_fish_df, 10))

# Ten draws with a multiplier of 1000, for comparison with the draws above.
for i in range(10):
    print(rand_fish_func(all_fish_df, 1000))