In [1]:
import logging

from automata.cli.commands import reconfigure_logging
from automata.config.base import AgentConfigName
from automata.config.openai_agent import OpenAIAutomataAgentConfigBuilder
from automata.core.agent.providers import OpenAIAutomataAgent
from automata.core.singletons.dependency_factory import dependency_factory
from automata.core.singletons.py_module_loader import py_module_loader
from automata.core.tools.factory import AgentToolFactory

# Logger for this notebook, named after the executing module.
logger = logging.getLogger(__name__)
# Reconfigure logging with the "DEBUG" setting.
# NOTE(review): presumably this enables DEBUG-level output for the whole
# session — confirm against `reconfigure_logging`.
reconfigure_logging("DEBUG")

# Initialize the shared module loader before anything else is built.
# NOTE(review): assumed to be a prerequisite for the dependency/tool
# construction that follows — confirm against `py_module_loader`.
py_module_loader.initialize()

In [2]:
# Name the toolkits the agent should have, then build every dependency
# those toolkits need so the tools can be created from them.
toolkit_list = [
    "context-oracle",
]
tool_dependencies = dependency_factory.build_dependencies_for_tools(
    toolkit_list
)

[32mINFO:automata.core.tools.factory:Building dependencies for toolkit_list ['context-oracle']...[0m
[32mINFO:automata.core.tools.factory:Building embedding_similarity_calculator...[0m
[32mINFO:automata.core.tools.factory:Creating dependency embedding_similarity_calculator[0m
[32mINFO:automata.core.tools.factory:Building symbol_doc_embedding_handler...[0m
[32mINFO:automata.core.tools.factory:Creating dependency symbol_doc_embedding_handler[0m
[32mINFO:automata.core.tools.factory:Creating dependency symbol_search[0m
[32mINFO:automata.core.tools.factory:Creating dependency symbol_graph[0m
[32mINFO:automata.core.tools.factory:Creating dependency symbol_code_embedding_handler[0m
[32mINFO:automata.core.tools.factory:Creating dependency py_context_retriever[0m
[32mINFO:automata.core.tools.factory:Building symbol_code_embedding_handler...[0m


In [3]:
# Create the tools for the requested toolkits, passing each prepared
# dependency through as a keyword argument.
tools = AgentToolFactory.build_tools(
    toolkit_list,
    **tool_dependencies,
)

In [4]:
# Assemble the agent configuration one builder step at a time:
# start from the named base config, attach the tools, select the model,
# cap the number of iterations, then produce the final config object.
config_name = AgentConfigName("automata-main")

config_builder = OpenAIAutomataAgentConfigBuilder.from_name(config_name)
config_builder = config_builder.with_tools(tools)
config_builder = config_builder.with_model("gpt-4")
config_builder = config_builder.with_max_iterations(5)
agent_config = config_builder.build()

[32mINFO:automata.core.tools.factory:Creating dependency symbol_rank[0m
[32mINFO:automata.core.tools.factory:Creating dependency subgraph[0m
[32mINFO:automata.core.symbol.graph:Pre-computing bounding boxes for all rankable symbols[0m
[32mINFO:automata.core.symbol.graph:Finished pre-computing bounding boxes for all rankable symbols in 9.046106100082397 seconds[0m
[32mINFO:automata.core.symbol.graph:Building the rankable symbol subgraph...[0m
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 814/814 [00:05<00:00, 148.33it/s]
[32mINFO:automata.core.symbol.graph:Built the rankable symbol subgraph[0m


In [5]:
# Create the agent from the task prompt and the previously built configuration.
instructions = "Explain how embeddings are used by the codebase"
agent = OpenAIAutomataAgent(
    instructions,
    config=agent_config,
)

[36mDEBUG:automata.core.agent.providers:Setting up agent with tools = [OpenAITool(function=<bound method ContextOracleToolkitBuilder._get_context of <automata.core.tools.builders.context_oracle.ContextOracleOpenAIToolkitBuilder object at 0x15276a070>>, name='context-oracle', description="This tool utilizes the EmbeddingSimilarityCalculator and SymbolSearch to provide context for a given query by computing semantic similarity between the query and all available symbols' documentation and code. The symbol with the highest combined similarity score is identified, with its source code and documentation summary forming the primary context. Additionally, if enabled, the documentation summaries of related symbols (those next most similar to the query) are included.", coroutine=None, properties={'query': {'type': 'string', 'description': 'The query string to search for.'}, 'max_additional_related_symbols': {'type': 'integer', 'description': 'The maximum number of additional related symbols to

[36mDEBUG:automata.core.agent.providers:
------------------------------------------------------------
Session ID: dada11df-473e-42ed-a5e8-476de78ed2db
------------------------------------------------------------

[0m


In [6]:
# Run the agent and keep whatever `run()` returns in `result` for display later.
# NOTE(review): with DEBUG logging enabled this call emits the full
# assistant/user message trace; it presumably requires access to the
# OpenAI API — confirm before re-running.
result = agent.run()

[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------
Latest Assistant Message -- 
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 2525 tokens were consumed prior to completion generation.[0m


Function Call:
context-oracle

Arguments:
{
  "query": "embedding"
}

 

[36mDEBUG:automata.core.llm.providers.openai:Approximately 2559 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[32mINFO:numexpr.utils:NumExpr defaulting to 8 threads.[0m
[36mDEBUG:automata.core.agent.providers:Latest User Message -- 
user:
content=Execution Result:

@property
    def symbol(self) -> Symbol:
        return self.key

    Documentation Summary:

Symbol: automata.core.symbol_embedding.base.SymbolEmbedding.symbol
Fetching related context now for 1 symbols...

automata.tests.unit.test_symbol_embedding.test_get_embedding

Symbol: automata.tests.unit.test_symbol_embedding.test_get_embedding






NOTE - you are at iteration 1 out of a maximum of 5. Please return a result with call_termination when ready.
function_call=None
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 2680 tokens were after adding the latest mess

Sure. Based on the code snippets and context, embeddings in the codebase are mainly handled by the `SymbolEmbedding` class located in `automata.core.symbol_embedding.base`. This class seems to be associated with a specific `Symbol`.

A `SymbolEmbedding` seems to be tied to a particular `Symbol`, as seen in the `symbol` property of the `SymbolEmbedding` class. This suggests that each `SymbolEmbedding` probably represents the embedded (compressed into vectors) form of code or documentation information for a specific `Symbol`.

To better explain how embeddings work, let's understand their role generally. In machine learning, embeddings are a way of representing complex, high-dimensional data in a much lower-dimensional space, typically a vector space. This significant reduction in dimensionality often helps machine learning models to better process and understand this data. For example, text data, which can have thousands of possible tokens (words, characters), can be represented in a muc

[36mDEBUG:automata.core.llm.providers.openai:Approximately 3041 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:automata.core.agent.providers:Latest User Message -- 
user:
content=Continue...NOTE - you are at iteration 2 out of a maximum of 5. Please return a result with call_termination when ready.
function_call=None
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 3081 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------
Latest Assistant Message -- 
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 3081 tokens were consumed prior 

Function Call:
context-oracle

Arguments:
{"query": "SymbolEmbedding"}

 

[36mDEBUG:automata.core.llm.providers.openai:Approximately 3117 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:automata.core.agent.providers:Latest User Message -- 
user:
content=Execution Result:

@property
    def symbol(self) -> Symbol:
        return self.key

    Documentation Summary:

Symbol: automata.core.symbol_embedding.base.SymbolEmbedding.symbol
Fetching related context now for 1 symbols...

automata.core.memory_store.symbol_doc_embedding.SymbolDocEmbeddingHandler.get_embedding

Symbol: automata.core.memory_store.symbol_doc_embedding.SymbolDocEmbeddingHandler.get_embedding






NOTE - you are at iteration 3 out of a maximum of 5. Please return a result with call_termination when ready.
function_call=None
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 3248 tokens were after adding the latest message.[0

Great, we have a bit more information now. 

The `SymbolEmbedding` class provides embedded information for symbols in vector form. It has a property called `symbol` that returns the symbol corresponding to the embedding. 

However, the creation and usage of the `SymbolEmbedding` instances appears to happen in classes like `SymbolDocEmbeddingHandler`. There's a specific method in this handler class, `get_embedding`, which likely retrieves the corresponding embedding for a requested symbol.

This suggests that while `SymbolEmbedding` provides the data structure for the actual embedding, classes like `SymbolDocEmbeddingHandler` manage the retrieval and usage of these embeddings within the codebase. It's worth noting that the specific mechanisms, abstraction levels, and usage scenarios could vary across different parts of the codebase.

In generic terms, manipulating embeddings includes operations such as: 

- Creating embeddings: this likely involves machine learning models such as word2v

[36mDEBUG:automata.core.llm.providers.openai:Approximately 3674 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:automata.core.agent.providers:Latest User Message -- 
user:
content=Continue...NOTE - YOU HAVE EXCEEDED YOUR MAXIMUM ALLOWABLE ITERATIONS, RETURN A RESULT NOW WITH call_termination.
function_call=None
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 3710 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------
Latest Assistant Message -- 
[0m
[36mDEBUG:automata.core.llm.providers.openai:Approximately 3710 tokens were consumed prior to comp

Function Call:
call_termination

Arguments:
{"result": "The `SymbolEmbedding` class in the codebase is a representation of symbols in the form of vectors, also known as embeddings. These embeddings are a direct result of using machine learning models to transform complex high-dimensional data into a lower-dimensional space. The transformation helps machines better understand the data. In the context of this codebase, embeddings could be used to represent complex code logic, functions, classes into simpler and meaningful vector representations. The class `SymbolDocEmbeddingHandler` manages the retrieval and usage of these embeddings within the codebase. Operations on embeddings could include creating, updating, and retrieving for various use cases like code analysis, similarity comparisons, etc."}

 

[36mDEBUG:automata.core.llm.providers.openai:Approximately 3875 tokens were after adding the latest message.[0m
[36mDEBUG:root:
------------------------------------------------------------------------------------------------------------------------[0m
[36mDEBUG:automata.core.agent.providers:Latest User Message -- 
user:
content=Execution Result:

The `SymbolEmbedding` class in the codebase is a representation of symbols in the form of vectors, also known as embeddings. These embeddings are a direct result of using machine learning models to transform complex high-dimensional data into a lower-dimensional space. The transformation helps machines better understand the data. In the context of this codebase, embeddings could be used to represent complex code logic, functions, classes into simpler and meaningful vector representations. The class `SymbolDocEmbeddingHandler` manages the retrieval and usage of these embeddings within the codebase. Operations on embeddings could include cr

In [7]:
# Show the agent's final answer beneath a "Result:" heading.
report = f"Result:\n{result}"
print(report)

Result:
Execution Result:

The `SymbolEmbedding` class in the codebase is a representation of symbols in the form of vectors, also known as embeddings. These embeddings are a direct result of using machine learning models to transform complex high-dimensional data into a lower-dimensional space. The transformation helps machines better understand the data. In the context of this codebase, embeddings could be used to represent complex code logic, functions, classes into simpler and meaningful vector representations. The class `SymbolDocEmbeddingHandler` manages the retrieval and usage of these embeddings within the codebase. Operations on embeddings could include creating, updating, and retrieving for various use cases like code analysis, similarity comparisons, etc.


