In [1]:
import logging
from typing import Any, Set

from automata.config.agent_config_builder import AutomataAgentConfigBuilder
from automata.config.config_types import AgentConfigName
from automata.core.agent.agent import AutomataAgent
from automata.core.agent.tools.tool_utils import (
    AgentToolFactory,
    DependencyFactory,
    build_llm_toolkits,
)
from automata.core.base.tool import ToolkitType


In [2]:
tool_list = ["context_oracle"]

# Gather the name of every dependency declared by the requested toolkits;
# a set keeps each name once even when several toolkits share it.
dependencies: Set[Any] = set()
for tool in tool_list:
    toolkit_args = AgentToolFactory.TOOLKIT_TYPE_TO_ARGS[ToolkitType(tool)]
    dependencies.update(name for name, _ in toolkit_args)

# Build each dependency through a DependencyFactory and key it by its name
# so the mapping can later be splatted as keyword arguments.
print("  - Building dependencies...")
kwargs = {}
for dependency in dependencies:
    print(f"Building {dependency}...")
    factory = DependencyFactory()
    kwargs[dependency] = factory.get(dependency)


  - Building dependencies...
Building symbol_search...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 699/699 [00:00<00:00, 3174.43it/s]


Building symbol_doc_similarity...


In [3]:
# Build the LLM toolkits for the tools in tool_list, forwarding every
# dependency built above as a keyword argument.
llm_toolkits = build_llm_toolkits(tool_list, **kwargs)

In [4]:
from automata.core.base.tool import Tool, Toolkit, ToolkitType
# Show the first tool registered in the context-oracle toolkit.
context_oracle_toolkit = llm_toolkits[ToolkitType.CONTEXT_ORACLE]
context_oracle_toolkit.tools[0]

Tool(name='context-oracle', description="\nThis tool combines SymbolSearch and SymbolSimilarity to create contexts. Given a query, it uses SymbolSimilarity calculate the similarity between each symbol's documentation and the query returns the most similar document. Then, it leverages SymbolSearch to combine Semantic Search with PageRank to find the most relevant symbols to the query. The overview documentation of these symbols is then concated to the result of the SymbolSimilarity query to create a context.\n\nFor instance, if a query reads 'Tell me about SymbolRank', it will find the most similar document to this query from the embeddings, which in this case would be the documentation for the SymbolRank class. Then, it will use SymbolSearch to fetch some of the most relevant symbols which would be 'Symbol', 'SymbolSearch', 'SymbolGraph', etc. This results in a comprehensive context for the query.\n", return_direct=True, verbose=False, func=<function ContextOracleTool.build.<locals>.<l

In [5]:
# Pick the named agent configuration and the model it should run with.
config_name = AgentConfigName("automata_retriever")
# NOTE(review): kwargs holds the built dependencies, so unless one of them is
# keyed "model" this resolves to "gpt-4" — confirm the lookup is intended.
model_name = kwargs.get("model", "gpt-4")

# Assemble the agent configuration one builder step at a time.
config_builder = AutomataAgentConfigBuilder.from_name(config_name)
config_builder = config_builder.with_llm_toolkits(llm_toolkits)
config_builder = config_builder.with_model(model_name)
agent_config = config_builder.build()

# Print the system instruction of the built configuration.
print(agent_config.system_instruction)

You are Automata Retriever, an autonomous software retrieval system built by OpenAI and designed to run within a local Python repository. You receive instructions from a user in simple English and carry out the instructions with the tools you are provided. You may use the following tools:

Tools:

context-oracle:

This tool combines SymbolSearch and SymbolSimilarity to create contexts. Given a query, it uses SymbolSimilarity calculate the similarity between each symbol's documentation and the query returns the most similar document. Then, it leverages SymbolSearch to combine Semantic Search with PageRank to find the most relevant symbols to the query. The overview documentation of these symbols is then concated to the result of the SymbolSimilarity query to create a context.

For instance, if a query reads 'Tell me about SymbolRank', it will find the most similar document to this query from the embeddings, which in this case would be the documentation for the SymbolRank class. Then, it

In [6]:
instructions = "Explain how embeddings are used by the codebase"

# Create the agent for this instruction with the configuration built earlier,
# and set it up before running.
agent = AutomataAgent(instructions, config=agent_config)
agent.setup()

# Run the agent and print whatever it returns.
result = agent.run()
print(f"Result:\n{result}")


>>> automata_retriever Agent:
- thoughts
  - I will start by retrieving the relevant context from the oracle.
- actions
  - tool_query_1
    - tool_name
      - context-oracle
    - tool_args
      - Explain how embeddings are used by the codebase.


Failed to get embedding for symbol Symbol(scip-python python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0 `automata.cli.commands`/run_code_embedding()., scip-python, Package(python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0), (Descriptor(automata.cli.commands, 1), Descriptor(run_code_embedding, 4))) with error: 'Symbol Symbol(scip-python python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0 `automata.cli.commands`/run_code_embedding()., scip-python, Package(python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0), (Descriptor(automata.cli.commands, 1), Descriptor(run_code_embedding, 4))) not in database'
Failed to get embedding for symbol Symbol(scip-python python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0 `automata.tests.unit.test_symbol_embedding`/test_get_embedding()., scip-python, Package(python automata 9db05b7e7ebd49f93703df45accd7e5f9d5cedb0), (Descriptor(automata.tests.unit.test_symbol_embedding, 1), Descriptor(test_get_embedding, 4))) with error: 'Symbol


>>> automata_retriever Agent:
- actions
  - return_result_0
    - Embeddings are used in the codebase to represent Python symbols in a vector space. They are mainly used for tasks like semantic search, similarity calculation, and documentation generation. The codebase utilizes `SymbolCodeEmbedding` and `SymbolDocEmbedding` to represent code and documentation embeddings, respectively.

`SymbolCodeEmbeddingHandler` manages embeddings for source code symbols, while `SymbolDocEmbeddingHandler` manages the creation and retrieval of documentation embeddings for Python symbols using GPT-4. Embeddings are stored in a vector database, such as `JSONVectorDatabase`, which supports adding, updating, and retrieving symbol embeddings.

For example, to create a `SymbolCodeEmbedding` instance:
```python
import numpy as np
from automata.core.symbol.symbol_types import SymbolCodeEmbedding
from automata.core.symbol.parser import parse_symbol

symbol_str = "scip-python python automata 75482692a6fe30c72db