emrgnt-cmplxty · emrgnt-cmplxty · Jul 5, 2023 · Jul 4, 2023 · Jul 4, 2023 · Jul 4, 2023
diff --git a/.setup.sh.example b/.setup.sh.example
@@ -1,9 +1,3 @@
-## NOTE - the code below is contained in setup.sh.example
-
-# Clone the repository
-git clone git@github.com:emrgnt-cmplxty/Automata.git
-cd Automata
-
 # Create the local environment
 python3 -m venv local_env
 source local_env/bin/activate
@@ -22,16 +16,27 @@ CONVERSATION_DB_PATH="$PWD/conversation_db.sqlite3"
 TASK_DB_PATH="$PWD/task_db.sqlite3"
 TASKS_OUTPUT_PATH="$PWD/tasks"
 REPOSITORY_NAME="emrgnt-cmplxty/Automata"
-sed -i "s|your_openai_api_key|$OPEN_API_KEY|" .env
-sed -i "s|your_github_api_key|$GITHUB_API_KEY|" .env
-sed -i "s|your_conversation_db_path|$CONVERSATION_DB_PATH|" .env
-sed -i "s|your_task_db_path|$TASK_DB_PATH|" .env
-sed -i "s|your_tasks_output_path|$TASKS_OUTPUT_PATH|" .env
-sed -i "s|your_repository_name|$REPOSITORY_NAME|" .env
+
+# Detect the operating system
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    # macOS: BSD sed requires an explicit (here empty) backup suffix after -i
+    sed -i '' "s|your_openai_api_key|$OPEN_API_KEY|" .env
+    sed -i '' "s|your_github_api_key|$GITHUB_API_KEY|" .env
+    sed -i '' "s|your_conversation_db_path|$CONVERSATION_DB_PATH|" .env
+    sed -i '' "s|your_task_db_path|$TASK_DB_PATH|" .env
+    sed -i '' "s|your_tasks_output_path|$TASKS_OUTPUT_PATH|" .env
+    sed -i '' "s|your_repository_name|$REPOSITORY_NAME|" .env
+else
+    # Linux and other systems with GNU sed, where -i takes no separate suffix argument
+    sed -i "s|your_openai_api_key|$OPEN_API_KEY|" .env
+    sed -i "s|your_github_api_key|$GITHUB_API_KEY|" .env
+    sed -i "s|your_conversation_db_path|$CONVERSATION_DB_PATH|" .env
+    sed -i "s|your_task_db_path|$TASK_DB_PATH|" .env
+    sed -i "s|your_tasks_output_path|$TASKS_OUTPUT_PATH|" .env
+    sed -i "s|your_repository_name|$REPOSITORY_NAME|" .env
+fi
 # Additional Notes -
 # Default Max Workers is 8, manually change the .env to update this quantity.
-# For MAC users, the example should read as follows -
-## sed -i '' "s|your_openai_api_key|$OPEN_API_KEY|" .env
 
 # Fetch the submodules
 git submodule update --init --recursive
@@ -45,4 +50,4 @@ git submodule update --init --recursive
 ###
 ### Then, initialize by running the following:
 ##  git lfs install
-##  git lfs pull
+##  git lfs pull
diff --git a/README.md b/README.md
@@ -35,6 +35,9 @@ https://github.com/emrgnt-cmplxty/Automata/assets/68796651/2e1ceb8c-ac93-432b-af
 Follow these steps to setup the Automata environment
 
 ```bash
+# Clone the repository
+git clone git@github.com:emrgnt-cmplxty/Automata.git && cd Automata
+
 # Copy the env and setup files
 cp .setup.sh.example setup.sh && cp .env.example .env
 
@@ -43,7 +46,6 @@ chmod 755 setup.sh
 
 # Update the setup and env files with your local paths
 vim setup.sh
-vim .env
 
 # Run the setup script
 ./setup.sh
@@ -68,10 +70,10 @@ automata run-code-embedding
 
 # "L1" docs are the docstrings written into the code
 # "L2" docs are generated from the L1 docs + symbol context
-automata run-doc-embedding-l2
+automata run-doc-embedding --embedding-level 2
 
 # "L3" docs are generated from the L2 docs + symbol context
-automata run-doc-embedding-l3
+automata run-doc-embedding --embedding-level 3
 ```
 
 ### Run the system
@@ -83,7 +85,7 @@ The following commands illustrate how to run the system with a trivial instructi
 automata run-agent --instructions="Return true" --model=gpt-3.5-turbo-0613
 
 # Run a single agent w/ a non-trivial instruction
-automata run-agent --instructions="Explain what AutomataAgent is and how it works, include an example to initialize an instance of AutomataAgent." --model=gpt-3.5-turbo-16k
+automata run-agent --instructions="Explain what AutomataAgent is and how it works, include an example to initialize an instance of AutomataAgent."
 ```
 
 ---
@@ -100,14 +102,12 @@ Sometimes the best way to understand a complicated system is to start by underst
 
 ```python
 
-import logging
-from automata.config.openai_agent import AutomataOpenAIAgentConfigBuilder
+from automata.config.base import AgentConfigName
+from automata.config.openai_agent import OpenAIAutomataAgentConfigBuilder
 from automata.core.agent.providers import OpenAIAutomataAgent
-from automata.core.tools.tool_utils import AgentToolFactory
 from automata.core.singletons.dependency_factory import dependency_factory
-from automata.core.singletons.module_loader import py_module_loader
-
-logger = logging.getLogger(__name__)
+from automata.core.singletons.py_module_loader import py_module_loader
+from automata.core.tools.factory import AgentToolFactory
 
 # Initialize the module loader to the local directory
 py_module_loader.initialize()
@@ -121,7 +121,7 @@ tools = AgentToolFactory.build_tools(toolkit_list, **tool_dependencies)
 
 # Build the agent config
 agent_config = (
-    AutomataOpenAIAgentConfigBuilder.from_name("automata-main")
+    OpenAIAutomataAgentConfigBuilder.from_name("automata-main")
     .with_tools(tools)
     .with_model("gpt-4")
     .build()

diff --git a/automata/cli/commands.py b/automata/cli/commands.py
@@ -47,25 +47,15 @@ def run_code_embedding(ctx, *args, **kwargs) -> None:
 @common_options
 @cli.command()
 @click.pass_context
-def run_doc_embedding_l2(ctx, *args, **kwargs) -> None:
-    """Run the document embedding Level-2 pipeline."""
-    from automata.cli.scripts.run_doc_embedding_l2 import main
+@click.option("--embedding-level", type=int, default=2, help="Level of the embedding.")
+def run_doc_embedding(ctx, *args, **kwargs) -> None:
+    """Run the document embedding pipeline."""
+    from automata.cli.scripts.run_doc_embedding import main
 
     reconfigure_logging(kwargs.get("log-level", "DEBUG"))
-    logger.info("Calling run_doc_embedding_l2")
-    main(**kwargs)
-
-
-@common_options
-@cli.command()
-@click.pass_context
-def run_doc_embedding_l3(ctx, *args, **kwargs) -> None:
-    """Run the document embedding Level-3 pipeline."""
-    from automata.cli.scripts.run_doc_embedding_l3 import main
-
-    reconfigure_logging(kwargs.get("log-level", "DEBUG"))
-    logger.info("Calling run_doc_embedding_l3")
-    main(**kwargs)
+    logger.info("Calling run_doc_embedding")
+    result = main(*args, **kwargs)
+    logger.info(f"Result = {result}")
 
 
 @common_options
@@ -92,24 +82,3 @@ def run_agent(ctx, *args, **kwargs) -> None:
     reconfigure_logging(kwargs.get("log-level", "DEBUG"))
     logger.info("Running agent")
     main(**kwargs)
-
-
-@common_options
-@agent_options
-@cli.command()
-@click.option("--fetch-issues", default="", help="Comma-separated list of issue numbers to fetch")
-@click.pass_context
-def run_agent_task(ctx, *args, **kwargs) -> None:
-    """
-    Run an agent task.
-
-    Note - This is similar to run_agent, but executes the agent
-    within the task framework. This allows for more flexibility
-    across multiple tasks.
-
-    """
-    from automata.cli.scripts.run_agent_task import main
-
-    reconfigure_logging(kwargs.get("log-level", "DEBUG"))
-    logger.info("Running the task")
-    main(**kwargs)
diff --git a/automata/cli/options.py b/automata/cli/options.py
@@ -24,10 +24,15 @@ def common_options(command: click.Command, *args, **kwargs) -> click.Command:
             help="Which index file to use for the embedding modifications.",
         ),
         click.option(
-            "--embedding-file",
+            "--code-embedding-file",
             default="symbol_code_embedding.json",
             help="Which embedding file to save to.",
         ),
+        click.option(
+            "--doc-embedding-file",
+            default="symbol_doc_embedding_l3.json",
+            help="Which embedding file to save to.",
+        ),
     ]
     for option in reversed(options):
         command = option(command)

diff --git a/automata/cli/scripts/run_agent.py b/automata/cli/scripts/run_agent.py
@@ -3,19 +3,18 @@
 
 from automata.config import GITHUB_API_KEY, REPOSITORY_NAME
 from automata.config.base import AgentConfigName
-from automata.config.openai_agent import AutomataOpenAIAgentConfigBuilder
+from automata.config.openai_agent import OpenAIAutomataAgentConfigBuilder
 from automata.core.agent.providers import OpenAIAutomataAgent
 from automata.core.github_management.client import GitHubClient
 from automata.core.singletons.dependency_factory import dependency_factory
-from automata.core.singletons.module_loader import py_module_loader
+from automata.core.singletons.py_module_loader import py_module_loader
 from automata.core.tools.factory import AgentToolFactory
 
 logger = logging.getLogger(__name__)
 
 DEFAULT_ISSUES_PROMPT_PREFIX = """Provide a comprehensive explanation and full code implementation (in Markdown) which address the Github issue(s) that follow:"""
 
 DEFAULT_ISSUES_PROMPT_SUFFIX = """You may use the context oracle (multiple times if necessary) to ensure that you have proper context to answer this question. If you are tasked with writing code, then keep to the SOLID Principles Further, pay special attention to Dependency Inversion Principle and Dependency Injection."""
-# Solve the GitHub issues by writing the relevant code via the PyWriter tool. The issues begin now:"""
 
 
 def process_issues(issue_numbers: List[int], github_manager: GitHubClient) -> List[str]:
@@ -63,7 +62,7 @@ def main(*args, **kwargs):
     logger.info("Done building tools...")
     config_name = AgentConfigName(kwargs.get("agent_name", "automata-main"))
     agent_config = (
-        AutomataOpenAIAgentConfigBuilder.from_name(config_name)
+        OpenAIAutomataAgentConfigBuilder.from_name(config_name)
         .with_tools(tools)
         .with_model(kwargs.get("model", "gpt-4-0613"))
         .build()

diff --git a/automata/cli/scripts/run_code_embedding.py b/automata/cli/scripts/run_code_embedding.py
@@ -6,11 +6,10 @@
 from automata.config.base import ConfigCategory
 from automata.core.llm.providers.openai import OpenAIEmbeddingProvider
 from automata.core.memory_store.symbol_code_embedding import SymbolCodeEmbeddingHandler
-from automata.core.singletons.module_loader import py_module_loader
+from automata.core.singletons.dependency_factory import dependency_factory
+from automata.core.singletons.py_module_loader import py_module_loader
 from automata.core.symbol.graph import SymbolGraph
 from automata.core.symbol.symbol_utils import get_rankable_symbols
-from automata.core.symbol_embedding.base import JSONSymbolEmbeddingVectorDatabase
-from automata.core.symbol_embedding.builders import SymbolCodeEmbeddingBuilder
 from automata.core.utils import get_config_fpath
 
 logger = logging.getLogger(__name__)
@@ -23,36 +22,46 @@ def main(*args, **kwargs) -> str:
 
     py_module_loader.initialize()
 
-    scip_path = os.path.join(
+    scip_fpath = os.path.join(
         get_config_fpath(), ConfigCategory.SYMBOL.value, kwargs.get("index-file", "index.scip")
     )
-    embedding_path = os.path.join(
+    code_embedding_fpath = os.path.join(
         get_config_fpath(),
         ConfigCategory.SYMBOL.value,
-        kwargs.get("embedding-file", "symbol_code_embedding.json"),
+        kwargs.get("code-embedding-file", "symbol_code_embedding.json"),
     )
+    embedding_provider = OpenAIEmbeddingProvider()
 
-    symbol_graph = SymbolGraph(scip_path)
+    dependency_factory.set_overrides(
+        **{
+            "symbol_graph_scip_fpath": scip_fpath,
+            "code_embedding_fpath": code_embedding_fpath,
+            "embedding_provider": embedding_provider,
+        }
+    )
 
-    all_defined_symbols = symbol_graph.get_all_available_symbols()
-    filtered_symbols = sorted(get_rankable_symbols(all_defined_symbols), key=lambda x: x.dotpath)
+    symbol_graph: SymbolGraph = dependency_factory.get("symbol_graph")
+    symbol_code_embedding_handler: SymbolCodeEmbeddingHandler = dependency_factory.get(
+        "symbol_code_embedding_handler"
+    )
+    # Mock synchronization to allow us to build the initial embedding handler
+    symbol_graph.is_synchronized = True
+    symbol_code_embedding_handler.is_synchronized = True
 
-    embedding_db = JSONSymbolEmbeddingVectorDatabase(embedding_path)
-    embedding_provider = OpenAIEmbeddingProvider()
-    embedding_builder = SymbolCodeEmbeddingBuilder(embedding_provider)
-    embedding_handler = SymbolCodeEmbeddingHandler(embedding_db, embedding_builder)
+    all_defined_symbols = symbol_graph.get_sorted_supported_symbols()
+    filtered_symbols = sorted(get_rankable_symbols(all_defined_symbols), key=lambda x: x.dotpath)
 
     for symbol in tqdm(filtered_symbols):
         try:
-            embedding_handler.process_embedding(symbol)
-            embedding_db.save()
+            symbol_code_embedding_handler.process_embedding(symbol)
+            symbol_code_embedding_handler.embedding_db.save()
         except Exception as e:
             logger.error(f"Failed to update embedding for {symbol.dotpath}: {e}")
 
-    for symbol in embedding_handler.get_all_supported_symbols():
-        if symbol not in filtered_symbols:
-            logger.info(f"Discarding stale symbol {symbol}...")
-            embedding_db.discard(symbol.dotpath)
-    embedding_db.save()
+    # for symbol in symbol_code_embedding_handler.get_sorted_supported_symbols():
+    #     if symbol not in filtered_symbols:
+    #         logger.info(f"Discarding stale symbol {symbol}...")
+    #         symbol_code_embedding_handler.embedding_db.discard(symbol.dotpath)
+    # symbol_code_embedding_handler.embedding_db.save()
 
     return "Success"