
Merge pull request #1077 from gpt-engineer-org/entrypoint_prompt
image prompts - Entrypoint prompt - additional CLI argument
ATheorell committed Mar 20, 2024
2 parents 7873779 + eb057ee commit 63d0a5e
Showing 30 changed files with 1,028 additions and 499 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -59,6 +59,7 @@ scratchpad
projects/*
!projects/example/prompt
!projects/example-improve
!projects/example-vision

# docs

13 changes: 12 additions & 1 deletion README.md
@@ -66,11 +66,22 @@ The gptengineer.app team is actively supporting the open source community.

## Features

### Pre Prompts
You can specify the "identity" of the AI agent by overriding the `preprompts` folder with your own version, using the `--use-custom-preprompts` argument.

Editing the `preprompts` is how you make the agent remember things between projects.
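
E.g. `gpte projects/example --use-custom-preprompts` (an illustrative invocation; `projects/example` stands in for your own project path).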

You can also run with open source models, like WizardCoder. See the [documentation](https://gpt-engineer.readthedocs.io/en/latest/open_models.html) for example instructions.
### Vision

By default, GPT Engineer expects text input via a `prompt` file. It can also accept image inputs for vision-capable models. This can be useful for adding UX or architecture diagrams as additional context for GPT Engineer. You can do this by specifying an image directory with the `--image_directory` flag and setting a vision-capable model in the second CLI argument.

E.g. `gpte projects/example-vision gpt-4-vision-preview --prompt_file prompt/text --image_directory prompt/images -i`
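
For orientation, a project set up for the command above might be laid out as follows (an illustrative sketch: only the `prompt/text` and `prompt/images` paths come from the example command, and the image file names are hypothetical):

```
projects/example-vision/
└── prompt/
    ├── text                          # text prompt, read via --prompt_file
    └── images/                       # image context, read via --image_directory
        ├── ux_mockup.png
        └── architecture_diagram.png
```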

### Open source, local and alternative models

By default, GPT Engineer supports OpenAI models via the OpenAI API or Azure OpenAI API, as well as Anthropic models.

With a little extra setup, you can also run with open source models like WizardCoder. See the [documentation](https://gpt-engineer.readthedocs.io/en/latest/open_models.html) for example instructions.

## Mission

26 changes: 14 additions & 12 deletions gpt_engineer/applications/cli/cli_agent.py
@@ -22,6 +22,7 @@
)
from gpt_engineer.core.files_dict import FilesDict
from gpt_engineer.core.preprompts_holder import PrepromptsHolder
from gpt_engineer.core.prompt import Prompt

CodeGenType = TypeVar("CodeGenType", bound=Callable[[AI, str, BaseMemory], FilesDict])
CodeProcessor = TypeVar(
@@ -147,7 +148,7 @@ def with_default_config(
preprompts_holder=preprompts_holder or PrepromptsHolder(PREPROMPTS_PATH),
)

def init(self, prompt: str) -> FilesDict:
def init(self, prompt: Prompt) -> FilesDict:
"""
Generates a new piece of code using the AI and step bundle based on the provided prompt.
@@ -166,7 +167,7 @@ def init(self, prompt: str) -> FilesDict:
self.ai, prompt, self.memory, self.preprompts_holder
)
entrypoint = gen_entrypoint(
self.ai, files_dict, self.memory, self.preprompts_holder
self.ai, prompt, files_dict, self.memory, self.preprompts_holder
)
combined_dict = {**files_dict, **entrypoint}
files_dict = FilesDict(combined_dict)
@@ -175,13 +176,15 @@
self.execution_env,
files_dict,
preprompts_holder=self.preprompts_holder,
prompt=prompt,
memory=self.memory,
)
return files_dict

def improve(
self,
files_dict: FilesDict,
prompt: str,
prompt: Prompt,
execution_command: Optional[str] = None,
) -> FilesDict:
"""
@@ -205,19 +208,18 @@ def improve(
files_dict = self.improve_fn(
self.ai, prompt, files_dict, self.memory, self.preprompts_holder
)

# No need to run entrypoint for improve right?
# if not execution_command and ENTRYPOINT_FILE not in files_dict:
# entrypoint = gen_entrypoint(
# self.ai, files_dict, self.memory, self.preprompts_holder
# )
# combined_dict = {**files_dict, **entrypoint}
# files_dict = FilesDict(combined_dict)

# entrypoint = gen_entrypoint(
# self.ai, prompt, files_dict, self.memory, self.preprompts_holder
# )
# combined_dict = {**files_dict, **entrypoint}
# files_dict = FilesDict(combined_dict)
# files_dict = self.process_code_fn(
# self.ai,
# self.execution_env,
# files_dict,
# preprompts_holder=self.preprompts_holder,
# prompt=prompt,
# memory=self.memory,
# )

return files_dict
5 changes: 3 additions & 2 deletions gpt_engineer/applications/cli/collect.py
@@ -31,6 +31,7 @@
human_review_input,
)
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.prompt import Prompt


def send_learning(learning: Learning):
@@ -62,7 +63,7 @@ def send_learning(learning: Learning):


def collect_learnings(
prompt: str,
prompt: Prompt,
model: str,
temperature: float,
config: any,
@@ -138,7 +139,7 @@


def collect_and_send_human_review(
prompt: str,
prompt: Prompt,
model: str,
temperature: float,
config: Tuple[str, ...],
7 changes: 4 additions & 3 deletions gpt_engineer/applications/cli/learning.py
@@ -16,7 +16,7 @@
Checks if the user has previously given consent to store their data and, if not, asks for it.
ask_collection_consent() -> bool
Prompts the user for consent to store their data for the purpose of improving GPT Engineer.
extract_learning(prompt: str, model: str, temperature: float, config: Tuple[str, ...], memory: DiskMemory, review: Review) -> Learning
extract_learning(prompt: Prompt, model: str, temperature: float, config: Tuple[str, ...], memory: DiskMemory, review: Review) -> Learning
Extracts feedback and session details to create a Learning instance based on the provided parameters.
get_session() -> str
Retrieves a unique identifier for the current user session, creating one if it does not exist.
@@ -40,6 +40,7 @@
from termcolor import colored

from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.prompt import Prompt


@dataclass_json
@@ -97,7 +98,7 @@ class Learning:
The version of the learning data schema.
"""

prompt: str
prompt: Prompt
model: str
temperature: float
config: str
@@ -236,7 +237,7 @@ def ask_collection_consent() -> bool:


def extract_learning(
prompt: str,
prompt: Prompt,
model: str,
temperature: float,
config: Tuple[str, ...],
112 changes: 102 additions & 10 deletions gpt_engineer/applications/cli/main.py
@@ -29,6 +29,8 @@
import typer

from dotenv import load_dotenv
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

from gpt_engineer.applications.cli.cli_agent import CliAgent
from gpt_engineer.applications.cli.collect import collect_and_send_human_review
@@ -52,6 +54,7 @@
stage_files,
)
from gpt_engineer.core.preprompts_holder import PrepromptsHolder
from gpt_engineer.core.prompt import Prompt
from gpt_engineer.tools.custom_steps import clarified_gen, lite_gen, self_heal

app = typer.Typer() # creates a CLI app
@@ -73,7 +76,25 @@ def load_env_if_needed():
openai.api_key = os.getenv("OPENAI_API_KEY")


def load_prompt(input_repo: DiskMemory, improve_mode):
def concatenate_paths(base_path, sub_path):
# Compute the relative path from base_path to sub_path
relative_path = os.path.relpath(sub_path, base_path)

# If the relative path is not in the parent directory, use the original sub_path
if not relative_path.startswith(".."):
return sub_path

# Otherwise, concatenate base_path and sub_path
return os.path.normpath(os.path.join(base_path, sub_path))


def load_prompt(
input_repo: DiskMemory,
improve_mode: bool,
prompt_file: str,
image_directory: str,
entrypoint_prompt_file: str = "",
) -> Prompt:
"""
Load or request a prompt from the user based on the mode.
@@ -89,16 +110,47 @@ def load_prompt(input_repo: DiskMemory, improve_mode):
str
The loaded or inputted prompt.
"""
if input_repo.get("prompt"):
return input_repo.get("prompt")

if not improve_mode:
input_repo["prompt"] = input(
"\nWhat application do you want gpt-engineer to generate?\n"
if os.path.isdir(prompt_file):
raise ValueError(
f"The path to the prompt, {prompt_file}, already exists as a directory. No prompt can be read from it. Please specify a prompt file using --prompt_file"
)
prompt_str = input_repo.get(prompt_file)
if not prompt_str:
if not improve_mode:
prompt_str = input(
"\nWhat application do you want gpt-engineer to generate?\n"
)
else:
prompt_str = input("\nHow do you want to improve the application?\n")

if entrypoint_prompt_file == "":
entrypoint_prompt = ""
else:
full_entrypoint_prompt_file = concatenate_paths(
input_repo.path, entrypoint_prompt_file
)
if os.path.isfile(full_entrypoint_prompt_file):
entrypoint_prompt = input_repo.get(full_entrypoint_prompt_file)

else:
raise ValueError("The provided file at --entrypoint-prompt does not exist")

if image_directory == "":
return Prompt(prompt_str, entrypoint_prompt=entrypoint_prompt)

full_image_directory = concatenate_paths(input_repo.path, image_directory)
if os.path.isdir(full_image_directory):
if len(os.listdir(full_image_directory)) == 0:
raise ValueError("The provided --image_directory is empty.")
image_repo = DiskMemory(full_image_directory)
return Prompt(
prompt_str,
image_repo.get(".").to_dict(),
entrypoint_prompt=entrypoint_prompt,
)
else:
input_repo["prompt"] = input("\nHow do you want to improve the application?\n")
return input_repo.get("prompt")
raise ValueError("The provided --image_directory is not a directory.")


def get_preprompts_path(use_custom_preprompts: bool, input_path: Path) -> Path:
@@ -140,7 +192,7 @@ def prompt_yesno(question: str) -> bool:
@app.command()
def main(
project_path: str = typer.Argument("projects/example", help="path"),
model: str = typer.Argument("gpt-4-1106-preview", help="model id string"),
model: str = typer.Argument("gpt-4-0125-preview", help="model id string"),
temperature: float = 0.1,
improve_mode: bool = typer.Option(
False,
@@ -184,6 +236,26 @@ def main(
"--llm-via-clipboard",
help="Use the clipboard to communicate with the AI.",
),
prompt_file: str = typer.Option(
"prompt",
"--prompt_file",
help="Relative path to a text file containing a prompt.",
),
entrypoint_prompt_file: str = typer.Option(
"",
"--entrypoint_prompt",
help="Relative path to a text file containing a file that specifies requirements for you entrypoint.",
),
image_directory: str = typer.Option(
"",
"--image_directory",
help="Relative path to a folder containing images.",
),
use_cache: bool = typer.Option(
False,
"--use_cache",
help="Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response.",
),
verbose: bool = typer.Option(False, "--verbose", "-v"),
):
"""
@@ -213,6 +285,14 @@ def main(
The endpoint for Azure OpenAI services.
use_custom_preprompts : bool
Flag indicating whether to use custom preprompts.
prompt_file : str
Relative path to a text file containing a prompt.
entrypoint_prompt_file: str
        Relative path to a text file that specifies requirements for your entrypoint.
image_directory: str
Relative path to a folder containing images.
use_cache: bool
Speeds up computations and saves tokens when running the same prompt multiple times by caching the LLM response.
verbose : bool
Flag indicating whether to enable verbose logging.
@@ -223,6 +303,8 @@

logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

if use_cache:
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
if improve_mode:
assert not (
clarify_mode or lite_mode
@@ -248,7 +330,17 @@ def main(
print("Initializing an empty git repository")
init_git_repo(path)

prompt = load_prompt(DiskMemory(path), improve_mode)
prompt = load_prompt(
DiskMemory(path),
improve_mode,
prompt_file,
image_directory,
entrypoint_prompt_file,
)

# todo: if ai.vision is false and not llm_via_clipboard - ask if they would like to use gpt-4-vision-preview instead? If so recreate AI
if not ai.vision:
prompt.image_urls = None

# configure generation function
if clarify_mode:
3 changes: 2 additions & 1 deletion gpt_engineer/benchmark/types.py
@@ -26,6 +26,7 @@

from gpt_engineer.core.base_execution_env import BaseExecutionEnv
from gpt_engineer.core.files_dict import FilesDict
from gpt_engineer.core.prompt import Prompt


@dataclass
@@ -56,7 +57,7 @@ class Task:
name: str
initial_code: Optional[FilesDict]
command: Optional[str]
prompt: str
prompt: Prompt
assertions: Optional[Dict[str, Assertion]]


