commit-0 · wenting-zhao · Sep 19, 2024 · Sep 12, 2024 · Sep 12, 2024 · Sep 12, 2024
diff --git a/commit0/__main__.py b/commit0/__main__.py
@@ -11,6 +11,7 @@
 from hydra.core.config_store import ConfigStore
 from commit0.configs.config_class import Commit0Config
 from commit0.harness.constants import COMMANDS, SPLIT
+from omegaconf import OmegaConf
 
 
 def main() -> None:
@@ -24,9 +25,17 @@ def main() -> None:
     cs.store(name="user", group="Commit0Config", node=Commit0Config)
     # have hydra to ignore all command-line arguments
     sys_argv = copy.deepcopy(sys.argv)
-    sys.argv = [sys.argv[0]]
+    cfg_arg = next((arg for arg in sys_argv if arg.startswith("--cfg=")), None)
+
     hydra.initialize(version_base=None, config_path="configs")
     config = hydra.compose(config_name="user")
+
+    if cfg_arg:
+        sys_argv.remove(cfg_arg)
+        config_name = cfg_arg.split("=")[1]
+        user_config = OmegaConf.load(config_name)
+        config = OmegaConf.merge(config, user_config)
+
     # after hydra gets all configs, put command-line arguments back
     sys.argv = sys_argv
     # repo_split: split from command line has a higher priority than split in hydra

diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py
@@ -7,6 +7,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Any
+import sys
 
 from commit0.harness.constants import (
     BASE_IMAGE_BUILD_DIR,
@@ -39,6 +40,8 @@ def setup_logger(repo: str, log_file: Path, mode: str = "w") -> logging.Logger:
     log_file.parent.mkdir(parents=True, exist_ok=True)
     logger = logging.getLogger(f"{repo}.{log_file.name}")
     handler = logging.FileHandler(log_file, mode=mode)
+    stdout_handler = logging.StreamHandler(sys.stdout)
+    logger.addHandler(stdout_handler)
     formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
     handler.setFormatter(formatter)
     logger.addHandler(handler)

diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -100,8 +100,10 @@ def main(
     eval_file.write_text(eval_script)
 
     if ExecutionBackend(backend) == ExecutionBackend.MODAL:
+        logger.info("Runnning on Modal")
         execution_context = Modal
     elif ExecutionBackend(backend) == ExecutionBackend.LOCAL:
+        logger.info("Runnning locally")
         execution_context = Docker
     else:
         raise ValueError(

diff --git a/docs/about.md b/docs/about.md
@@ -1 +1 @@
-Spec2Repo is made by ...
+Commit0 is ..
diff --git a/docs/distributed.md b/docs/distributed.md
@@ -0,0 +1,33 @@
+# Distributed
+
+One of the main advantages of `commit0` is that it can run
+a range of unit tests in distributed environments.
+
+By default, the library is configured to work with [modal](https://modal.com/).
+
+```bash
+pip install modal
+modal token new
+```
+
+## Modal Setup
+
+To enable distributed runs, first
+create a file called `distributed.yaml`:
+
+```yaml
+backend: modal
+base_dir: repos.dist/
+```
+
+You can pass this configuration file as an argument to `clone`.
+
+```bash
+commit0 clone lite --cfg=distributed.yaml
+```
+
+Next, to run tests, you can run the standard test command.
+
+```bash
+commit0 test simpy master tests/test_event.py::test_succeed --cfg=distributed.yaml
+```
diff --git a/docs/index.md b/docs/index.md
diff --git a/docs/make_md.py b/docs/make_md.py
@@ -0,0 +1,88 @@
+import datasets
+import subprocess
+
+import requests
+from bs4 import BeautifulSoup
+
+def get_github_avatar(repo):
+    """
+    Given a GitHub repo in the format 'owner/repo', get the avatar URL of the organization or user.
+    Returns the `og:image` URL of the owner's GitHub page, or None if it cannot be fetched or found.
+    """
+    try:
+        org = repo.split("/")[0]
+        # Construct the URL of the owner's profile page (the avatar belongs to the owner, not the repo)
+        url = f"https://github.com/{org}"
+
+        # Make a request to the page; the timeout keeps a stalled connection from hanging the script
+        response = requests.get(url, timeout=10)
+
+        # Check if the request was successful
+        if response.status_code != 200:
+            print(f"Failed to fetch page for {repo}. Status code: {response.status_code}")
+            return None
+
+        # Parse the HTML content using BeautifulSoup
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        # Find the meta tag with property "og:image" which contains the avatar URL
+        meta_tag = soup.find('meta', property='og:image')
+        if meta_tag and 'content' in meta_tag.attrs:
+            return meta_tag['content']
+
+        print(f"Avatar URL not found for {repo}")
+        return None
+
+    except Exception as e:
+        # Best-effort lookup: any failure (network, parsing, bad input) is reported and yields None
+        print(f"An error occurred: {e}")
+        return None
+
+d = datasets.load_dataset("wentingzhao/commit0_docstring", split="test")
+print(d)
+
+# Emit a markdown table on stdout: a header, then one row per dataset entry.
+print("|  | Name |  Repo | Commit0 | Tests |  | ")
+print("|--|--------|-------|----|----|------| ")
+# Logo URLs used instead of the GitHub owner avatar, keyed by lowercase repo name.
+overload = {
+    "simpy" : "https://simpy.readthedocs.io/en/4.1.1/_images/simpy-logo-small.png",
+    "tinydb" : "https://raw.githubusercontent.com/msiemens/tinydb/master/artwork/logo.png",
+    "bitstring": "https://bitstring.readthedocs.io/en/stable/_images/bitstring_logo.png",
+    "seaborn":     "https://raw.githubusercontent.com/mwaskom/seaborn/master/doc/_static/logo-wide-lightbg.svg",
+    "statsmodels": "https://raw.githubusercontent.com/statsmodels/statsmodels/main/docs/source/images/statsmodels-logo-v2-horizontal.svg",
+    "pyboy" : "https://github.com/Baekalfen/PyBoy/raw/master/extras/README/pyboy.svg",
+}
+# Lowercase repo names that are rendered as bold text instead of an avatar image.
+skip = {
+    "pyjwt",
+    "wcwidth",
+    "chardet",
+    "dnspython",
+    "imapclient",
+    "pexpect",
+    "dulwich",
+    "voluptuous",
+    "requests",
+    "tlslite-ng",
+    "more-itertools",
+    "deprecated",
+    "cachetools",
+    "paramiko",
+    "jedi",
+    "sqlparse",
+}
+for ex in d:
+    name = ex["repo"].split("/")[1]
+    key = name.lower()
+    result = subprocess.check_output(f"commit0 get-tests {name} | wc", shell=True, text=True)
+    tests = int(result.split()[0])
+    # Only fetch the avatar when it is actually shown: an overload logo wins, a skipped repo has no image
+    avatar = None if key in overload or key in skip else get_github_avatar(ex["original_repo"])
+    if key in overload:
+        img = f"<img src='{overload[key]}' width='100px'/>"
+    elif avatar:
+        img = f"<img src='{avatar}' width='100px'/>"
+    else:
+        img = f"<b>{name}</b>"
+    print(f"| {img} | [{name}]({ex['setup']['specification']}) | [[orig](http://github.com/{ex['original_repo']})] | [[commit0](http://github.com/{ex['repo']})] | {tests} | <img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAABkCAQAAADtJZLrAAAAD0lEQVR42mNkYGAcRcQhADxaAGWhD8eHAAAAAElFTkSuQmCC'/> |")
diff --git a/docs/setup.md b/docs/setup.md
@@ -1,3 +1,7 @@
+# Quickstart
+
+## Install
+
 First be sure that you have docker tools installed.
 
 ```bash
@@ -7,42 +11,129 @@ apt install docker
 To install the benchmark run,
 
 ```bash
-pip install spec2repo
+pip install commit0
 ```
 
-Then run
+## Commands
+
+The system is a command-line tool that allows you to run unit tests on a
+variety of libraries in isolated environments. To get started with the full
+setup, run the `clone` command, which will clone the code of a subset
+of libraries to your `repos/` directory.
 
 ```bash
-spec2repo new local
+commit0 clone lite
 ```
 
-This will generate a file `spec2repo.yml` in your project.
-To launch the benchmark suite run
+Next run the `build` command which will configure Docker containers for
+each of the libraries with isolated virtual environments. The command uses the
+[uv](https://github.com/astral-sh/uv) library for efficient builds.
 
 ```bash
-spec2repo launch
+commit0 build lite
 ```
 
-This will launch a set of docker instances for each of the repos as well as a
-local master.
+The main operation you can do with these environments is to run tests.
+Here we run [a test](https://github.com/commit-0/simpy/blob/master/tests/test_event.py#L11) in the `simpy` library.
+
+```bash
+commit0 test simpy tests/test_event.py::test_succeed
+```
 
-Now let's apply a patch to one of our repos:
+This test should run and pass, but others will fail.
+
+```bash
+commit0 test minitorch tests/test_operators.py::test_relu
+```
+
+Let's now manually go in and change that repo.
+This is all just standard shell commands.
 
 ```bash
 cd repos/minitorch/
-git checkout -b first_change
-patch ../../minitorch.example.patch .
-spec2repo test minitorch first_change test_add
+git checkout -b mychange
 ```
 
-This will run the `test_add` in the MiniTorch Repository and show the results.
+And apply and commit this patch.
 
-To get your current score on a repository you can run
+```
+--- a/minitorch/operators.py
++++ b/minitorch/operators.py
+@@ -81,7 +81,7 @@ def relu(x: float) -> float:
+     (See https://en.wikipedia.org/wiki/Rectifier_(neural_networks) .)
+     """
+     # TODO: Implement for Task 0.1.
+-    raise NotImplementedError('Need to implement for Task 0.1')
++    return 1. if x > 0. else 0.
+```
+
+Once this is done we can run `test` with
+a branch and the environment will sync and run.
+
+```bash
+commit0 test minitorch branch=mychange tests/test_operators.py::test_relu
+```
+
+## Running an Agent
+
+Next we will see how this can be run with an AI agent system.
+We will use [Aider](https://aider.chat/) which is a nice
+command-line oriented agent system.
+
+To set up Aider, first set your API key.
+We recommend using Claude Sonnet.
 
 ```bash
-spec2repo score minitorch
+# Work with Claude 3.5 Sonnet on your repo
+export ANTHROPIC_API_KEY=your-key-goes-here
 ```
 
-## Running Aider
+Once this is set up, you can run Aider with the following command.
+This will edit the files locally in your branch, but
+run the tests inside the environment.
 
-...
+```bash
+aider --model sonnet --file repos/minitorch/minitorch/operators.py --message "fill in" \
+     --auto-test --test \
+     --test-cmd 'commit0 test minitorch branch=mychange tests/test_operators.py::test_relu' \
+     --yes
+```
+
+This will run an LLM agent that will try to fill in the
+functions in one file of the minitorch library.
+
+For a full example baseline system that tries to solve
+all the tests in the library see the [baseline](baseline) documentation.
+
+
+## Distributed Tests
+
+One of the main advantages of `commit0` is that it can run
+a range of unit tests in distributed environments.
+
+By default, the library is configured to work with [modal](https://modal.com/).
+
+```bash
+pip install modal
+modal token new
+```
+
+To enable distributed runs, first
+create a file called `distributed.yaml`:
+
+```yaml
+backend: modal
+base_dir: repos.dist/
+```
+
+You can pass this configuration file as an argument to `clone`.
+
+```bash
+commit0 clone lite --cfg=distributed.yaml
+```
+
+Next, to run tests, you can run the standard test command.
+
+```bash
+commit0 test simpy master tests/test_event.py::test_succeed --cfg=distributed.yaml
+```