Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion commit0/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from hydra.core.config_store import ConfigStore
from commit0.configs.config_class import Commit0Config
from commit0.harness.constants import COMMANDS, SPLIT
from omegaconf import OmegaConf


def main() -> None:
Expand All @@ -24,9 +25,17 @@ def main() -> None:
cs.store(name="user", group="Commit0Config", node=Commit0Config)
# have hydra to ignore all command-line arguments
sys_argv = copy.deepcopy(sys.argv)
sys.argv = [sys.argv[0]]
cfg_arg = next((arg for arg in sys_argv if arg.startswith("--cfg=")), None)

hydra.initialize(version_base=None, config_path="configs")
config = hydra.compose(config_name="user")

if cfg_arg:
sys_argv.remove(cfg_arg)
config_name = cfg_arg.split("=")[1]
user_config = OmegaConf.load(config_name)
config = OmegaConf.merge(config, user_config)

# after hydra gets all configs, put command-line arguments back
sys.argv = sys_argv
# repo_split: split from command line has a higher priority than split in hydra
Expand Down
3 changes: 3 additions & 0 deletions commit0/harness/docker_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any
import sys

from commit0.harness.constants import (
BASE_IMAGE_BUILD_DIR,
Expand Down Expand Up @@ -39,6 +40,8 @@ def setup_logger(repo: str, log_file: Path, mode: str = "w") -> logging.Logger:
log_file.parent.mkdir(parents=True, exist_ok=True)
logger = logging.getLogger(f"{repo}.{log_file.name}")
handler = logging.FileHandler(log_file, mode=mode)
stdout_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stdout_handler)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
Expand Down
2 changes: 2 additions & 0 deletions commit0/harness/run_pytest_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ def main(
eval_file.write_text(eval_script)

if ExecutionBackend(backend) == ExecutionBackend.MODAL:
logger.info("Runnning on Modal")
execution_context = Modal
elif ExecutionBackend(backend) == ExecutionBackend.LOCAL:
logger.info("Runnning locally")
execution_context = Docker
else:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion docs/about.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Spec2Repo is made by ...
Commit0 is ..
33 changes: 33 additions & 0 deletions docs/distributed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Distributed

One of the main advantages of `commit0` is that it can run
a range of unit tests in distributed environments.

By default, the library is configured to work with [modal](https://modal.com/).

```bash
pip install modal
modal token new
```

## Modal Setup

To enable distributed run, first
create a file called `distributed.yaml`

```yaml
backend: modal
base_dir: repos.dist/
```

You can pass this configuration file as an argument to clone.

```bash
commit0 clone lite --cfg=distributed.yaml
```

Next, to run tests, you can run the standard test command.

```bash
commit0 test simpy master tests/test_event.py::test_succeed --cfg=distributed.yaml
```
78 changes: 73 additions & 5 deletions docs/index.md

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions docs/make_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import datasets
import subprocess

import requests
from bs4 import BeautifulSoup

def get_github_avatar(repo, timeout=10):
    """Return the avatar URL for the owner of a GitHub repo, or ``None``.

    The owner's profile page (``https://github.com/<owner>``) is downloaded
    and the ``content`` attribute of its ``og:image`` meta tag is returned;
    this script treats that value as the organization/user avatar.

    Args:
        repo: Repository in ``owner/repo`` form. Only the part before the
            first ``/`` is used.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The URL string from the ``og:image`` tag, or ``None`` when the page
        does not return HTTP 200, the tag is missing, or any error occurs.
        A diagnostic message is printed in each of those cases.
    """
    try:
        org = repo.split("/")[0]
        # Construct the URL for the owner's profile page
        url = f"https://github.com/{org}"

        # requests has no default timeout; without one a single stalled
        # connection would hang the whole table generation.
        response = requests.get(url, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch page for {repo}. Status code: {response.status_code}")
            return None

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the meta tag with property "og:image" which contains the avatar URL
        meta_tag = soup.find('meta', property='og:image')

        if meta_tag and 'content' in meta_tag.attrs:
            return meta_tag['content']

        print(f"Avatar URL not found for {repo}")
        return None

    except Exception as e:
        # Deliberately best-effort: a missing avatar (including a timeout)
        # must not abort generation of the docs table.
        print(f"An error occurred: {e}")
        return None

# Generate the Markdown table of benchmark repositories on stdout: one row
# per dataset entry with a logo, links, and the number of tests.
d = datasets.load_dataset("wentingzhao/commit0_docstring", split="test")

print(d)



print("| | Name | Repo | Commit0 | Tests | | ")
print("|--|--------|-------|----|----|------| ")

# Repos whose logo is taken from a hand-picked URL instead of the GitHub avatar.
overload = {
    "simpy" : "https://simpy.readthedocs.io/en/4.1.1/_images/simpy-logo-small.png",
    "tinydb" : "https://raw.githubusercontent.com/msiemens/tinydb/master/artwork/logo.png",
    "bitstring": "https://bitstring.readthedocs.io/en/stable/_images/bitstring_logo.png",
    "seaborn": "https://raw.githubusercontent.com/mwaskom/seaborn/master/doc/_static/logo-wide-lightbg.svg",
    "statsmodels": "https://raw.githubusercontent.com/statsmodels/statsmodels/main/docs/source/images/statsmodels-logo-v2-horizontal.svg",
    "pyboy" : "https://github.com/Baekalfen/PyBoy/raw/master/extras/README/pyboy.svg",
}

# Repos rendered with their bold name instead of an image.
skip = {
    "pyjwt",
    "wcwidth",
    "chardet",
    "dnspython",
    "imapclient",
    "pexpect",
    "dulwich",
    "voluptuous",
    "requests",
    "tlslite-ng",
    "more-itertools",
    "deprecated",
    "cachetools",
    "paramiko",
    "jedi",
    "sqlparse",
}

for ex in d:
    name = ex["repo"].split("/")[1]
    key = name.lower()

    # Run the CLI with an argv list rather than a shell string so the
    # dataset-provided name is never interpreted by a shell. Counting
    # newlines reproduces the first column of `wc` that was used before.
    # No check=True: the old `... | wc` pipeline also ignored the exit
    # status of `commit0 get-tests`.
    listing = subprocess.run(
        ["commit0", "get-tests", name],
        stdout=subprocess.PIPE,
        text=True,
    ).stdout
    tests = listing.count("\n")

    if key in overload:
        img = f"<img src='{overload[key]}' width='100px'/>"
    elif key in skip:
        img = f"<b>{name}</b>"
    else:
        # Only hit the network when the avatar is actually displayed.
        avatar = get_github_avatar(ex["original_repo"])
        if avatar:
            img = f"<img src='{avatar}' width='100px'/>"
        else:
            # Lookup failed: show the name rather than <img src='None'>.
            img = f"<b>{name}</b>"

    print(f"| {img} | [{name}]({ex['setup']['specification']}) | [[orig](http://github.com/{ex['original_repo']})] | [[commit0](http://github.com/{ex['repo']})] | {tests} | <img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAABkCAQAAADtJZLrAAAAD0lEQVR42mNkYGAcRcQhADxaAGWhD8eHAAAAAElFTkSuQmCC'/> |")
125 changes: 108 additions & 17 deletions docs/setup.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Quickstart

## Install

First be sure that you have docker tools installed.

```bash
Expand All @@ -7,42 +11,129 @@ apt install docker
To install the benchmark run,

```bash
pip install spec2repo
pip install commit0
```

Then run
## Commands

The system is a command-line tool that allows you to run unit-tests on a
variety of libraries in isolated environments. To get started with the full
setup, run the `clone` command, which will clone the code of a subset
of libraries to your `repos/` directory.

```bash
spec2repo new local
commit0 clone lite
```

This will generate a file `spec2repo.yml` in your project.
To launch the benchmark suite run
Next run the `build` command which will configure Docker containers for
each of the libraries with isolated virtual environments. The command uses the
[uv](https://github.com/astral-sh/uv) library for efficient builds.

```bash
spec2repo launch
commit0 build lite
```

This will launch a set of docker instances for each of the repos as well as a
local master.
The main operation you can do with these environments is to run tests.
Here we run [a test](https://github.com/commit-0/simpy/blob/master/tests/test_event.py#L11) in the `simpy` library.

```bash
commit0 test simpy tests/test_event.py::test_succeed
```

Now let's apply a patch to one of our repos:
This test should run and pass, but others will fail.

```bash
commit0 test minitorch tests/test_operators.py::test_relu
```

Let's now manually go in and change that repo.
This is all just standard shell commands.

```bash
cd repos/minitorch/
git checkout -b first_change
patch ../../minitorch.example.patch .
spec2repo test minitorch first_change test_add
git checkout -b mychange
```

This will run the `test_add` in the MiniTorch Repository and show the results.
And apply and commit this patch.

To get your current score on a repository you can run
```
--- a/minitorch/operators.py
+++ b/minitorch/operators.py
@@ -81,7 +81,7 @@ def relu(x: float) -> float:
(See https://en.wikipedia.org/wiki/Rectifier_(neural_networks) .)
"""
# TODO: Implement for Task 0.1.
- raise NotImplementedError('Need to implement for Task 0.1')
+ return 1. if x > 0. else 0.
```

Once this is done we can run `test` with
a branch and the environment will sync and run.

```bash
commit0 test minitorch branch=mychange tests/test_operators.py::test_relu
```

## Running an Agent

Next we will see how this can be run with an AI agent system.
We will use [Aider](https://aider.chat/) which is a nice
command-line oriented agent system.

To set up Aider, first set your API key.
We recommend using Claude Sonnet.

```bash
spec2repo score minitorch
# Work with Claude 3.5 Sonnet on your repo
export ANTHROPIC_API_KEY=your-key-goes-here
```

## Running Aider
Once this is set up, you can run Aider with the following command.
This will edit the files locally in your branch, but
run the tests inside the environment.

...
```bash
aider --model sonnet --file repos/minitorch/operators.py --message "fill in" \
--auto-test --test \
--test-cmd 'commit0 test minitorch branch=mychange tests/test_operators.py::test_relu' \
--yes
```

This will run an LLM agent that will try to fill in the
functions in one file of the minitorch library.

For a full example baseline system that tries to solve
all the tests in the library see the [baseline](baseline) documentation.


## Distributed Tests

One of the main advantages of `commit0` is that it can run
a range of unit tests in distributed environments.

By default, the library is configured to work with [modal](https://modal.com/).

```bash
pip install modal
modal token new
```

To enable distributed run, first
create a file called `distributed.yaml`

```yaml
backend: modal
base_dir: repos.dist/
```

You can pass this configuration file as an argument to clone.

```bash
commit0 clone lite --cfg=distributed.yaml
```

Next, to run tests, you can run the standard test command.

```bash
commit0 test simpy master tests/test_event.py::test_succeed --cfg=distributed.yaml
```