In [1]:
import os
import sys

# Make the local swe-rl checkout importable. `sys.path` is used directly
# rather than through `os.sys`, which is not a documented attribute of `os`.
sys.path.append("/data2/zzd/rl_llm/swe-rl/src")

# Configuration is set before the `swerl` import that follows.
# NOTE(review): presumably swerl reads these variables at import/run time -- confirm.
os.environ["THINKING"] = "no"
os.environ["ANSWER_START_TAG"] = "<think>"
os.environ["ANSWER_END_TAG"] = "</think>"
os.environ["PLAYGROUND_DIR"] = "tmp_agentless_notebook"
os.environ["PROJECT_FILE_LOC"] = "/data2/zzd/rl_llm/swe-rl/repo_structure/repo_structures"
os.environ["TOKENIZER_MODEL"] = "/data3/ckpt/voidful/Llama-3.2-8B-Instruct"

# Hugging Face endpoint/cache locations and the temp directory for this machine.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["HF_DATASETS_CACHE"] = "/data2/zzd/cache"
os.environ["HF_HOME"] = "/data2/zzd/cache"
os.environ["TMPDIR"] = "/data2/zzd/tmp"

from typing import Literal
from dataclasses import dataclass, field
from datasets import Dataset, load_dataset
import swerl.agentless_mini.utils as utils

@dataclass(frozen=True)
class Args:
    """Repair-stage settings: localization input, output folder and prompt budget."""

    # Localization file and output file for repair
    loc_file: str = field(default="/data2/zzd/rl_llm/swe-rl/resources/sweb_lite_gt_loc.jsonl")
    output_folder: str = field(default="repair_output_notebook")
    # Token budget handed to the file-context builder when assembling prompts.
    max_input_tokens: int = field(default=60000)

    @property
    def output_file(self):
        """Return the POSIX-style path of ``output.jsonl`` inside ``output_folder``."""
        # Imported here so the class does not depend on a later cell having
        # imported `Path` into the notebook namespace.
        from pathlib import Path

        return (Path(self.output_folder) / "output.jsonl").as_posix()

@dataclass(frozen=True)
class BenchArgs:
    """Which benchmark dataset to load and which shard of it to use."""

    # Index of the shard to load and the total number of shards.
    shard: int = 0
    num_shards: int = 125
    # Only the Verified subset has been tested so far; shard/num_shards
    # can be used to split the dataset.
    dataset: Literal[
        "princeton-nlp/SWE-bench_Lite", "princeton-nlp/SWE-bench_Verified"
    ] = "princeton-nlp/SWE-bench_Verified"

    def load(self) -> Dataset:
        """Load the ``test`` split of ``dataset`` and return the selected shard."""
        test_split = load_dataset(self.dataset, split="test")
        # contiguous=False is passed through to Dataset.shard unchanged.
        return test_split.shard(
            num_shards=self.num_shards, index=self.shard, contiguous=False
        )


@dataclass(frozen=True)
class InferenceArgs:
    """Model name, sampling parameters and concurrency limit for generation requests."""

    # Taken from the TOKENIZER_MODEL environment variable when it is set
    # at class-definition time; otherwise the literal fallback is used.
    model: str = os.environ.get("TOKENIZER_MODEL", "Qwen/Qwen3-30B-A3B")
    temperature: float = 0.7
    num_samples: int = 1
    max_tokens: int = 4096
    max_concurrent_requests: int = field(
        metadata={"help": "Maximum number of concurrent requests sent to the backend"},
        default=64,
    )


# Build one instance of each argument dataclass through the project's parser
# helper; the result is unpacked positionally in the next cell.
params = utils.args.parse_args_into_dataclasses(BenchArgs, InferenceArgs, Args)
print(params)

  from .autonotebook import tqdm as notebook_tqdm


(BenchArgs(shard=0, num_shards=125, dataset='princeton-nlp/SWE-bench_Verified'), InferenceArgs(model='/data3/ckpt/voidful/Llama-3.2-8B-Instruct', temperature=0.7, num_samples=1, max_tokens=4096, max_concurrent_requests=64), Args(loc_file='/data2/zzd/rl_llm/swe-rl/resources/sweb_lite_gt_loc.jsonl', output_folder='repair_output_notebook', max_input_tokens=60000))


In [2]:
import json
from pathlib import Path



# Unpack in the same order the dataclasses were handed to the parser.
bench_args, inference_args, args = params

# Record the run configuration (as dataclass reprs) next to the outputs.
output_folder = Path(args.output_folder)
output_folder.mkdir(parents=True, exist_ok=True)
meta = {
    "bench_args": str(bench_args),
    "inference_args": str(inference_args),
    "args": str(args),
}
with open(output_folder / "args.json", "w") as f:
    json.dump(meta, f, indent=4)

# Load the selected shard of the benchmark.
swe_bench_data = bench_args.load()

# Show the first record as a sanity check.
swe_bench_data[0]

{'repo': 'astropy/astropy',
 'instance_id': 'astropy__astropy-12907',
 'base_commit': 'd16bfe05a744909de4b27f5875fe0d4ed41ce607',
 'patch': "diff --git a/astropy/modeling/separable.py b/astropy/modeling/separable.py\n--- a/astropy/modeling/separable.py\n+++ b/astropy/modeling/separable.py\n@@ -242,7 +242,7 @@ def _cstack(left, right):\n         cright = _coord_matrix(right, 'right', noutp)\n     else:\n         cright = np.zeros((noutp, right.shape[1]))\n-        cright[-right.shape[0]:, -right.shape[1]:] = 1\n+        cright[-right.shape[0]:, -right.shape[1]:] = right\n \n     return np.hstack([cleft, cright])\n \n",
 'test_patch': "diff --git a/astropy/modeling/tests/test_separable.py b/astropy/modeling/tests/test_separable.py\n--- a/astropy/modeling/tests/test_separable.py\n+++ b/astropy/modeling/tests/test_separable.py\n@@ -28,6 +28,13 @@\n p1 = models.Polynomial1D(1, name='p1')\n \n \n+cm_4d_expected = (np.array([False, False, True, True]),\n+                  np.array([[True,  Tr

In [3]:
# repair from swe-rl/src/swerl/agentless_mini/repair.py 

# Localization records, one per instance.
locs = utils.misc.load_jsonl(args.loc_file)

# Results written by an earlier run, if the output file already exists.
if os.path.exists(args.output_file):
    prev_o = utils.misc.load_jsonl(args.output_file)
else:
    prev_o = []

# Keep only the localizations whose instance is in the loaded shard.
all_instance_ids = set(swe_bench_data["instance_id"])
locs = [entry for entry in locs if entry["instance_id"] in all_instance_ids]

print(all_instance_ids)
print(locs)
print(f"Loaded {len(locs)} locations from {args.loc_file}")


{'django__django-13741', 'django__django-17087', 'astropy__astropy-12907', 'scikit-learn__scikit-learn-25747'}
[{'instance_id': 'astropy__astropy-12907', 'found_files': ['astropy/modeling/separable.py']}, {'instance_id': 'django__django-17087', 'found_files': ['django/db/migrations/serializer.py']}, {'instance_id': 'scikit-learn__scikit-learn-25747', 'found_files': ['sklearn/utils/_set_output.py']}]
Loaded 3 locations from /data2/zzd/rl_llm/swe-rl/resources/sweb_lite_gt_loc.jsonl


In [12]:
import asyncio
from tqdm import tqdm
from swerl.agentless_mini.repair import process_loc

os.environ["OPENAI_API_KEY"] = "token-abc123"
os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"


backend = "http://localhost:8000/v1"
from openai import OpenAI
client = OpenAI(api_key="token-abc123", base_url=backend)
# client = utils.api.OpenAIClient()
# semaphore = asyncio.Semaphore(inference_args.max_concurrent_requests)

# Generate repairs one instance at a time: build the requests, send them
# sequentially, then post-process the responses and append to the output file.
results = []
for loc in tqdm(locs):
    prepared = process_loc_p1(args, inference_args, client, loc, swe_bench_data, prev_o)
    if prepared is None:
        # process_loc_p1 returns None when the instance already has a patch in
        # prev_o or has no localized files; unpacking None would raise
        # TypeError, so such instances are skipped and not added to `results`.
        continue

    # Keep the name `all_requests` at cell level: process_loc_p2 looks the
    # requests up by that name to recover each prompt.
    all_requests, all_found_files, repo_file_contents_dict, instance_id = prepared

    idx_and_responses = []
    for idx, request in enumerate(all_requests):
        response = client.chat.completions.create(**request)
        idx_and_responses.append((idx, response))

    result = process_loc_p2(idx_and_responses, all_found_files, repo_file_contents_dict, instance_id, inference_args)
    results.append(result)
    if result is not None:
        with open(args.output_file, "a") as f:
            f.write(json.dumps(result) + "\n")
    


 33%|███▎      | 1/3 [00:18<00:37, 18.76s/it]

not replaced


 67%|██████▋   | 2/3 [00:26<00:12, 12.28s/it]

not replaced


100%|██████████| 3/3 [00:41<00:00, 13.73s/it]


In [13]:
from swerl.agentless_mini.repair import post_process_repair

            
# Run the imported post-processing step once for every sample index.
for i in range(inference_args.num_samples):
    post_process_repair(args, i)

not replaced
Initialized empty Git repository in /data2/zzd/rl_llm/swe-rl/tests/tmp_agentless_notebook/c82c85d0-39db-46b1-b49a-783e65185c41/.git/
[master (root-commit) 59e5fce] initial commit
 1 file changed, 317 insertions(+)
 create mode 100644 astropy/modeling/separable.py
not replaced
Initialized empty Git repository in /data2/zzd/rl_llm/swe-rl/tests/tmp_agentless_notebook/f691e672-b31b-4ee7-a986-0a576ccf48fe/.git/
[master (root-commit) c4861d7] initial commit
 1 file changed, 400 insertions(+)
 create mode 100644 django/db/migrations/serializer.py
Initialized empty Git repository in /data2/zzd/rl_llm/swe-rl/tests/tmp_agentless_notebook/6b2b9e8b-af12-44bb-9b86-e47e636b3c25/.git/


hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>
hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 	git config --global init.defaultBranch <name>
hint: 
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and
hint: 'development'. The just-created branch can be renamed via this command:
hint: 
hint: 	git branch -m <name>
hint: Using 'master' as the name for the initial branch. This default branch name
hint: is subject to change. To configure the initial branch name to use in all
hint: 
hint: 

In [11]:
# Display everything collected by the generation loop (full prompts and file
# contents are included, so this output is large).
results

[{'instance_id': 'astropy__astropy-12907',
  'prev_content': [[['# Licensed under a 3-clause BSD style license - see LICENSE.rst\n\n"""\nFunctions to determine if a model is separable, i.e.\nif the model outputs are independent.\n\nIt analyzes ``n_inputs``, ``n_outputs`` and the operators\nin a compound model by stepping through the transforms\nand creating a ``coord_matrix`` of shape (``n_outputs``, ``n_inputs``).\n\n\nEach modeling operator is represented by a function which\ntakes two simple models (or two ``coord_matrix`` arrays) and\nreturns an array of shape (``n_outputs``, ``n_inputs``).\n\n"""\n\nimport numpy as np\n\nfrom .core import Model, ModelDefinitionError, CompoundModel\nfrom .mappings import Mapping\n\n\n__all__ = ["is_separable", "separability_matrix"]\n\n\ndef is_separable(transform):\n    """\n    A separability test for the outputs of a transform.\n\n    Parameters\n    ----------\n    transform : `~astropy.modeling.core.Model`\n        A (compound) model.\n\n    R

In [8]:
# Print the prompt recorded for the first sample of the first result.
print(results[0]['traj'][0]['prompt'])

We are currently solving the following issue within our repository. Here is the issue text:
--- BEGIN ISSUE ---
Modeling's `separability_matrix` does not compute separability correctly for nested CompoundModels
Consider the following model:

```python
from astropy.modeling import models as m
from astropy.modeling.separable import separability_matrix

cm = m.Linear1D(10) & m.Linear1D(5)
```

It's separability matrix as you might expect is a diagonal:

```python
>>> separability_matrix(cm)
array([[ True, False],
       [False,  True]])
```

If I make the model more complex:
```python
>>> separability_matrix(m.Pix2Sky_TAN() & m.Linear1D(10) & m.Linear1D(5))
array([[ True,  True, False, False],
       [ True,  True, False, False],
       [False, False,  True, False],
       [False, False, False,  True]])
```

The output matrix is again, as expected, the outputs and inputs to the linear models are separable and independent of each other.

If however, I nest these compound models:
```python


In [9]:
from swerl.agentless_mini.repair import construct_topn_file_context, _post_process_multifile_repair


def get_input_messages(context: str, problem_statement, system: str | None = None) -> list[dict]:
    """Build the chat message list for one repair request.

    The REPAIR prompt template is filled with ``problem_statement`` and
    ``context`` (passed as ``content``), stripped, and sent as the user
    message. When ``system`` is given, a system message precedes it.
    """
    user_text = utils.prompts.REPAIR.format(
        problem_statement=problem_statement,
        content=context,
    ).strip()
    user_message = {"role": "user", "content": user_text}
    if system is None:
        return [user_message]
    return [{"role": "system", "content": system}, user_message]

# Construct file contents
def _get_file_contents(pred_files: list[str], repo_file_contents_dict) -> dict[str, str]:
    return {
        pred_file: "\n".join(repo_file_contents_dict[pred_file])
        for pred_file in pred_files
        # # This should be always true except for one special GT case:
        # # astropy/coordinates/builtin_frames/itrs_observed_transforms.py
        # # This is fixed in the GT file (12/26/24).
        # if pred_file in repo_file_contents_dict
    }


def process_loc_p1(
    args: Args,
    inf_args: utils.args.InferenceArgs,
    client: utils.api.OpenAIClient,
    loc: dict,
    swe_bench_data: list[dict],
    prev_o: list[dict],
):
    """Prepare the chat-completion requests for one localized instance.

    Returns ``None`` (after printing a message) when ``prev_o`` already has an
    entry for the instance or when no files were localized. Otherwise returns
    ``(all_requests, all_found_files, repo_file_contents_dict, instance_id)``
    where both lists have exactly ``inf_args.num_samples`` entries.

    ``client`` is accepted but not used here. ``loc["found_files"]`` is
    rewritten in place when it is a flat (or empty) list.
    """
    instance_id = loc["instance_id"]

    # Nothing to do when an earlier run already produced output for this instance.
    for previous in prev_o:
        if previous["instance_id"] == instance_id:
            print(f"skipping {instance_id} since patch already generated")
            return None

    # Backward compatibility: a flat list of paths (or an empty list) is
    # wrapped so that found_files is always a list of file groups.
    raw_found = loc["found_files"]
    if len(raw_found) == 0 or isinstance(raw_found[0], str):
        loc["found_files"] = [raw_found]

    if not any(len(group) > 0 for group in loc["found_files"]):
        print(f"no files found for {instance_id}")
        return None

    nonempty_groups: list[list[str]] = [
        group for group in loc["found_files"] if len(group) > 0
    ]
    assert len(nonempty_groups) > 0

    # Drop repeated groups while keeping first-seen order; tuples make the
    # groups hashable for the membership set.
    seen_groups = set[tuple[str, ...]]()
    distinct_groups: list[list[str]] = []
    for group in nonempty_groups:
        group_key = tuple(group)
        if group_key in seen_groups:
            continue
        seen_groups.add(group_key)
        distinct_groups.append(group)
    all_found_files = distinct_groups[: inf_args.num_samples]

    # Pad by cycling through the existing groups until there is one per sample.
    assert len(all_found_files) > 0
    shortfall = inf_args.num_samples - len(all_found_files)
    for pad_index in range(shortfall):
        all_found_files.append(all_found_files[pad_index % len(all_found_files)])
    assert len(all_found_files) == inf_args.num_samples

    # Look up the benchmark record and the repository's file contents.
    matching_rows = [row for row in swe_bench_data if row["instance_id"] == instance_id]
    problem_statement = matching_rows[0]["problem_statement"]
    structure = utils.data.get_repo_structure(instance_id)
    repo_file_contents, _, _ = utils.data.get_full_file_paths_and_classes_and_functions(structure)
    repo_file_contents_dict = {
        file_path: file_lines for file_path, file_lines in repo_file_contents
    }

    # One file context per sample; randomization is only requested when more
    # than one sample is drawn.
    randomize = inf_args.num_samples > 1
    all_topn_contents = [
        construct_topn_file_context(
            instance_id,
            pred_files,
            _get_file_contents(pred_files, repo_file_contents_dict),
            args.max_input_tokens,
            randomize=randomize,
        )
        for pred_files in all_found_files
    ]

    # One single-completion request per context.
    all_requests = []
    for prompt in all_topn_contents:
        all_requests.append(
            {
                "model": inf_args.model,
                "messages": get_input_messages(prompt, problem_statement),
                "max_tokens": inf_args.max_tokens,
                "temperature": inf_args.temperature,
                "n": 1,
            }
        )
    return all_requests, all_found_files, repo_file_contents_dict, instance_id


def process_loc_p2(
    idx_and_responses,
    all_found_files,
    repo_file_contents_dict,
    instance_id,
    inf_args: utils.args.InferenceArgs,
    all_requests: list[dict] | None = None,
):
    """Turn the raw responses for one instance into a result record.

    Args:
        idx_and_responses: ``(index, response)`` pairs, one per sample; the
            indices must be exactly ``0 .. inf_args.num_samples - 1``. A
            response of ``None`` is recorded as an empty generation.
        all_found_files: file group used for each sample, indexed like the requests.
        repo_file_contents_dict: mapping from file path to that file's lines.
        instance_id: identifier copied into the result.
        inf_args: inference settings; only ``num_samples`` is read here.
        all_requests: the requests that produced the responses, used to
            recover each prompt. When omitted, a module-level variable named
            ``all_requests`` is used, which is what this function relied on
            before the parameter existed.

    Returns:
        A dict with the instance id, parsed outputs, raw generations,
        prompt/response trajectories, the pre-edit contents and names of the
        edited files, the sample indices and the file groups.

    Raises:
        NameError: if there are responses to process but no requests were
            passed and no module-level ``all_requests`` exists.
    """
    assert len(idx_and_responses) == inf_args.num_samples
    indices = [idx for idx, _ in idx_and_responses]
    assert sorted(indices) == list(range(inf_args.num_samples))

    # Prefer the explicit argument; fall back to the notebook global so that
    # existing five-argument calls keep working.
    if all_requests is None:
        all_requests = globals().get("all_requests")
    if all_requests is None and idx_and_responses:
        raise NameError(
            "all_requests was not passed and no module-level 'all_requests' is defined"
        )

    all_generations = list[str]()
    all_outputs = list[str]()
    all_trajs = list[dict]()
    all_prev_contents = list[list[str]]()
    all_file_names = list[list[str]]()
    for idx, response in idx_and_responses:
        request = all_requests[idx]
        file_contents = _get_file_contents(all_found_files[idx], repo_file_contents_dict)
        # The prompt is the content of the last message of the request.
        prompt = request["messages"][-1]["content"]
        if response is not None:
            output = response.choices[0].message.content
        else:
            output = ""
        all_trajs.append(dict(prompt=prompt, response=output))

        all_generations.append(output)

        # Extract the <solution> part
        output = utils.api.parse_thinking_output(output)

        edited_files, new_contents = _post_process_multifile_repair(
            output, file_contents
        )

        if len(new_contents) == 0:
            # No edit was produced for this sample.
            all_prev_contents.append([])
            all_file_names.append([])
        else:
            prev_content = [file_contents[edited_file] for edited_file in edited_files]
            all_prev_contents.append(prev_content)
            all_file_names.append(edited_files)
        all_outputs.append(output)
    # Several fields are wrapped in an extra list level, as in the original record layout.
    return dict(
        instance_id=instance_id,
        raw_output=all_outputs,
        all_generations=[all_generations],
        traj=all_trajs,
        prev_content=[all_prev_contents],
        file_names=[all_file_names],
        all_indices=indices,
        all_found_files=all_found_files,
    )
