# Run ingestion
Ingest all SWE-Bench instances and evaluate the results.

In [1]:
from moatless.benchmark.swebench import load_instances
import os
import json

# All SWE-bench Lite test instances, keyed by instance id.
instance_by_id = load_instances("princeton-nlp/SWE-bench_Lite", split="test")

# JSON-lines file with one report per instance that was already evaluated.
evaluation_report = "report.jsonl"

# Last reported instance per repository; create_index() uses it to find an
# index that was already persisted for the same repo.
previous_instances = {}

if os.path.exists(evaluation_report):
    with open(evaluation_report, "r") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline added by hand).
            if not line.strip():
                continue
            report = json.loads(line)
            # pop() with a default instead of index + del: the report may
            # contain the same instance twice (a cell that was re-run) or an
            # id that is not part of the loaded split, and neither should
            # abort the resume with a KeyError.
            previous_instance = instance_by_id.pop(report["instance_id"], None)
            if previous_instance is None:
                continue
            previous_instances[previous_instance["repo"]] = previous_instance

# Remaining (not yet reported) instances, ordered by their "created_at" value.
instances = sorted(instance_by_id.values(), key=lambda x: x["created_at"])

print(f"Number of instances: {len(instances)}")

In [2]:
from moatless.benchmark.swebench import setup_swebench_repo

def next_instance(instances):
    """Remove and return the first entry of *instances*.

    Prints the id of the returned instance together with the number of
    entries still left in the list. Returns None when *instances* is empty.
    """
    if instances:
        instance = instances.pop(0)
        print(f"Instance: {instance['instance_id']}, {len(instances)} instances left")
        return instance
    return None

# Take the first pending instance; this is None when nothing is left to process.
instance = next_instance(instances)

In [3]:
from moatless.index.settings import IndexSettings
from moatless.index.code_index import CodeIndex
from dotenv import load_dotenv
from moatless.benchmark.swebench import get_repo_dir_name
import os

# Settings used for newly created indexes; embeddings use "voyage-code-2".
index_settings = IndexSettings(
    embed_model="voyage-code-2"
)

# Load environment variables from ../.env
# (presumably the embedding provider credentials — TODO confirm).
load_dotenv('../.env')

def get_persist_dir(instance):
    """Return the directory under /tmp/index_store used for *instance*'s index.

    The sub-directory name is derived from the instance id via
    get_repo_dir_name().
    """
    repo_dir_name = get_repo_dir_name(instance["instance_id"])
    return os.path.join("/tmp/index_store", repo_dir_name)

def create_index(instance):
    """Return a CodeIndex to ingest *instance* into.

    If another instance of the same repository was handled before (tracked in
    the module-level ``previous_instances`` dict) and its persisted index is
    still on disk, that index is loaded so ingestion can start from it.
    Otherwise a new, empty index is created with ``index_settings``.
    """
    previous_instance = previous_instances.get(instance["repo"])
    if previous_instance:
        persist_dir = get_persist_dir(previous_instance)
        # The index store lives under /tmp and can disappear (reboot, cleanup)
        # while the report file that populates previous_instances survives.
        # Fall back to a fresh index instead of failing on a missing directory.
        if os.path.isdir(persist_dir):
            return CodeIndex.from_persist_dir(persist_dir)
        print(f"Persisted index not found at {persist_dir}, creating a new index")
    return CodeIndex(settings=index_settings)

# Index for the current instance: reloaded from an earlier instance of the
# same repo when one is known, otherwise newly created.
code_index = create_index(instance)

In [4]:
def ingest(code_index, instance):
    """Ingest the repository of *instance* into *code_index* and persist it.

    Steps: set up the repository via setup_swebench_repo(), run the index
    ingestion with 4 workers, persist the index to the instance's persist
    directory and record the instance in ``previous_instances`` under its
    repo name.

    Returns:
        The ``(vectors, indexed_tokens)`` pair reported by run_ingestion().
    """
    repo_path = setup_swebench_repo(instance)
    print(f"Repo path: {repo_path}")

    ingestion_result = code_index.run_ingestion(repo_path=repo_path, num_workers=4)
    vectors, indexed_tokens = ingestion_result
    print(f"Indexed {vectors} vectors and {indexed_tokens} tokens.")

    persist_dir = get_persist_dir(instance)
    code_index.persist(persist_dir=persist_dir)
    print(f"Index persisted to {persist_dir}")

    # Remember this instance so later instances of the same repo can reuse
    # the index that was just persisted.
    previous_instances[instance["repo"]] = instance
    return vectors, indexed_tokens

# Ingest the current instance's repository and keep the ingestion counters.
vectors, indexed_tokens = ingest(code_index, instance)

In [5]:
from moatless.evaluation.utils import calculate_estimated_context_window

def evaluate(code_index, instance):
    """Search the index with the instance's problem statement and report hits.

    The top 1000 results of ``code_index.find_code`` are handed to
    calculate_estimated_context_window(), which returns the expected changes
    and the total token count. Each expected change is a dict read here via
    the keys ``context_window`` (None when the change was not found),
    ``file_path``, ``start_line``, ``end_line`` and, for found changes,
    ``distance`` and ``position``; ``closest_match_lines`` is optional.

    A summary and one line per expected change are printed.

    Returns:
        A tuple ``(expected_changes, all_matching_context_window,
        any_matching_context_window)`` where the last two are the largest and
        smallest ``context_window`` among the found changes, or None when no
        expected change was found.
    """
    results = code_index.find_code(instance["problem_statement"], top_k=1000)

    expected_changes, sum_tokens = calculate_estimated_context_window(instance, results)

    # Context windows of the expected changes that were actually found.
    found_windows = [
        change["context_window"]
        for change in expected_changes
        if change["context_window"] is not None
    ]

    all_matching_context_window = None
    any_matching_context_window = None

    if found_windows:
        all_matching_context_window = max(found_windows)
        any_matching_context_window = min(found_windows)

        if len(found_windows) == len(expected_changes):
            print(f"Found all expected changes within a context window of {all_matching_context_window} tokens, first match at context window {any_matching_context_window}")
        else:
            print(f"Found {len(found_windows)} expected changes within a context window {all_matching_context_window} tokens, first match at context window {any_matching_context_window} max context window {sum_tokens} tokens")
    else:
        print(f"No expected changes found in context window of {sum_tokens} tokens")

    for change in expected_changes:
        if change["context_window"] is None:
            print(f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) not found, closest match: {change.get('closest_match_lines')}")
        else:
            print(f"Expected change: {change['file_path']} ({change['start_line']}-{change['end_line']}) found at context window {change['context_window']} tokens. Distance: {change['distance']}. Position: {change['position']}")

    return expected_changes, all_matching_context_window, any_matching_context_window

# Evaluate retrieval for the current instance against its expected changes.
expected_changes, all_matching_context_window, any_matching_context_window = evaluate(code_index, instance)

In [6]:
import json

def write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, any_matching_context_window, report_path="report.jsonl"):
    """Append one JSON line describing an evaluated instance to the report.

    Args:
        instance: Mapping with at least an ``instance_id`` key.
        expected_changes: JSON-serializable list of expected-change records.
        vectors: Number of vectors reported by the ingestion.
        indexed_tokens: Number of tokens reported by the ingestion.
        all_matching_context_window: Largest context window among the found
            changes, or None.
        any_matching_context_window: Smallest context window among the found
            changes, or None.
        report_path: JSON-lines file to append to. Defaults to
            ``"report.jsonl"``, the path that was previously hard-coded.
    """
    record = {
        "instance_id": instance["instance_id"],
        "vectors": vectors,
        "indexed_tokens": indexed_tokens,
        "all_matching_context_window": all_matching_context_window,
        "any_matching_context_window": any_matching_context_window,
        "expected_changes": expected_changes,
    }
    # Append mode: earlier report lines are what lets a later run resume.
    with open(report_path, "a") as f:
        f.write(json.dumps(record) + "\n")
        
# Append the current instance's results to the report file.
write_report(instance, expected_changes, vectors, indexed_tokens, all_matching_context_window, any_matching_context_window)

In [None]:

def index_next_instance():
    """Process every instance still pending in the module-level ``instances``.

    For each instance taken from the list: create or reload its index, ingest
    the repository, evaluate the retrieval results and append a report line.
    Stops once next_instance() returns a falsy value.
    """
    while True:
        instance = next_instance(instances)
        if not instance:
            break

        code_index = create_index(instance)
        vectors, indexed_tokens = ingest(code_index, instance)

        evaluation = evaluate(code_index, instance)
        expected_changes, all_matching_context_window, any_matching_context_window = evaluation

        write_report(
            instance,
            expected_changes,
            vectors,
            indexed_tokens,
            all_matching_context_window,
            any_matching_context_window,
        )

# Run the ingest/evaluate/report loop over all remaining instances.
index_next_instance()