# Test AutoFileGroup
This notebook tests the file-grouping functionality of the `AutoFileGroup` class.

In [1]:
import os
import sys
import yaml
from loguru import logger

from autocoder.agent.auto_filegroup import AutoFileGroup
import byzerllm

## Create test data directory and files

In [3]:
# Create the test directory layout.
# The YAML files below are written into an "actions" subdirectory of test_dir,
# so that subdirectory must exist before the files are opened for writing.
# os.makedirs creates intermediate directories too, so test_dir itself is
# created by the same call.
test_dir = ".tmp/test_auto_filegroup"
actions_dir = os.path.join(test_dir, "actions")
os.makedirs(actions_dir, exist_ok=True)

# Test fixtures: each entry becomes one YAML file whose body holds a "query"
# string and the list of "urls" (file paths) associated with that query.
test_data = [
    {
        "name": "001_feature1.yml",
        "content": {
            "query": "Add input validation to login function",
            "urls": [
                "/src/auth/login.py",
                "/src/utils/validation.py"
            ]
        }
    },
    {
        "name": "002_feature2.yml",
        "content": {
            "query": "Implement password reset functionality",
            "urls": [
                "/src/auth/password_reset.py",
                "/src/auth/email.py"
            ]
        }
    },
    {
        "name": "003_feature3.yml",
        "content": {
            "query": "Add caching to database queries",
            "urls": [
                "/src/db/cache.py",
                "/src/db/queries.py"
            ]
        }
    },
    {
        "name": "004_feature4.yml",
        "content": {
            "query": "Implement rate limiting for API endpoints",
            "urls": [
                "/src/api/rate_limiter.py",
                "/src/api/middleware.py"
            ]
        }
    },
    {
        "name": "005_feature5.yml",
        "content": {
            "query": "Add email verification after signup",
            "urls": [
                "/src/auth/email.py",
                "/src/auth/signup.py"
            ]
        }
    }
]

# Write one YAML file per fixture into the actions directory.
for item in test_data:
    file_path = os.path.join(actions_dir, item["name"])
    with open(file_path, "w", encoding="utf-8") as f:
        yaml.dump(item["content"], f)

print(f"Created {len(test_data)} test files in {test_dir}")

Created 5 test files in .tmp/test_auto_filegroup


## Initialize LLM and test grouping

In [1]:
import os
import sys
import yaml
from loguru import logger

from autocoder.agent.auto_filegroup import AutoFileGroup
import byzerllm

# Project whose files are grouped. The default is the path this notebook was
# originally run against; set AUTO_CODER_PROJECT_DIR to point the notebook at a
# different checkout without editing the cell.
project_dir = os.environ.get(
    "AUTO_CODER_PROJECT_DIR", "/Users/allwefantasy/projects/auto-coder"
)

llm = byzerllm.ByzerLLM.from_default_model(model="deepseek_chat")

# Create AutoFileGroup instance
grouper = AutoFileGroup(llm=llm, project_dir=project_dir)

# Run the grouping; each returned group is read below via its
# `name`, `queries` and `urls` attributes.
groups = grouper.group_files()

print("\nFile Grouping Results:")
print("=====================")
for idx, group in enumerate(groups, 1):
    print(f"\nGroup {idx}: {group.name}")
    print("Queries:")
    for query in group.queries:
        print(f"- {query}")
    print("Files:")
    for url in group.urls:
        print(f"- {url}")

[32m2024-12-21 11:52:18.684[0m | [1mINFO    [0m | [36mbyzerllm.utils.connect_ray[0m:[36mconnect_cluster[0m:[36m48[0m - [1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...[0m
2024-12-21 11:52:18,738	INFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...
2024-12-21 11:52:18,759	INFO worker.py:1740 -- Connected to Ray cluster. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m



File Grouping Results:

Group 1: 搜索功能实现
Queries:
- 在 autocoder/common 目录中添加一个 search.py ，参考 search_with_lepton，实现一个 Search 类，支持 bing,google,baidu等endpoint,获取搜索结果，返回 contexts,对该contexts生成一个pydantic对象。
Files:
- https://raw.githubusercontent.com/leptonai/search_with_lepton/main/search_with_lepton.py

Group 2: LLM重排序功能
Queries:
- llm_retrank.py 中，postprocess_nodes 的具体逻辑是，1. 通过 map 给 nodes 里的每一个元素添加一个全局 index 序号 2. 按choice_batch_size对 nodes 进行切分，3. 对 rerank 的结果进行解析，得到文档序号和得分。（注意，尽量解析的鲁棒性）4. 对每个 batch 进行 rerank，然后调用 rerank 函数，确保 rerank 里的序号是全局的 5. 最后合并结果,按照得分排序，返回结果 6. 在 ./tests/ 目录下添加 llm_rerank 的测试用例，确保覆盖所有的逻辑。
Files:
- /Users/allwefantasy/projects/llama_index/llama-index-core/llama_index/core/schema.py

Group 3: Collection和Description支持
Queries:
- 请阅读 command_args.py，simple_rag.py,common/__init__.py 三个文件的源码，我们希望实现如下的新功能：1. 支持 collection 的概念，collection是一个字符串，用来区分不同的索引，索引的时候，可以指定collection，查询的时候也可以指定collection。2. 支持 description 的概念，description是一个字符串，用来描述索引的内容,方便查询的时候，根据description来进行路由到对应的

## Cleanup

In [6]:
# Remove test directory and files.
# Guarded by an existence check so the cell can be re-run (or run after a
# failed setup) without raising FileNotFoundError.
import shutil

if os.path.isdir(test_dir):
    shutil.rmtree(test_dir)
    print(f"\nCleaned up test directory: {test_dir}")
else:
    print(f"\nNothing to clean up; directory not found: {test_dir}")


Cleaned up test directory: .tmp/test_auto_filegroup
