Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .DS_Store
Binary file not shown.
18 changes: 18 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[run]
source = app
omit =
*/__init__.py
*/tests/*
*/__pycache__/*
app/main.py
app/config/settings.py

[report]
exclude_lines =
pragma: no cover
def __repr__
if self.debug:
raise AssertionError
raise NotImplementedError
if __name__ == .__main__.:
if TYPE_CHECKING:
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,11 @@ cython_debug/
marimo/_static/
marimo/_lsp/
__marimo__/
# macOS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
33 changes: 16 additions & 17 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,19 @@ repos:
types: [python]

# 4. 重要检查(仅在push时)
# - repo: local
# hooks:
# - id: pytest
# name: pytest
# language: system
# entry: uv
# args: ["run", "pytest", "-q"]
# pass_filenames: false
# stages: [pre-push]

# - id: pip-audit
# name: pip-audit
# language: system
# entry: uv
# args: ["run", "pip-audit", "--strict"]
# pass_filenames: false
# stages: [pre-push]
# - repo: local
# hooks:
# - id: pytest
# name: pytest
# language: system
# entry: uv
# args: ["run", "pytest", "-q"]
# pass_filenames: false
# stages: [pre-push]
# - id: pip-audit
# name: pip-audit
# language: system
# entry: uv
# args: ["run", "pip-audit", "--strict"]
# pass_filenames: false
# stages: [pre-push]
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ help:
@echo " lint ✨ 检查代码并自动修复问题"
@echo " type 🔍 类型检查"
@echo " test 🧪 运行测试并生成覆盖率报告"
@echo " cov 🧪 运行测试并打开覆盖率报告"
@echo " run ▶️ 启动开发服务器"
@echo " pre-commit 🔄 运行预提交检查"
@echo " audit 🛡️ 扫描依赖中的安全漏洞"
Expand Down Expand Up @@ -65,7 +66,12 @@ check: fmt lint type

.PHONY: test
test:
@uv run pytest -q --cov=app --cov-report=term-missing --cov-report=xml
@uv run pytest -v --cov=app --cov-report=term-missing --cov-report=xml

# cov: run the test suite, render the HTML coverage report, then open it.
# The `test` prerequisite only emits the terminal and XML reports, so the HTML
# report is generated here from the .coverage data file that the test run
# leaves behind; otherwise htmlcov/index.html would be missing or stale.
.PHONY: cov
cov: test
	@uv run coverage html
	@echo "🌐 打开覆盖率报告..."
	@open htmlcov/index.html || xdg-open htmlcov/index.html || echo "请打开: htmlcov/index.html"

.PHONY: audit
audit:
Expand Down
40 changes: 21 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,18 @@ make run

## 📋 开发命令

| 命令 | 描述 |
|------|------|
| `make setup` | 🚀 一键设置完整开发环境 |
| `make check` | ✅ 运行所有代码质量检查 |
| `make test` | 🧪 运行测试并生成覆盖率报告 |
| `make run` | ▶️ 启动开发服务器 |
| `make fmt` | 🎨 格式化代码 |
| `make lint` | ✨ 检查代码并自动修复 |
| `make type` | 🔍 类型检查 |
| `make audit` | 🛡️ 扫描安全漏洞 |
| `make clean` | 🧹 清理临时文件 |
| 命令 | 描述 |
|--------------|-----------------|
| `make setup` | 🚀 一键设置完整开发环境 |
| `make check` | ✅ 运行所有代码质量检查 |
| `make test` | 🧪 运行测试并生成覆盖率报告 |
| `make cov` | 🧪 运行测试并打开覆盖率报告 |
| `make run` | ▶️ 启动开发服务器 |
| `make fmt` | 🎨 格式化代码 |
| `make lint` | ✨ 检查代码并自动修复 |
| `make type` | 🔍 类型检查 |
| `make audit` | 🛡️ 扫描安全漏洞 |
| `make clean` | 🧹 清理临时文件 |

## 🔧 API 接口

Expand All @@ -103,14 +104,15 @@ make run

### 主要端点

| 端点 | 方法 | 描述 |
|------|------|------|
| `/` | GET | API 根路径和信息 |
| `/api/v1/health` | GET | 健康检查 |
| `/api/v1/documents/upload-file` | POST | 本地文件上传 |
| `/api/v1/documents/upload-from-url` | POST | 从COS URL上传 |
| `/api/v1/search` | POST | 文档搜索 |
| `/api/v1/tasks/{task_id}` | GET | 查询任务状态 |
| 端点 | 方法 | 描述 |
|-------------------------------------|------|----------------|
| `/` | GET | API 根路径和信息 |
| `/api/v1/health` | GET | 健康检查 |
| `/api/v1/documents/upload-file` | POST | 本地文件上传 |
| `/api/v1/documents/upload-from-url` | POST | 从COS URL上传 |
| `/api/v1/documents/save` | POST | 以JSON格式字符串上传文档 |
| `/api/v1/search` | POST | 文档搜索 |
| `/api/v1/tasks/{task_id}` | GET | 查询任务状态 |

### 健康检查
```bash
Expand Down
4 changes: 2 additions & 2 deletions app/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class ElasticsearchSettings(BaseModel):
"""Elasticsearch 相关配置"""

url: str
metadata_index: str = "file_metadatas"
chunk_index: str = "file_chunks"
metadata_index_suffix: str
chunk_index_suffix: str
request_timeout: int = 15


Expand Down
13 changes: 13 additions & 0 deletions app/domain/document.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from typing import Any


@dataclass
class Document:
index_prefix: str
path: str
size: int
category: str | None = None
Expand Down
52 changes: 38 additions & 14 deletions app/domain/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,54 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from dataclasses import dataclass
from enum import Enum
from typing import Any


@dataclass
class SearchRequest:
"""封装搜索请求,新增 mode 和 filters。"""
class SearchMode(str, Enum):
    """Basic query modes.

    The ``str`` mixin makes every member an instance of ``str`` that compares
    equal to its plain string value (e.g. ``SearchMode.TERM == "term"``).
    """

    VECTOR = "vector"  # vector search
    TERM = "term"  # exact match
    MATCH = "match"  # fuzzy match


@dataclass(frozen=True)
class SearchCondition:
"""搜索条件 - 值对象"""

query: str
top_k: int = 5
filters: dict[str, Any] | None = field(default_factory=dict)
field_name: str
mode: SearchMode
value: str | int | float | bool


@dataclass(frozen=True)
class SearchParameters:
    """Search parameters - value object.

    ``frozen=True`` blocks attribute reassignment after construction; the
    ``conditions`` list and ``filters`` dict themselves remain mutable.
    """

    index_name: str
    conditions: list[SearchCondition]
    # Defaults to 10; presumably the maximum number of hits to return -
    # TODO confirm against the code that consumes this object.
    limit: int = 10
    # Optional extra filters; None when not supplied. The expected key/value
    # schema is not visible here - verify against the caller.
    filters: dict[str, Any] | None = None


@dataclass
class ContextChunk:
"""定义一个上下文块,用于最终返回结果。"""
class DocumentResult:
"""文档结果 - 值对象"""

text: str
file_metadata_id: str
content: dict[str, Any]
score: float
id: str | None = None


@dataclass
class SearchResponse:
"""定义最终的搜索响应格式。"""
class SearchResult:
"""搜索结果 - 聚合根"""

documents: list[DocumentResult]
total_count: int
search_time_ms: int

context: list[ContextChunk]
def is_empty(self) -> bool:
return len(self.documents) == 0
4 changes: 1 addition & 3 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from app.utils.loaders.dispatcher import DispatcherLoader
from app.utils.rerankers.bge import BgeReranker
from app.utils.splitters import RecursiveCharacterTextSplitter
from app.web.handler import DocumentHandler
from app.web.document import DocumentHandler

# 配置标准日志
logging.basicConfig(
Expand Down Expand Up @@ -62,8 +62,6 @@
splitter=splitter,
embedder=embedder,
reranker=reranker,
metadata_index=settings.elasticsearch.metadata_index,
chunk_index=settings.elasticsearch.chunk_index,
settings=settings,
)
logger.info("✅ 核心服务组件初始化成功。")
Expand Down
Loading