Skip to content

Commit

Permalink
删除重复的引入和纠正拼写错误 (#2599)
Browse files (browse the repository at this point in the history)
* 1.删除重复的引入
2.拼写错误

* 1.参数或者文档拼写错误纠正
2.doc下的faq、install已经删除,更新为ES部署指南,考虑到doc下的文档经常更新,即使扫描doc文件夹,也可能为空的情况,readme.md大概率不会删除。
  • Loading branch information
tiandiweizun committed Jan 11, 2024
1 parent b653c25 commit 3da68b5
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 14 deletions.
11 changes: 5 additions & 6 deletions init_database.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,6 @@
import nltk
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
from datetime import datetime
- import sys


if __name__ == "__main__":
Expand Down Expand Up @@ -50,11 +49,11 @@
)
parser.add_argument(
"-i",
- "--increament",
+ "--increment",
action="store_true",
help=('''
update vector store for files exist in local folder and not exist in database.
- use this option if you want to create vectors increamentally.
+ use this option if you want to create vectors incrementally.
'''
)
)
Expand Down Expand Up @@ -100,7 +99,7 @@

if args.clear_tables:
reset_tables()
- print("database talbes reseted")
+ print("database tables reset")

if args.recreate_vs:
create_tables()
Expand All @@ -110,8 +109,8 @@
import_from_db(args.import_db)
elif args.update_in_db:
folder2db(kb_names=args.kb_name, mode="update_in_db", embed_model=args.embed_model)
- elif args.increament:
- folder2db(kb_names=args.kb_name, mode="increament", embed_model=args.embed_model)
+ elif args.increment:
+ folder2db(kb_names=args.kb_name, mode="increment", embed_model=args.embed_model)
elif args.prune_db:
prune_db_docs(args.kb_name)
elif args.prune_folder:
Expand Down
8 changes: 4 additions & 4 deletions server/knowledge_base/migrate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -84,7 +84,7 @@ def file_to_kbfile(kb_name: str, files: List[str]) -> List[KnowledgeFile]:

def folder2db(
kb_names: List[str],
- mode: Literal["recreate_vs", "update_in_db", "increament"],
+ mode: Literal["recreate_vs", "update_in_db", "increment"],
vs_type: Literal["faiss", "milvus", "pg", "chromadb"] = DEFAULT_VS_TYPE,
embed_model: str = EMBEDDING_MODEL,
chunk_size: int = CHUNK_SIZE,
Expand All @@ -97,7 +97,7 @@ def folder2db(
recreate_vs: recreate all vector store and fill info to database using existed files in local folder
fill_info_only(disabled): do not create vector store, fill info to db using existed files only
update_in_db: update vector store and database info using local files that existed in database only
- increament: create vector store and database info for local files that not existed in database only
+ increment: create vector store and database info for local files that not existed in database only
"""

def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
Expand Down Expand Up @@ -142,15 +142,15 @@ def files2vs(kb_name: str, kb_files: List[KnowledgeFile]):
files2vs(kb_name, kb_files)
kb.save_vector_store()
# 对比本地目录与数据库中的文件列表,进行增量向量化
- elif mode == "increament":
+ elif mode == "increment":
db_files = kb.list_files()
folder_files = list_files_from_folder(kb_name)
files = list(set(folder_files) - set(db_files))
kb_files = file_to_kbfile(kb_name, files)
files2vs(kb_name, kb_files)
kb.save_vector_store()
else:
- print(f"unspported migrate mode: {mode}")
+ print(f"unsupported migrate mode: {mode}")


def prune_db_docs(kb_names: List[str]):
Expand Down
7 changes: 3 additions & 4 deletions tests/test_migrate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,8 +14,7 @@
# setup test knowledge base
kb_name = "test_kb_for_migrate"
test_files = {
- "faq.md": str(root_path / "docs" / "faq.md"),
- "install.md": str(root_path / "docs" / "install.md"),
+ "readme.md": str(root_path / "readme.md"),
}


Expand Down Expand Up @@ -56,13 +55,13 @@ def test_recreate_vs():
assert doc.metadata["source"] == name


- def test_increament():
+ def test_increment():
kb = KBServiceFactory.get_service_by_name(kb_name)
kb.clear_vs()
assert kb.list_files() == []
assert kb.list_docs() == []

- folder2db([kb_name], "increament")
+ folder2db([kb_name], "increment")

files = kb.list_files()
print(files)
Expand Down

0 comments on commit 3da68b5

Please sign in to comment.