In [1]:
from pathlib import Path
from collections import defaultdict
from pydantic import BaseModel, Field
import re
import urllib.parse


README_FILE_NAME = "README.md"
DOING_DIR_NAME = "&Doing"

SPLITTER = "------------------------------------------------------"


def generate_markdown_item(file: Path, note_dir: Path):
    file_rp = str(file.relative_to(note_dir).as_posix())
    return f"+ [{file.stem}]({urllib.parse.quote(file_rp, safe="/")})"

In [2]:
class FirstLevelCategoryInfo(BaseModel):
    notes: list[Path] = Field(default=[], description="没有子分类的笔记")
    sub_categories_to_notes: defaultdict[str, list[Path]] = Field(
        default=defaultdict(), description="手动子分类的笔记"
    )
    sub_categories: list[str] = Field(default=[], description="子分类列表")

    def to_markdown(self):
        res = "\n".join(generate_markdown_item(note, Path.cwd()) for note in self.notes)
        res += "\n\n"

        for sub_c, notes in self.sub_categories_to_notes.items():
            res += "#### " + sub_c + "\n\n"
            res += "\n".join(generate_markdown_item(note, Path.cwd()) for note in notes)

        return res


class BaseGroup(BaseModel):
    categories_to_notes: dict[str, FirstLevelCategoryInfo] = Field(
        default=dict(), description="分类和笔记"
    )
    categories: list[str] = Field(default=[], description="分类列表")

    def to_markdown(self, title):
        res = title + "\n\n" + "类别列表：" + "、".join(self.categories) + "\n\n"

        for category, info in self.categories_to_notes.items():
            res += "### " + category + "\n\n"
            res += info.to_markdown()

        return res


class ReadmeContent(BaseModel):
    """README 的文件结构"""

    description: str = Field(default="", description="描述")
    doing_group: BaseGroup = Field(default=BaseGroup(), description="Doing")
    note_group: BaseGroup = Field(default=BaseGroup(), description="列表")

    def parse(self, content: str):
        remain_idx = content.find(SPLITTER) + len(SPLITTER)
        self.description = content[:remain_idx] + "\n\n"
        content_list = content.split("\n")

        i = remain_idx
        cur_group = self.doing_group
        cur_level = 1
        while i < len(content_list):
            line = content_list[i]
            if match := re.search("## 列表", line):
                # 该处理下一组
                cur_group = self.note_group
            elif match := re.search(r"^### (.*)", line):
                # 一级目录
                cur_top_category_info = cur_group.categories_to_notes.setdefault(
                    match.group(1), FirstLevelCategoryInfo()
                )
                cur_level = 1
            elif match := re.search(r"^#### (.*)", line):
                # 二级目录
                cur_sub_category_list = cur_top_category_info.sub_categories_to_notes[
                    match.group(1)
                ]
                cur_level = 2
            elif match := re.search(r"+ \[.*\]\((.*)\)", line):
                # 具体的笔记
                a_note = Path(match.group(1))
                if cur_level == 1:
                    cur_top_category_info.notes.append(a_note)
                else:
                    cur_sub_category_list.append(a_note)
            i += 1

    def update(
        self,
        doing_categories_to_files: defaultdict[str, list[Path]],
        note_categories_to_files: defaultdict[str, list[Path]],
    ):
        """
        要保留原文件中的分类信息和顺序
        1. 遍历原分类和顺序，将不存在的删除
        2. 将新增的放入对应一级目录 no_sub 下
        """

        doing_notes_stems = {
            file.stem for files in doing_categories_to_files.values() for file in files
        }
        notes_notes_stems = {
            file.stem for files in note_categories_to_files.values() for file in files
        }

        doing_exists_notes_stems = set()
        notes_exists_notes_stems = set()

        # 1. 遍历原分类和顺序，将不存在的删除
        # 1.1 Doing
        for first_category_infos in self.doing_group.categories_to_notes.values():
            first_category_infos.notes = [
                note
                for note in first_category_infos.notes
                if note.stem in doing_notes_stems
            ]
            # 更新
            doing_exists_notes_stems.union(
                set(note for note in first_category_infos.notes)
            )

            sub_categories_to_notes = first_category_infos.sub_categories_to_notes
            for sub_category in sub_categories_to_notes.keys():
                sub_categories_to_notes[sub_category] = [
                    note
                    for note in sub_categories_to_notes[sub_category]
                    if note.stem in doing_notes_stems
                ]
                # 更新
                doing_exists_notes_stems.union(
                    set(note for note in sub_categories_to_notes[sub_category])
                )
        # 1.2 Note
        for first_category_infos in self.note_group.categories_to_notes.values():
            first_category_infos.notes = [
                note
                for note in first_category_infos.notes
                if note.stem in notes_notes_stems
            ]
            # 更新
            notes_exists_notes_stems.union(
                set(note for note in first_category_infos.notes)
            )

            sub_categories_to_notes = first_category_infos.sub_categories_to_notes
            for sub_category in sub_categories_to_notes.keys():
                sub_categories_to_notes[sub_category] = [
                    note
                    for note in sub_categories_to_notes[sub_category]
                    if note.stem in notes_notes_stems
                ]
                # 更新
                notes_exists_notes_stems.union(
                    set(note for note in sub_categories_to_notes[sub_category])
                )

        # 2. 将新增的放入对应一级目录 no_sub 下
        new_doing = doing_notes_stems - doing_exists_notes_stems
        new_note = notes_notes_stems - notes_exists_notes_stems
        for category, notes in doing_categories_to_files.items():
            for note in notes:
                if note.stem in new_doing:
                    self.doing_group.categories_to_notes.setdefault(
                        category, FirstLevelCategoryInfo()
                    ).notes.append(note)
        for category, notes in note_categories_to_files.items():
            for note in notes:
                if note.stem in new_note:
                    self.note_group.categories_to_notes.setdefault(
                        category, FirstLevelCategoryInfo()
                    ).notes.append(note)

    def generate_categories(self):
        # Doing
        doing_group = self.doing_group
        doing_group.categories = list(doing_group.categories_to_notes.keys())
        for firstLevelCategoryInfo in doing_group.categories_to_notes.values():
            firstLevelCategoryInfo.sub_categories = list(
                firstLevelCategoryInfo.sub_categories_to_notes.keys()
            )
        # 列表
        note_group = self.note_group
        note_group.categories = list(note_group.categories_to_notes.keys())
        for firstLevelCategoryInfo in note_group.categories_to_notes.values():
            firstLevelCategoryInfo.sub_categories = list(
                firstLevelCategoryInfo.sub_categories_to_notes.keys()
            )

    def to_markdown(self):
        return (
            self.description
            + self.doing_group.to_markdown("## Doing")
            + self.note_group.to_markdown("## 列表")
        )

In [3]:
def valid_file(file_name: str):
    """
    无用文件，主要是 . 开头的 隐藏文件夹 和 Doing 文件夹
    """
    return file_name != DOING_DIR_NAME and not file_name.startswith(".")


def get_all_file_names_and_categories(dir: Path):
    """获取Path目录下，所有的文件名列表和分类列表

    Args:
        dir (Path): 目录
    Returns:
        {}: 分类 -> 文件列表
    """
    category_to_files = defaultdict(list)
    if not dir.exists():
        return category_to_files

    files = [file for file in dir.iterdir() if valid_file(file.name)]
    for file in files:
        file_stem = file.stem
        category = file_stem.split("-")[0] if "-" in file_stem else ""
        category_to_files[category].append(file)

    return category_to_files


def generate_markdown_item(file: Path, note_dir: Path):
    return f"+ [{file.stem}]({file.relative_to(note_dir)})"


def concat_dir_files(dir: Path):
    """
    生成 Path 下所有文件名的列表

    Returns:
        str, str: 拼接好的文件列表，和分类信息
    """
    if not dir.exists():
        return "", ""
    items = [file.stem for file in dir.iterdir() if valid_file(file.name)]
    # items 排序，让文件列表稳定
    items.sort()
    categories = {item.split("-")[0] for item in items if "-" in item}
    return "\n".join(f"+ {item}" for item in items), "、".join(categories)


def update_readme_file(git_path: Path):
    """
    更新当前笔记仓库下的 README.md 文件

    每个笔记仓库，主要是以下2个文件夹，需要整理到 README 中：
        - &Doing: 近期正在处理的一些东西
        - 笔记仓库同名文件夹 note_dir: 已完成的笔记，同时作为坚果云同步的笔记文件夹
    """

    git_name = git_path.name

    if not git_path.exists():
        print("*    当前机器不存在目录: " + git_name)
        return

    readme_path = git_path / README_FILE_NAME
    note_dir = git_path / git_name

    # 1. 读取老 README 文件内容
    with open(readme_path, "r", encoding="utf-8") as file:
        last_readme_content = file.read()
    readme_content = ReadmeContent()
    readme_content.parse(last_readme_content)

    # 2.1 Doing 目录下所有的内容
    doing_categories_to_files = get_all_file_names_and_categories(
        note_dir / DOING_DIR_NAME
    )

    # 2.2 笔记目录下所有的一级目录名（不包含 Doing）
    note_categories_to_files = get_all_file_names_and_categories(note_dir)

    # 3. 更新
    readme_content.update(doing_categories_to_files, note_categories_to_files)

    # 4. 生成新的 readme 文件内容
    new_content = readme_content.to_markdown()

    # 5. 写回文件
    with open(readme_path, "w", encoding="utf-8", newline="\n") as file:
        file.write(new_content)
    print(git_name + " 的 README 已更新完毕")

In [4]:
def valid_file(file_name: str):
    """
    无用文件，主要是 . 开头的 隐藏文件夹 和 Doing 文件夹
    """
    return file_name != DOING_DIR_NAME and not file_name.startswith(".")


def get_all_file_names_and_categories(dir: Path):
    """获取Path目录下，所有的文件名列表和分类列表

    Args:
        dir (Path): 目录
    Returns:
        {}: 分类 -> 文件列表
    """
    category_to_files = defaultdict(list)
    if not dir.exists():
        return category_to_files

    files = [file for file in dir.iterdir() if valid_file(file.name)]
    for file in files:
        file_stem = file.stem
        category = file_stem.split("-")[0] if "-" in file_stem else ""
        category_to_files[category].append(file)

    return category_to_files


def generate_markdown_item(file: Path, note_dir: Path):
    return f"+ [{file.stem}]({file.relative_to(note_dir)})"


def concat_dir_files(dir: Path):
    """
    生成 Path 下所有文件名的列表

    Returns:
        str, str: 拼接好的文件列表，和分类信息
    """
    if not dir.exists():
        return "", ""
    items = [file.stem for file in dir.iterdir() if valid_file(file.name)]
    # items 排序，让文件列表稳定
    items.sort()
    categories = {item.split("-")[0] for item in items if "-" in item}
    return "\n".join(f"+ {item}" for item in items), "、".join(categories)

In [5]:
from pathlib import Path

git_path = Path.cwd()
git_name = git_path.name


readme_path = git_path / README_FILE_NAME
note_dir = git_path / git_name


# 2.1 Doing 目录下所有的内容
doing_categories_to_files = get_all_file_names_and_categories(
    note_dir / DOING_DIR_NAME
)

# 2.2 笔记目录下所有的一级目录名（不包含 Doing）
note_categories_to_files = get_all_file_names_and_categories(note_dir)


In [6]:
r = ReadmeContent()
r.update(doing_categories_to_files, note_categories_to_files)
r.generate_categories()
print(r.to_markdown())

## Doing

类别列表：&移动过来、、计算机

### &移动过来

+ [&移动过来-可能还需要整理一下的](Note\&Doing\&移动过来-可能还需要整理一下的)

### 

+ [--3-Git](Note\&Doing\--3-Git)
+ [Alfred](Note\&Doing\Alfred)
+ [Java学习路线](Note\&Doing\Java学习路线)
+ [领域驱动设计](Note\&Doing\领域驱动设计)

### 计算机

+ [计算机-log4j2线程block问题](Note\&Doing\计算机-log4j2线程block问题.md)
+ [计算机-python-uv](Note\&Doing\计算机-python-uv.md)
+ [计算机-Python用Matplotlib画图](Note\&Doing\计算机-Python用Matplotlib画图.md)

## 列表

类别列表：工作、计算机

### 工作

+ [工作-5Why分析法](Note\工作-5Why分析法.md)
+ [工作-对标](Note\工作-对标.md)
+ [工作-职业规划](Note\工作-职业规划.md)

### 计算机

+ [计算机-Extended Backus–Naur form(EBNF)](Note\计算机-Extended Backus–Naur form(EBNF).md)
+ [计算机-IDEA](Note\计算机-IDEA)
+ [计算机-Java泛型类型推断链式调用时无效](Note\计算机-Java泛型类型推断链式调用时无效.md)
+ [计算机-Jupyter Notebook](Note\计算机-Jupyter Notebook)
+ [计算机-macOS](Note\计算机-macOS.md)
+ [计算机-mac_shell_iterm2_fish](Note\计算机-mac_shell_iterm2_fish)
+ [计算机-Markdown](Note\计算机-Markdown.md)
+ [计算机-markwhen](Note\计算机-markwhen)
+ [计算机-mermaid](Note\计算机-mermaid)
+ [计算机-Mockito文档总结](Note\计算机-Mocki

In [7]:
print(r.model_dump_json())

{"description":"","doing_group":{"categories_to_notes":{"&移动过来":{"notes":["d:\\chen\\Note\\Note\\&Doing\\&移动过来-可能还需要整理一下的"],"sub_categories_to_notes":{},"sub_categories":[]},"":{"notes":["d:\\chen\\Note\\Note\\&Doing\\--3-Git","d:\\chen\\Note\\Note\\&Doing\\Alfred","d:\\chen\\Note\\Note\\&Doing\\Java学习路线","d:\\chen\\Note\\Note\\&Doing\\领域驱动设计"],"sub_categories_to_notes":{},"sub_categories":[]},"计算机":{"notes":["d:\\chen\\Note\\Note\\&Doing\\计算机-log4j2线程block问题.md","d:\\chen\\Note\\Note\\&Doing\\计算机-python-uv.md","d:\\chen\\Note\\Note\\&Doing\\计算机-Python用Matplotlib画图.md"],"sub_categories_to_notes":{},"sub_categories":[]}},"categories":["&移动过来","","计算机"]},"note_group":{"categories_to_notes":{"工作":{"notes":["d:\\chen\\Note\\Note\\工作-5Why分析法.md","d:\\chen\\Note\\Note\\工作-对标.md","d:\\chen\\Note\\Note\\工作-职业规划.md"],"sub_categories_to_notes":{},"sub_categories":[]},"计算机":{"notes":["d:\\chen\\Note\\Note\\计算机-Extended Backus–Naur form(EBNF).md","d:\\chen\\Note\\Note\\计算机-IDEA","d:\\chen\\Note\\N