In [2]:
import logging
from typing import Dict, List, TypedDict

from langchain.chat_models import init_chat_model
from pydantic import BaseModel, Field

import dotenv

# Load environment variables (e.g. API credentials) from the .env file one
# directory above the current working directory.
dotenv.load_dotenv("../.env")

# Shared chat-model handle used by the pipeline nodes defined in this file.
# NOTE(review): the recorded cell output shows a DefaultCredentialsError, so
# presumably the Google credentials were not picked up from the .env file --
# confirm the path and the variable name expected by the provider.
llm = init_chat_model("google_genai:gemini-2.0-flash")
# Schema for a single syllabus chapter: a title plus its knowledge points.
# NOTE: the docstring and the Field descriptions are intentionally left
# verbatim. pydantic exposes both as "description" entries in the generated
# schema, so they are presumably part of what the LLM receives when this
# model is used for structured output -- changing them could change results.
class Chapter(BaseModel):
    """定义单个章节的结构"""
    # Title of the chapter.
    chapter_title: str = Field(description="章节的标题")
    # Knowledge points contained in the chapter.
    knowledge_points: List[str] = Field(description="章节包含的知识点列表")

# Schema for the whole syllabus: an ordered list of Chapter entries.
# NOTE: the docstring and the Field description are intentionally left
# verbatim. pydantic exposes both as "description" entries in the generated
# schema, so they are presumably part of what the LLM receives when this
# model is used for structured output -- changing them could change results.
class SyllabusStructure(BaseModel):
    """定义整个大纲的结构，它由多个章节组成"""
    # Chapters that make up the syllabus.
    chapters: List[Chapter] = Field(description="大纲的章节列表")

class LessonPlanState(TypedDict):
    """
    The shared "state backpack" of the pipeline; it is passed between all nodes.
    """
    # Initial input
    raw_syllabus: str
    # knowledge_base_retriever: Any # in a real scenario you would pass in your retriever

    # Intermediate processing data
    parsed_syllabus: List[Dict]
    current_chapter_index: int
    chapter_results: List[Dict]

    # Final output
    final_lesson_plan: str
# Prompt template asking the model to extract chapter titles and their core
# knowledge points from a syllabus; the `{raw_syllabus}` placeholder is filled
# in with str.format. The prompt text itself is runtime data and is kept as is.
SYLLABUS_PARSE_PROMPT = "请仔细分析以下课程大纲文本，并将其内容提取为JSON格式。请识别出所有章节标题和每个章节下的核心知识点列表。\n\n大纲文本如下：\n---\n{raw_syllabus}"


DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.

In [None]:
def parse_syllabus(state: LessonPlanState) -> LessonPlanState:
    """
    Parse the raw syllabus string into a structured list of chapters.

    The text in ``state['raw_syllabus']`` is inserted into
    ``SYLLABUS_PARSE_PROMPT`` and sent to the module-level ``llm``, which is
    constrained to the ``SyllabusStructure`` schema. Every returned chapter is
    converted to a plain dict.

    Args:
        state: Pipeline state; only the ``'raw_syllabus'`` key is read.

    Returns:
        A partial state update (not a complete ``LessonPlanState``) with:
        ``'parsed_syllabus'`` -- list of chapter dicts,
        ``'current_chapter_index'`` -- reset to 0,
        ``'chapter_results'`` -- reset to an empty list.

    Raises:
        KeyError: If ``state`` has no ``'raw_syllabus'`` entry.
        ValueError: If the model does not return a structured result.
    """
    raw_syllabus = state['raw_syllabus']

    structured_llm = llm.with_structured_output(SyllabusStructure)
    prompt = SYLLABUS_PARSE_PROMPT.format(raw_syllabus=raw_syllabus)
    structured_syllabus = structured_llm.invoke(prompt)

    # Fail with an explicit message instead of an opaque AttributeError on
    # `None.chapters` when the model output could not be parsed.
    if structured_syllabus is None:
        raise ValueError("LLM returned no structured syllabus for the given input")

    # `BaseModel.dict()` is deprecated in pydantic v2 in favour of
    # `model_dump()`; fall back to `dict()` so pydantic v1 keeps working.
    parsed_data = [
        chapter.model_dump() if hasattr(chapter, "model_dump") else chapter.dict()
        for chapter in structured_syllabus.chapters
    ]

    # Lazy %-formatting: the message is only built if INFO logging is enabled.
    logging.info("Parsed syllabus: %s", parsed_data)

    return {
        'parsed_syllabus': parsed_data,
        'current_chapter_index': 0,
        'chapter_results': [],
    }
