In [2]:
from datasets import load_datasets

ds = load_dataset("positivethoughts/rewrite_10k")

ImportError: cannot import name 'load_datasets' from 'datasets' (e:\work\LLM_6907_Project\.venv\lib\site-packages\datasets\__init__.py)

In [None]:
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import json
from llm_dev import LLM, BaseLLM
import os
from datetime import datetime
from eval_system import save_report, EvaluationConfig, EvaluationSystem

@dataclass
class ExecutionStep:
    """Enhanced execution step data class"""
    step_id: int
    description: str
    prompt: str
    expected_output: Optional[str] = None
    key_points: Optional[List[str]] = None
    constraints: Optional[List[str]] = None
    dependencies: Optional[List[str]] = None
    result: Optional[str] = None
    check_result: Optional[Dict[str, Any]] = None

class TaskDecomposition:
    def __init__(self, llm: LLM, output_language: str = "English"):
        """
        Initialize TaskDecomposition with language control
        
        Args:
            llm (LLM): LLM instance
            output_language (str): Desired output language (e.g., "English", "Chinese", "Spanish")
        """
        self.llm: BaseLLM = llm
        self.output_language = output_language
        self.conversation_history: List[Dict[str, str]] = []
        
    def _add_to_history(self, role: str, content: str):
        """Add conversation to history"""
        self.conversation_history.append({"role": role, "content": content})
        
    def generate_plan(self, task_description: str) -> List[ExecutionStep]:
        """Generate detailed task execution plan"""
        planning_prompt = f"""
        Please analyze the following task and create a detailed, step-by-step execution plan.
        Please provide all output in {self.output_language}.

        Task Description: {task_description}

        Generate a comprehensive execution plan in JSON format following these guidelines:

        1. Step Structure:
        {{
            "steps": [
                {{
                    "step_id": 1,
                    "description": "Detailed step description",
                    "prompt": "Specific execution instructions",
                    "expected_output": "Description of what this step should produce",
                    "key_points": ["Key elements to address", "Important aspects to include"],
                    "constraints": ["Any limitations or requirements to consider"],
                    "dependencies": ["References to previous steps if any, should be a int only, represent the step_id"]
                }}
            ]
        }}

        2. Requirements for Each Step:
        - Description should be specific and actionable
        - Prompt should provide clear guidance and context
        - Include all necessary details for execution
        - Consider dependencies on previous steps
        - Specify quality criteria and expectations

        3. Step Planning Considerations:
        - Break down complex tasks into manageable pieces
        - Ensure logical progression between steps
        - Include specific details and examples where relevant
        - Consider edge cases and potential challenges
        - Maintain focus on overall task objectives

        4. Content Guidelines:
        - Be specific rather than generic
        - Include measurable outcomes
        - Provide context for each step
        - Specify any required research or reference materials
        - Include quality checks and validation criteria

        Remember: Generate all content in {self.output_language}, but make sure the key in JSON stay in English to match each other.
        Please ensure the generated plan is detailed enough that each step can be executed without requiring additional clarification.
        """
        
        self._add_to_history("user", planning_prompt)
        
        plan_json = self.llm.generate_json(
            prompt=planning_prompt,
            schema={"steps": list}
        )
        
        if not plan_json or "steps" not in plan_json:
            raise ValueError("Failed to generate plan")
            
        self._add_to_history("assistant", json.dumps(plan_json, ensure_ascii=False, indent=2))
        
        steps = []
        for step in plan_json["steps"]:
            steps.append(ExecutionStep(
                step_id=step["step_id"],
                description=step["description"],
                prompt=step["prompt"],
                expected_output=step.get("expected_output"),
                key_points=step.get("key_points"),
                constraints=step.get("constraints"),
                dependencies=step.get("dependencies")
            ))
            
        return steps
        
    def execute_step(self, step: ExecutionStep, previous_results: List[str]) -> str:
        """Execute a single step with enhanced context and guidance"""
        dependencies_context = self._build_dependencies_context(step, previous_results)
        
        context_prompt = f"""
        Task Execution Step {step.step_id}
        Please provide all output in {self.output_language}.

        Step Description: {step.description}

        Previous Context:
        {dependencies_context}

        Expected Output: {step.expected_output}

        Key Points to Address:
        {self._format_list(step.key_points)}

        Constraints to Consider:
        {self._format_list(step.constraints)}

        Detailed Instructions:
        {step.prompt}

        Requirements for Execution:
        1. Address all key points explicitly
        2. Follow all specified constraints
        3. Maintain alignment with previous steps
        4. Ensure output format matches expectations
        5. Focus on quality and completeness

        Remember: Generate your response in {self.output_language}.
        Please execute this step and provide a detailed response that meets all requirements.
        """
        
        self._add_to_history("user", context_prompt)
        
        result = self.llm.generate_text(
            prompt=context_prompt,
            max_tokens=2000
        )
        
        self._add_to_history("assistant", result)
        return result

    def check_step_result(self, step: ExecutionStep, result: str, task_description: str) -> Dict[str, Any]:
        """Enhanced result validation with detailed feedback"""
        check_prompt = f"""
        Please perform a comprehensive evaluation of the step execution result.
        Please provide all output in {self.output_language}.

        Original Task: {task_description}

        Step Information:
        - Description: {step.description}
        - Expected Output: {step.expected_output}
        - Key Points: {self._format_list(step.key_points)}
        - Constraints: {self._format_list(step.constraints)}

        Execution Result:
        {result}

        Please evaluate the result based on the following criteria and return a detailed analysis in JSON format:

        {{
            "passed": boolean,
            "scores": {{
                "completeness": (0-10),  // Did it address all required points?
                "constraints_met": (0-10),  // Were all constraints followed?
                "quality": (0-10),  // Overall quality of the output
                "coherence": (0-10)  // Logical flow and connection with other steps
            }},
            "analysis": {{
                "strengths": ["list", "of", "strengths"],
                "weaknesses": ["list", "of", "weaknesses"],
                "missing_points": ["key points", "not addressed"],
                "violated_constraints": ["constraints", "not met"]
            }},
            "improvement_suggestions": ["specific", "actionable", "suggestions"],
            "overall_feedback": "Detailed explanation of the evaluation"
        }}

        Remember: Generate all analysis and feedback in {self.output_language}.
        Provide specific examples and references when discussing strengths or weaknesses.
        """
        
        self._add_to_history("user", check_prompt)
        
        check_result = self.llm.generate_json(
            prompt=check_prompt,
            schema={
                "passed": bool,
                "scores": dict,
                "analysis": dict,
                "improvement_suggestions": list,
                "overall_feedback": str
            }
        )
        
        self._add_to_history("assistant", json.dumps(check_result, ensure_ascii=False, indent=2))
        return check_result

    def _build_dependencies_context(self, step: ExecutionStep, previous_results: List[str]) -> str:
        """Build context based on step dependencies"""
        if not step.dependencies or not previous_results:
            return "No dependencies on previous steps."
            
        context = "Relevant context from previous steps:\n\n"
        for dep in step.dependencies:
            step_num = int(dep) - 1
            if 0 <= step_num < len(previous_results):
                context += f"From {dep}:\n{previous_results[step_num]}\n\n"
        return context

    def _format_list(self, items: Optional[List[str]]) -> str:
        """Format list items for prompt display"""
        if not items:
            return "None specified"
        return "\n".join(f"- {item}" for item in items)

    def execute_task(self, task_description: str, enable_checking: bool = True) -> Dict[str, Any]:
        """Execute complete task with enhanced monitoring and control"""
        self.conversation_history = []
        
        steps = self.generate_plan(task_description)
        results = []
        execution_log = []
        
        for step in steps:
            result = self.execute_step(step, results)
            step.result = result
            
            if enable_checking:
                check_result = self.check_step_result(step, result, task_description)
                step.check_result = check_result
                
                if not check_result["passed"]:
                    retry_prompt = self._generate_retry_prompt(step, check_result)
                    result = self.llm.generate_text(
                        prompt=retry_prompt,
                        max_tokens=2000
                    )
                    step.result = result
                    step.check_result = self.check_step_result(step, result, task_description)
            
            results.append(result)
            execution_log.append({
                "step_id": step.step_id,
                "description": step.description,
                "expected_output": step.expected_output,
                "result": step.result,
                "check_result": step.check_result
            })
        
        final_result = self._generate_final_result(task_description, results, steps)
        
        return {
            "final_result": final_result,
            "execution_log": execution_log,
            "conversation_history": self.conversation_history
        }

    def _generate_retry_prompt(self, step: ExecutionStep, check_result: Dict[str, Any]) -> str:
        """Generate detailed retry prompt based on validation feedback"""
        return f"""
        The previous execution of step {step.step_id} requires improvement.
        Please provide all output in {self.output_language}.

        Original Description: {step.description}
        Expected Output: {step.expected_output}

        Previous Result: {step.result}

        Evaluation Feedback:
        - Scores: {json.dumps(check_result['scores'], indent=2)}
        - Missing Points: {', '.join(check_result['analysis']['missing_points'])}
        - Violated Constraints: {', '.join(check_result['analysis']['violated_constraints'])}

        Improvement Requirements:
        {self._format_list(check_result['improvement_suggestions'])}

        Please revise the output addressing all identified issues while maintaining:
        1. Original task objectives
        2. Consistency with previous steps
        3. All specified constraints
        4. Required quality standards

        Remember: Generate the improved version in {self.output_language}.
        Provide an improved version that addresses all feedback points.
        """

    def _generate_final_result(self, task_description: str, results: List[str], steps: List[ExecutionStep]) -> str:
        """Generate final result with comprehensive context integration"""
        final_integration_prompt = f"""
        Please create a comprehensive final result integrating all completed steps.
        Please provide all output in {self.output_language}.

        Original Task Description:
        {task_description}

        Step Results Summary:
        {self._format_steps_summary(steps, results)}

        Requirements for Final Integration:
        1. Ensure perfect alignment with original task requirements
        2. Maintain logical flow and coherence across all components
        3. Address all key points from individual steps
        4. Resolve any inconsistencies between steps
        5. Provide a polished and professional final output

        Remember: Generate the final result in {self.output_language}.
        Create a cohesive final result that successfully achieves all original task objectives while maintaining the quality and detail level of individual steps.
        """

        self._add_to_history("user", final_integration_prompt)
        final_result = self.llm.generate_text(
            prompt=final_integration_prompt,
            max_tokens=3000
        )
        self._add_to_history("assistant", final_result)
        return final_result

    def _format_steps_summary(self, steps: List[ExecutionStep], results: List[str]) -> str:
        """Format detailed summary of all steps and their results"""
        summary = ""
        for step, result in zip(steps, results):
            summary += f"\nStep {step.step_id}: {step.description}\n"
            summary += f"Expected Output: {step.expected_output}\n"
            summary += f"Result: {result}\n"
            if step.check_result:
                summary += f"Quality Scores: {json.dumps(step.check_result.get('scores', {}), indent=2)}\n"
            summary += "-" * 80 + "\n"
        return summary

# Usage example
def example_usage():
    llm = LLM(model_type='claude-sonnet', verbose=True)
    # Initialize with desired output language
    decomposer = TaskDecomposition(llm, output_language="Chinese")

#     task = """
#     将下面的段落改写成歌剧的语言风格，保持原文内容、情感和剧情不变，使用现代汉语：
# 今天全没月光，我知道不妙。早上小心出门，赵贵翁的眼色便怪：似乎怕我，似乎想害我。还有七八个人，交头接耳的议论我，张着嘴，对我笑了一笑；我便从头直冷到脚根，晓得他们布置，都已妥当了。
# 我可不怕，仍旧走我的路。前面一伙小孩子，也在那里议论我；眼色也同赵贵翁一样，脸色也铁青。我想我同小孩子有什么仇，他也这样。忍不住大声说，“你告诉我！”他们可就跑了。
# 我想：我同赵贵翁有什么仇，同路上的人又有什么仇；只有廿年以前，把古久先生的陈年流水簿子⑶，踹了一脚，古久先生很不高兴。赵贵翁虽然不认识他，一定也听到风声，代抱不平；约定路上的人，同我作冤对。但是小孩子呢？那时候，他们还没有出世，何以今天也睁着怪眼睛，似乎怕我，似乎想害我。这真教我怕，教我纳罕而且伤心。
# 我明白了。这是他们娘老子教的！
#     """
    
    task = """
    将下面的段落改写成长篇叙事诗（Epic Poem）的文体，保持原文内容、情感和剧情不变，使用现代汉语：
赫克托尔决心等待阿喀琉斯；他把盾牌靠在塔的窗台上，等待他的医生。赫克托尔无法避免与阿喀琉斯的战斗。普里阿摩斯的儿子担心特洛伊人会指责他依靠他的力量摧毁了特洛伊。毕竟，我建议他波利达曼特在阿基里斯参战之前到特洛伊的军队避难。现在，赫克托耳还剩下一件事——与阿喀琉斯交战，要么赢，要么死。赫克托尔也有这样的想法：不带武器去见阿喀琉斯，答应他归还美丽的埃琳娜和所有从墨涅拉俄斯，并与他们一起献出伟大特洛伊所有财富的一半。赫克托尔立即拒绝了这个想法。他知道阿喀琉斯不会和他达成协议，他会手无寸铁地杀死他，就像一个无能为力的女人。
阿喀琉斯越来越近了。恐惧抓住了赫克托，他从特洛伊周围的强大儿子 Pelea 身边跑开。在他的身后，风雨如磐的阿喀琉斯，就像鹰追虚弱的鸽子一样。英雄们绕着特洛伊转了三圈。
英雄们在暴风雨中奔跑。赫克托耳好几次想躲在墙上，给特洛伊人用箭击退珀琉斯之子的机会，但阿喀琉斯不让他靠近墙边。如果阿波罗神没有向赫克托耳注入力量，伟大的阿喀琉斯早就超越了普里阿摩斯的儿子。当英雄们第四次跑过斯卡曼德的钥匙时，神雷者在金色的天平上扔了两批死亡，一个给阿喀琉斯，一个给赫克托耳。 Hector 的命运落入了阴暗的Hades 王国。阿波罗神离开赫克托耳，女神雅典娜-帕拉斯接近阿喀琉斯。她命令英雄停下来，并答应他战胜赫克托。女神本人化身为赫克托尔的兄弟德伊弗布斯，出现在赫克托耳面前。她说服他与珀琉斯的儿子作战，并承诺提供帮助。赫克托停下来。英雄们齐聚一堂。赫克托第一个惊呼：
- 我将不再，珀琉斯之子，在逃亡中寻求救赎！让我们战斗，看看你是杀了我还是我赢了。但是，让我们在战斗之前召唤神灵来见证吧！如果雷神给了我胜利，我保证不会羞辱你的身体。也履行这份合同。
但阿基里斯威胁地回答他：
- 不！不要给我合同，可恶的敌人！正如狮子与人或狼与羊之间的契约是不可能的，我们之间也是不可能的。不！用你所有的力量把自己振作起来，记住所有你你在军事领域的艺术。你没有救恩！你会为我的朋友 Patroclus 和我其他被你杀死的朋友流下的鲜血付出代价。
    """

    # task = """
    # 写一篇1200字的中文论文，详细地分析transformer和attention内在原理和技术内容。
    # """

    result = decomposer.execute_task(task, enable_checking=True)

    # Print execution log
    for entry in result["execution_log"]:
        print(f"\nStep {entry['step_id']}:")
        print(f"Description: {entry['description']}")
        print(f"Expected Output: {entry['expected_output']}")
        if entry['check_result']:
            print(f"Quality Scores: {entry['check_result']['scores']}")
            print(f"Improvement Suggestions: {entry['check_result']['improvement_suggestions']}")

    print("\nFinal Result:")
    print(result["final_result"])
    
    multiple_step_result = result["final_result"]
    one_step_result = llm.generate_text(task)
    
    result["final_simple_result"] = one_step_result
    
    # # Configure evaluation system
    # config = EvaluationConfig(
    #     committee_llms=["claude-sonnet", "gpt", "claude-haiku"],
    #     chief_llm="claude-sonnet",
    #     extractor_llm="gpt-4o",
    #     verbose=True
    # )
    
    # Configure evaluation system
    config = EvaluationConfig(
        committee_llms=["claude-sonnet", "gpt-4o", "llama3-70b", "qwen-72b"],
        chief_llm="claude-sonnet",
        extractor_llm="claude-sonnet",
        verbose=True
    )
    
    # Initialize system
    system = EvaluationSystem(config)
    
    # Content: Content to evaluate
    # Task_info: Task-related information and requirements
    
    # Execute evaluation
    try:
        report_multiple_step = system.evaluate(content = multiple_step_result, task_info = task)
        report_one_step = system.evaluate(content = one_step_result, task_info = task)
        
        
    except Exception as e:
        print(f"Error during evaluation: {e}")
    
    result['task'] = task
    
    result['report_multiple_step'] = report_multiple_step
    
    result['report_one_step'] = report_one_step
    
    # Generate timestamp
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M")
    
    # Create filename
    filename = f"result/{timestamp}.json"
    
    # Save to file
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

In [2]:
example_usage()

Generated JSON:
{
  "steps": [
    {
      "step_id": 1,
      "description": "分析原文结构和关键情节",
      "prompt": "仔细阅读原文，标注以下要素：主要人物、关键事件、情感变化、场景转换",
      "expected_output": "一份包含故事结构、人物关系和情节发展的详细分析文档",
      "key_points": [
        "赫克托尔与阿喀琉斯的对峙",
        "赫克托尔的内心挣扎",
        "追逐战的过程",
        "神灵的介入",
        "最终对话的转折"
      ],
      "constraints": [
        "保持原文的历史背景准确性",
        "确保所有关键情节都被记录"
      ],
      "dependencies": []
    },
    {
      "step_id": 2,
      "description": "确定叙事诗的格式和风格",
      "prompt": "设计适合长篇叙事诗的格式，包括：韵律结构、段落划分、语言风格",
      "expected_output": "叙事诗创作的格式指南和风格规范",
      "key_points": [
        "现代汉语表达方式",
        "诗歌韵律规则",
        "段落结构设计",
        "修辞手法运用"
      ],
      "constraints": [
        "使用现代汉语",
        "保持史诗般的庄重感",
        "确保可读性和流畅性"
      ],
      "dependencies": [
        1
      ]
    },
    {
      "step_id": 3,
      "description": "创作开篇和场景描写",
      "prompt": "将赫克托尔等待和内心挣扎的场景改写成诗歌形式",
      "expected_output": "叙事诗的开篇部分，包含人物心理描写和场景铺垫",
      "

In [10]:
task = """
    将下面的段落改写成歌剧的语言风格，保持原文内容、情感和剧情不变，使用现代汉语：
今天全没月光，我知道不妙。早上小心出门，赵贵翁的眼色便怪：似乎怕我，似乎想害我。还有七八个人，交头接耳的议论我，张着嘴，对我笑了一笑；我便从头直冷到脚根，晓得他们布置，都已妥当了。
我可不怕，仍旧走我的路。前面一伙小孩子，也在那里议论我；眼色也同赵贵翁一样，脸色也铁青。我想我同小孩子有什么仇，他也这样。忍不住大声说，“你告诉我！”他们可就跑了。
我想：我同赵贵翁有什么仇，同路上的人又有什么仇；只有廿年以前，把古久先生的陈年流水簿子⑶，踹了一脚，古久先生很不高兴。赵贵翁虽然不认识他，一定也听到风声，代抱不平；约定路上的人，同我作冤对。但是小孩子呢？那时候，他们还没有出世，何以今天也睁着怪眼睛，似乎怕我，似乎想害我。这真教我怕，教我纳罕而且伤心。
我明白了。这是他们娘老子教的！
    """


llm = LLM(model_type='claude', verbose=True)
llm.generate_text(task)


歌剧版本如下:

今夜无月光照耀，
我心中不安惶恐。
早晨出门谨慎行走,
赵老爷眼神怪异:
似乎害怕我,似乎想伤害我。
还有七八个人,
窃窃私语议论我,
对我露出诡异笑容;
我从头到脚感到寒意,
知道他们已经布置好了阴谋。

但我不会惧怕,
仍然走我的路。
前面一群小孩子,
也在那里议论我;
眼神与赵老爷如出一辙,
脸色铁青。
我想我与小孩子有何仇恨,
他们也如此对待我。
忍不住大声质问:"告诉我为什么!"
他们立即逃散而去。

我思索:我与赵老爷有何仇怨,
与路人又有何冤仇;
只有二十年前,
我曾踹了一脚古老先生的账簿,
古老先生很不高兴。
虽然赵老爷不认识他,
一定也听闻风声,
代为不平;
约定路人与我作对。
但是小孩子呢?
他们那时还未出世,
如今为何也睁着怪异的眼睛,
似乎害怕我,似乎想伤害我。
这真让我惶恐,
让我困惑且伤心。

我明白了,
这都是他们父母教导的!


'歌剧版本如下:\n\n今夜无月光照耀，\n我心中不安惶恐。\n早晨出门谨慎行走,\n赵老爷眼神怪异:\n似乎害怕我,似乎想伤害我。\n还有七八个人,\n窃窃私语议论我,\n对我露出诡异笑容;\n我从头到脚感到寒意,\n知道他们已经布置好了阴谋。\n\n但我不会惧怕,\n仍然走我的路。\n前面一群小孩子,\n也在那里议论我;\n眼神与赵老爷如出一辙,\n脸色铁青。\n我想我与小孩子有何仇恨,\n他们也如此对待我。\n忍不住大声质问:"告诉我为什么!"\n他们立即逃散而去。\n\n我思索:我与赵老爷有何仇怨,\n与路人又有何冤仇;\n只有二十年前,\n我曾踹了一脚古老先生的账簿,\n古老先生很不高兴。\n虽然赵老爷不认识他,\n一定也听闻风声,\n代为不平;\n约定路人与我作对。\n但是小孩子呢?\n他们那时还未出世,\n如今为何也睁着怪异的眼睛,\n似乎害怕我,似乎想伤害我。\n这真让我惶恐,\n让我困惑且伤心。\n\n我明白了,\n这都是他们父母教导的!'