In [1]:
import os
# Switch the working directory to the parent folder before the project imports
# below run. NOTE(review): presumably the notebook lives one level below the
# project root and this makes the project packages and relative data paths
# resolve -- confirm. The path is relative, so re-running this cell in the same
# kernel moves up one more level each time.
os.chdir('..')

import json
# Star import: names used later without an explicit import (e.g. `Module`,
# `Pipeline`) presumably come from here -- TODO confirm.
from workflows.skeletons import *
from utils import visualize, os_lib

In [2]:
def log(title, content):
    """Print a highlighted title followed by the content between separator rules.

    Args:
        title: Heading text; passed through
            ``visualize.TextVisualize.highlight_str`` before printing.
        content: Payload to show. A dict is rendered as 4-space indented JSON
            with non-ASCII characters kept as-is; a list is rendered as
            ``str(item)`` plus a newline for every item; anything else is
            interpolated unchanged.
    """
    if isinstance(content, dict):
        body = json.dumps(content, indent=4, ensure_ascii=False)
    elif isinstance(content, list):
        # One entry per line, each terminated by its own newline.
        body = ''.join(str(item) + '\n' for item in content)
    else:
        body = content

    header = visualize.TextVisualize.highlight_str(title)
    print(f"{header}\n==========\n{body}\n----------\n")

In [3]:
# Initialize a request: the PPT topic and the number of pages to generate.
obj = {
    'topic': '如何学习大模型',
    'pages': 4,
}

In [4]:
# Set LANG before importing the sentence templates.
# NOTE(review): presumably `templates.make_sentences` reads LANG at import time
# to select the Chinese prompt templates -- confirm; if so, this assignment must
# stay above the import.
os.environ['LANG'] = 'ch'
# Star import: `make_role` and `make_skeleton` used below presumably come from
# this module -- TODO confirm.
from templates.make_sentences import *

class MakeGenPrompt(Module):
    """Pipeline stage that builds the LLM prompt for a PPT generation request."""

    # Example of the JSON structure the model is asked to return. It is
    # serialized verbatim into the prompt by `on_process`.
    output_json = {
        "title": "example title",
        "pages": [
            {
                "title": "title for page 1",
                "content": [
                    {"title": "title for paragraph 1", "description": "detail for paragraph 1"},
                    {"title": "title for paragraph 2", "description": "detail for paragraph 2"},
                ],
            },
            {
                "title": "title for page 2",
                "content": [
                    {"title": "title for paragraph 1", "description": "detail for paragraph 1"},
                    {"title": "title for paragraph 2", "description": "detail for paragraph 2"},
                    {"title": "title for paragraph 3", "description": "detail for paragraph 3"},
                ],
            },
        ],
    }

    def on_process(self, obj, debug=False, **kwargs):
        """Build the prompt and attach it to the request.

        Args:
            obj: Request mapping; ``obj['topic']`` and ``obj['pages']`` are read.
            debug: When true, the generated prompt is printed through ``log``.
            **kwargs: Accepted but not used here.

        Returns:
            The same ``obj``, updated in place with
            ``post_kwargs={'prompt': <prompt>}``.
        """
        subject, page_count = obj['topic'], obj['pages']
        schema_text = json.dumps(self.output_json, ensure_ascii=False)

        # Role line, the task description, then the required output format.
        instructions = [
            make_role('文档编写员'),
            f'我要准备1个主题关于"{subject}"的PPT，要求一共写{page_count}页，请你根据主题生成详细内容，不要省略。',
            f'\n按这个JSON格式输出\n{schema_text}\n只能返回JSON\n',
        ]
        prompt = make_skeleton(additional_prompts=instructions)

        if debug:
            log('模型输入', prompt)

        obj.update(post_kwargs={'prompt': prompt})
        return obj

# Instantiate the prompt-building stage and run it on the request.
# `obj` is rebound to the stage's return value; `on_process` returns the same
# request with `post_kwargs` added (assuming the base class `__call__` forwards
# to it -- confirm). With debug=True the generated prompt is printed below.
make_gen_prompt = MakeGenPrompt()
obj = make_gen_prompt(obj, debug=True)

[34;1m模型输入[0m

现在你扮演一名文档编写员。我要准备1个主题关于"如何学习大模型"的PPT，要求一共写4页，请你根据主题生成详细内容，不要省略。
按这个JSON格式输出
{"title": "example title", "pages": [{"title": "title for page 1", "content": [{"title": "title for paragraph 1", "description": "detail for paragraph 1"}, {"title": "title for paragraph 2", "description": "detail for paragraph 2"}]}, {"title": "title for page 2", "content": [{"title": "title for paragraph 1", "description": "detail for paragraph 1"}, {"title": "title for paragraph 2", "description": "detail for paragraph 2"}, {"title": "title for paragraph 3", "description": "detail for paragraph 3"}]}]}
只能返回JSON

开始!


----------



In [5]:
# 请求模型
import re
from models.bailian import Model

class LLM(Model):
    """Model wrapper that parses the raw completion text into a JSON object."""

    def on_process_end(self, obj, debug=False, **kwargs):
        """Parse ``obj['content']`` as JSON and store the result in ``obj['ret']``.

        If the text contains a fenced block opened with three backticks and
        ``json``, the payload of that block is parsed; otherwise the whole text
        is parsed. The fence match tolerates any whitespace (including CRLF or
        none) around the payload. When the opening fence is present but no
        closing fence can be found, the raw text is handed to ``json.loads`` so
        the failure surfaces as a ``json.JSONDecodeError`` instead of an
        ``AttributeError`` on ``None``.

        Args:
            obj: Mapping holding the model reply under ``'content'``.
            debug: When true, the parsed result is printed through ``log``.
            **kwargs: Accepted but not used here.

        Returns:
            The same ``obj``, updated in place with ``ret=<parsed JSON>``.

        Raises:
            json.JSONDecodeError: If the selected text is not valid JSON.
        """
        content = obj['content']

        # Greedy capture up to the LAST closing fence, so backticks inside JSON
        # string values do not cut the payload short. Whitespace left at either
        # end of the capture is ignored by json.loads.
        fenced = re.search(r'```json\s*(.+)```', content, flags=re.S)
        payload = fenced.group(1) if fenced else content

        ret = json.loads(payload)

        if debug:
            log('模型输出', json.dumps(ret, ensure_ascii=False, indent=4))

        obj.update(ret=ret)
        return obj

# The API key is read from the environment rather than hardcoded; set it before
# starting the kernel, e.g.:  export API_KEY='xxx'
llm = LLM(
    api_key=os.environ.get('API_KEY'),  # evaluates to None when API_KEY is unset
    # llm_type='qwen2-7b-instruct'  # disabled keyword argument, kept for reference
)
# Send the request to the model and rebind `obj` to the returned value.
# With debug=True the parsed model output is printed below.
obj = llm(obj, debug=True)

[34;1m模型输出[0m
{
    "title": "如何学习大模型",
    "pages": [
        {
            "title": "引言与大模型概述",
            "content": [
                {
                    "title": "什么是大模型",
                    "description": "大模型，又称大规模预训练模型，是指那些拥有大量参数（通常数十亿至数万亿）的机器学习模型，它们通过在广泛的数据集上进行预训练，能够理解和生成人类语言、代码、图像等多种形式的数据。"
                },
                {
                    "title": "大模型的重要性",
                    "description": "大模型由于其强大的泛化能力和对多种任务的适应性，正逐渐成为AI领域的核心驱动力，影响着自然语言处理、计算机视觉、药物研发等多个行业的发展。"
                }
            ]
        },
        {
            "title": "学习大模型的基础知识",
            "content": [
                {
                    "title": "数学与统计学基础",
                    "description": "理解概率论、线性代数、微积分等数学概念是学习大模型的前提，这些知识有助于深入理解模型的工作原理。"
                },
                {
                    "title": "机器学习与深度学习",
                    "description": "掌握监督学习、无监督学习、神经网络架构、损失函数、优化算法等基础知识，了解深度学习框架如TensorFlow或PyTorch的使用。"
                },
                {
                    "title": "

In [6]:
# pip install python-pptx
from pptx import Presentation

class MakePpt(Module):
    """Pipeline stage that renders the parsed model output into a .pptx file."""

    def on_process(self, obj, save_fp='', **kwargs):
        """Build the presentation from ``obj['ret']`` and save it to disk.

        Args:
            obj: Request mapping. ``obj['ret']`` must hold a dict with a
                ``'title'`` and a list of ``'pages'``; each page has a
                ``'title'`` and, optionally, a ``'content'`` list of
                ``{'title', 'description'}`` entries.
            save_fp: Output path used when ``obj`` carries no non-empty
                ``'save_fp'``. A path found in ``obj`` takes precedence.
            **kwargs: Accepted but not used here.

        Returns:
            The ``Presentation`` object that was saved.

        Raises:
            KeyError: If neither ``obj['save_fp']`` nor ``save_fp`` provides a
                path, or if a required key is missing from ``obj['ret']``.
        """
        ret = obj['ret']
        # The request's own path wins; the keyword argument is the fallback
        # (it used to be overwritten unconditionally).
        save_fp = obj.get('save_fp') or save_fp
        if not save_fp:
            raise KeyError('save_fp')
        ppt = Presentation()

        # Cover slide (title & subtitle layout).
        slide = ppt.slides.add_slide(ppt.slide_layouts[0])
        slide.placeholders[0].text = ret['title']

        # Content slides (title & content layout), one per page.
        for page in ret['pages']:
            slide = ppt.slides.add_slide(ppt.slide_layouts[1])
            slide.placeholders[0].text = page['title']

            text_frame = slide.placeholders[1].text_frame
            # A text frame always starts with one empty paragraph. Reuse it for
            # the first entry so the slide does not begin with a blank bullet.
            reuse_first = True
            for sub_content in page.get('content') or []:
                # Heading at level 1, followed by its description at level 2.
                for text, level in (
                    (sub_content['title'], 1),
                    (sub_content['description'], 2),
                ):
                    if reuse_first:
                        paragraph = text_frame.paragraphs[0]
                        reuse_first = False
                    else:
                        paragraph = text_frame.add_paragraph()
                    paragraph.text, paragraph.level = text, level

        os_lib.mk_parent_dir(save_fp)
        ppt.save(save_fp)
        return ppt
    
# Instantiate the PPT-rendering stage, add the output path to the request, and
# render the deck.
make_ppt = MakePpt()
obj.update(save_fp='data_cache/ppt_generator/如何学习大模型.pptx')
# NOTE(review): `on_process` returns the Presentation object, so after this line
# `obj` may no longer be the request dict, depending on how the base class
# `__call__` forwards the return value -- confirm.
obj = make_ppt(obj)

In [7]:
# Chain the stages into one end-to-end pipeline:
# prompt building -> model request -> PPT rendering.
# TODO: add a background-image generation stage between `llm` and `make_ppt`.
pipeline = Pipeline(make_gen_prompt, llm, make_ppt)

# A fresh request, this time carrying the output path from the start.
obj = {
    'topic': '周杰伦歌曲鉴赏',
    'pages': 4,
    'save_fp': 'data_cache/ppt_generator/周杰伦歌曲鉴赏.pptx',
}
obj = pipeline(obj, debug=True)

[34;1m模型输入[0m

现在你扮演一名文档编写员。我要准备1个主题关于"周杰伦歌曲鉴赏"的PPT，要求一共写4页，请你根据主题生成详细内容，不要省略。
按这个JSON格式输出
{"title": "example title", "pages": [{"title": "title for page 1", "content": [{"title": "title for paragraph 1", "description": "detail for paragraph 1"}, {"title": "title for paragraph 2", "description": "detail for paragraph 2"}]}, {"title": "title for page 2", "content": [{"title": "title for paragraph 1", "description": "detail for paragraph 1"}, {"title": "title for paragraph 2", "description": "detail for paragraph 2"}, {"title": "title for paragraph 3", "description": "detail for paragraph 3"}]}]}
只能返回JSON

开始!


----------

[34;1m模型输出[0m
{
    "title": "周杰伦歌曲鉴赏",
    "pages": [
        {
            "title": "周杰伦简介与音乐风格",
            "content": [
                {
                    "title": "周杰伦背景",
                    "description": "周杰伦，华语流行乐坛的领军人物，以其独特的音乐风格和创作才华闻名。自2000年出道以来，发行了多张畅销专辑，影响了一代又一代的听众。"
                },
                {
                    "title": "音乐风格特色",
     