<a href="https://colab.research.google.com/github/beautiful-doggie/create_ppt/blob/main/load_deepseek_coder_in_8bits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --quiet bitsandbytes
!pip install --quiet --upgrade transformers # Install latest version of transformers
!pip install --quiet --upgrade accelerate
!pip install --quiet sentencepiece

^C
^C
^C
^C


In [None]:
!pip install transformers bitsandbytes accelerate

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Hugging Face Hub id of the checkpoint used for generation.
model_name = 'deepseek-ai/deepseek-coder-6.7b-base'

# Load the model with 8-bit weights. The quantization request goes through an
# explicit BitsAndBytesConfig: passing `load_in_8bit=True` directly to
# `from_pretrained` is deprecated in current transformers releases.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype=torch.float16,
)

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
import json
import re
import os

# Subject of the slide; substituted for "{topic}" in every template below.
topic = "红黑树"

# Prompt templates: one small prompt per slide field.
prompts = dict(
    title=(
        "你是一位计算机专业讲师，正在准备《数据结构》中关于“{topic}”的一页幻灯片。\n"
        "请输出本页幻灯片的标题，字符串即可。"
    ),
    outline="请列出关于“{topic}”的幻灯片内容提纲，使用 bullet point 每行一个要点。",
    explanation="请用简洁清晰的语言，写一段适合课堂讲解的文字，解释“{topic}”的核心概念和原理。",
    code_example="请给出一个用 Python 实现“{topic}”的代码示例，代码应简洁且注释清晰。",
    image_suggestion="请描述一张适合插入在“{topic}”幻灯片中的示意图，例如图的结构、操作流程等。",
)


In [None]:
# Create the output directory for this topic.
from pathlib import Path

topic_dir = Path(f"./slides/{topic}")
topic_dir.mkdir(parents=True, exist_ok=True)

# Generate the text of every field and save each one as its own .txt file.
for key, prompt_template in prompts.items():
    prompt = prompt_template.format(topic=topic)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output_ids = model.generate(
        **inputs,
        max_new_tokens=1024,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Drop the prompt so only newly generated text is saved;
    # otherwise every file (and the JSON built from it) starts with the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    new_token_ids = output_ids[0][prompt_length:]
    output_text = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    with open(topic_dir / f"{key}.txt", "w", encoding="utf-8") as f:
        f.write(output_text)

    print(f"已保存：{topic}/{key}.txt")


In [None]:
# 解析文本，合成为 JSON
import os
import json

import re

# Leading list markers: "-", "•", "●", a "*" followed by whitespace, or a
# number followed by a delimiter ("1.", "2)", "3、", "4:"). The (?!\d) guard
# keeps decimals such as "3.14" intact.
_OUTLINE_MARKER = re.compile(r"^\s*(?:(?:[-•●]|\*(?=\s)|\d+[.、)）:：](?!\d))\s*)+")


def parse_outline(text):
    """Turn a bullet-style outline into a list of clean item strings.

    Each non-blank line becomes one item. Only *leading* list markers
    (bullets or "1." style numbering) are removed, so digits and dashes that
    belong to the content (e.g. "2-3-4 树", a trailing "5") are preserved.
    Trailing full stops and colons are trimmed. Lines that contain nothing
    but markers (e.g. "---") are dropped.

    Args:
        text: Raw outline text, one bullet per line.

    Returns:
        A list of item strings in their original order; empty if ``text``
        has no usable lines.
    """
    items = []
    for line in text.splitlines():
        item = _OUTLINE_MARKER.sub("", line).strip()
        # Remove sentence-ending punctuation only at the end of the item.
        item = item.rstrip("。.：:").strip()
        if item:
            items.append(item)
    return items

# Collect every generated field into one dict, keyed like `prompts`.
result = {}
for key in prompts:
    path = topic_dir / f"{key}.txt"
    # A field whose file was never written falls back to empty text.
    content = path.read_text(encoding="utf-8").strip() if path.exists() else ""
    # The outline is always stored as a list of bullet strings (an empty list
    # when there is no text), so its JSON type does not depend on whether the
    # file exists; every other field is plain text.
    result[key] = parse_outline(content) if key == "outline" else content

with open(topic_dir / f"{topic}.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(result, f, ensure_ascii=False, indent=4)

print(f"已合成为 JSON:{topic}/{topic}.json")
