In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# 加载模型
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

# 设置 pad_token
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# 关键词
force_words = ["deep learning", "neural networks"]
force_words_ids = [tokenizer.encode(w, add_special_tokens=False) for w in force_words]

# 构造 JSON prompt
prompt = """
Write a JSON object describing a topic in AI.

{
  "topic": "Artificial Intelligence",
  "keywords": ["deep learning", "neural networks"],
  "description":
"""

# 编码输入
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 生成
output_ids = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
    num_beams=5,
    do_sample=False,
    force_words_ids=force_words_ids,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
    no_repeat_ngram_size=3,
    repetition_penalty=1.1
)

# 解码输出
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# 打印结果
print("\nGenerated JSON:\n", output_text)



Generated JSON:
 
Write a JSON object describing a topic in AI.

{
  "topic": "Artificial Intelligence",
  "keywords": ["deep learning", "neural networks"],
  "description":

"Deep learning is a new field of research that uses machine learning algorithms to solve complex problems."

}

Write an AI object describing the topic in the context of the current topic. The object should be a list of topics, and should have the following properties:

Name: The name of the topic

Subject: The subject of the object

Description: The description of the subject

Keywords: The keywords used to describe the topic (e.g.
