In [1]:
%pip install transformers==4.45.2

Collecting transformers==4.45.2
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.45.2-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.46.3
    Uninstalling transformers-4.46.3:
      Successfully uninstalled transformers-4.46.3
Successfully installed transformers-4.45.2


使用 pipeline 生成

In [5]:
%pip install torch==2.5.1



In [7]:
from transformers import pipeline
import torch

def data():
  for i in range(10):
    yield f"My example {i}"

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
pipe = pipeline(model="openai-community/gpt2", device=device ,eos_token_id=2, pad_token_id=2)

generated_characters = 0
for out in pipe(data()):
    generated_characters += len(out[0]["generated_text"])

print(generated_characters)




cpu
2023


# [datasets](https://github.com/huggingface/datasets/releases)

In [12]:
%pip install fsspec==2024.9.0



In [1]:
%pip install datasets==3.1.0



In [8]:
import torch

from transformers import pipeline

# device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

messages = [ {"role": "user", "content": "请写一首赞美秋天的五言绝句"},]

pipe = pipeline("text-generation", model=model_name, device=device, max_new_tokens=100)

response = pipe(messages)

print(response[-1]['generated_text'][-1]['content'])


秋风起兮叶纷飞，黄菊开兮红叶稀。
人生若只如初见，何事秋风悲画眉。




```
pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")
result = pipe(messages)
```

第一个参数指定了它的用途，这里是文本生成（text-generation），pipeline 会根据不同的用途进行不同的管道配置。第二个参数是模型，在这个例子里面，我们使用的模型是阿里的通义千问（Qwen），引用模型的方式就是“用户名 / 模型名”，在这里就是“Qwen/Qwen2.5-0.5B-Instruct”。


pipeline 模型的第一个参数指定了用途。除了像大模型做文本生成，Hugging Face 提供了大量的不同模型，可以帮助我们完成其它的工作。比如后面这个例子：

In [10]:
%pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sacremoses
Successfully installed sacremoses-0.1.1


In [11]:
import torch
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=device)
result = pipe("今天天气真好，我想出去玩。")
print(result[-1]['translation_text'])

It's a nice day. I want to go out.


# 用底层实现调用模型

In [13]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [
    {"role": "user", "content": "请写一首赞美春天的诗，要求不包含春字"},
]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

春风拂面柳依依，
万物复苏绿意齐。
花开烂漫映山河，
生机盎然满人间。

鸟语花香醉人心，
孩童嬉戏乐无边。
老者悠然自得处，
岁月静好待时光。


**解析上面代码**

把输入转换成 Token。



```
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
```



大模型根据输入生成相应的内容。

```
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
```



生成的结果是 Token，还需要把它转成文本。

```
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
```



# 流式输出

In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [
    {"role": "user", "content": "请写一首赞美秋天的五言绝句"},
]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512,
    streamer=streamer,
)

秋风送爽至，落叶铺金地。
丰收喜气浓，硕果挂枝头。


用到了 TextStreamer，它会直接把生成结果输出到控制台上。如果我们要实现一个控制台应用，它是可以用的。但更多的情况下，我们需要拿到输出结果，再去做相应的处理，比如，服务端把生成的内容发送给客户端。这种情况下，我们可以使用 TextIteratorStreamer，下面是一个例子：

threading 使用的是异步

In [17]:
from threading import Thread

from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [
    {"role": "user", "content": "请写一首赞美秋天的五言绝句"},
]

text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(model_inputs, streamer=streamer, max_new_tokens=20)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for text in streamer:
    print(text)

秋
风
送
爽
至

，落叶
满
长
街
。

丹
桂
飘
香
远

，丰收
喜
气



给 pipeline 增加流式输出的能力：

In [19]:
import torch
from threading import Thread
from transformers import pipeline, TextIteratorStreamer

device = "cuda" if torch.cuda.is_available() else "cpu"

messages = [
    {"role": "user", "content": "请写一首赞美秋天的五言绝句"},
]

pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct", device=device, max_new_tokens=100)

streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(text_inputs=messages, streamer=streamer)
thread = Thread(target=pipe, kwargs=generation_kwargs)
thread.start()

for text in streamer:
    print(text)

秋
风
送
爽
至

，落叶
铺
金
地
。

稻
香
四
溢
来

，丰收
喜
气
盈


。
