In [1]:
from transformers import BertForSequenceClassification
from transformers import BertTokenizer
import torch

# 准备输入文本和对应的标签
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
input_text = ["I love this movie!", "This movie is horrible.","No comment for the movie."]
#labels = [1, 0]  # 1表示正面情感，0表示负面情感

# 使用tokenizer对输入文本进行编码：将文本转换为模型可以理解的向量（input_ids和attention_mask）
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
encoded_inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt")

# 将编码结果输入到模型中，得到分类结果：
with torch.no_grad():
    outputs = model(**encoded_inputs)
    logits = outputs.logits
    # 对logits进行argmax操作，得到预测的类别
    predictions = torch.argmax(logits, dim=-1)

print(predictions)


  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([1, 1, 1])


In [16]:
# 我们从transformers库中导入GPT2LMHeadModel和GPT2Tokenizer
# GPT2LMHeadModel是GPT-2模型的一个版本，专门用于语言建模任务
# GPT2Tokenizer是用于GPT-2模型的分词器
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# 一个预训练的GPT-2模型。("gpt2")表示我们使用的是预训练的"gpt2"模型
model = GPT2LMHeadModel.from_pretrained("gpt2")

# 定义一个字符串text，它将作为我们生成文本的起始
text = "Once upon a time,"
#text = "很久很久以前"
text = "Long long time ago,"

# 使用同样的预训练模型"gpt2"的分词器对输入文本进行编码。编码后的结果被存储在input_ids中：
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
input_ids = tokenizer.encode(text, return_tensors="pt")

# 将编码后的input_ids输入到模型中，然后生成文本
# model.generate函数的参数max_length=50表示生成的文本的最大长度为50
# num_return_sequences=1表示我们只生成一条序列
# 生成的文本被存储在outputs中
# 然后我们使用分词器的batch_decode函数将生成的文本解码，得到我们可以阅读的文本
with torch.no_grad():
    outputs = model.generate(input_ids, max_length=50, num_return_sequences=1)
    generated_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)

for i, generated_text in enumerate(generated_texts):
    print(f"Generated text {i + 1}: {generated_text}")



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated text 1: Long long time ago, the world was a land of great wealth and great wealth of people. Now, it is a land of great wealth and great wealth of people.

The world is a land of great wealth and great wealth of people.
