### 准备数据

In [17]:
# 准备数据
import pandas as pd
df=pd.read_excel("weibo.xlsx")

df=df.replace(1,2)
df=df.replace(0,1)
df=df.replace(-1,0)
comments=df['comment'].tolist()
labels=df['label'].tolist()
length=len(df)
split_num=int(length*0.7)
train_data=df[:split_num]
test_data=df[split_num:]

### 训练模型

In [20]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.optim import AdamW
import torch

# 1. 加载预训练的tokenizer和模型
tokenizer = BertTokenizer.from_pretrained('D:\\code\\model\\bert-base-chinese')
model = BertForSequenceClassification.from_pretrained(
    "D:\\code\\model\\bert-base-chinese", # 使用12层的BERT模型
    num_labels = 3, # 二分类任务（比如情感分析）
    output_attentions = False, # 模型是否返回注意力权重
    output_hidden_states = False, # 模型是否返回所有隐藏状态
)
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=model.to(device)
# 2. 准备数据
# 假设我们有一些文本数据和对应的标签
texts = train_data['comment'].tolist()
texts=[str(text)for text in texts]
labels = train_data['label'].tolist()  # 1代表积极情绪，0代表消极情绪
# 使用tokenizer处理文本数据
inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors='pt')

# 把标签转换成Tensor
labels = torch.tensor(labels)

# 3. 创建一个DataLoader
data = list(zip(inputs['input_ids'], inputs['attention_mask'], labels))
dataloader = DataLoader(data, batch_size=2)

# 4. 微调模型
# 设置优化器
optimizer = AdamW(model.parameters(), lr=1e-5)

# 开始训练
model.train()
for epoch in range(3):  # 这里只做3个epoch的训练
    for batch in dataloader:
        input_ids,attention_mask,labels = batch
        input_ids=input_ids.to(device)
        attention_mask=attention_mask.to(device)
        labels=labels.to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

# 5. 保存微调后的模型
torch.save(model, 'complete_model.pth')



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at D:\code\model\bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### 对已经训好的模型做测试

In [9]:
import torch
model=torch.load("D:\\code\\情感分析\\complete_model.pth")
model.eval()
test=["I love the book","I hate the book"]
tokrnizer=BertTokenizer.from_pretrained('D:\\code\\model\\bert-base-uncased')
test=tokenizer(test,padding=True,truncation=True,max_length=128,return_tensors="pt")

with torch.no_grad():
    predictions=model(**test)

logits=predictions.logits
_,pre_label=torch.max(logits,1)
# 保存预测的标签
predicted_labels_list = pre_label.tolist()
# ...保存到文件或其他操作
predicted_labels_list

  model=torch.load("D:\\code\\情感分析\\complete_model.pth")


[1, 1]