In [1]:
import os
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch

# Confirm that torch imports correctly and report whether CUDA is available
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Absolute path of the results directory; it is created if it does not exist.
# NOTE(review): this is a machine-specific Windows path — consider making it
# configurable before sharing the notebook.
output_dir = 'D:\\js\\results'
os.makedirs(output_dir, exist_ok=True)

# Load the IMDB dataset and keep handles on its train and test splits
dataset = load_dataset("imdb")
train_dataset = dataset['train']
test_dataset = dataset['test']

# Load the pretrained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode the data
def tokenize_function(examples):
    """Tokenize the ``'text'`` field of a batch with the module-level tokenizer.

    Args:
        examples: Mapping that holds the raw texts under the ``'text'`` key.

    Returns:
        Whatever the tokenizer returns when called with
        ``padding="max_length"`` and ``truncation=True``.
    """
    texts = examples['text']
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Tokenize both splits, passing examples to tokenize_function in batches
encoded_train_dataset = train_dataset.map(tokenize_function, batched=True)
encoded_test_dataset = test_dataset.map(tokenize_function, batched=True)

# Load the pretrained BERT model with a 2-label classification head
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Define the training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    # NOTE(review): newer transformers releases appear to rename this argument
    # to `eval_strategy` — confirm against the installed version.
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Create the Trainer instance.
# NOTE(review): the test split is passed as eval_dataset, so the per-epoch
# evaluation runs on the same data that is later used for the final score.
# No compute_metrics is supplied — presumably why the recorded evaluation
# output only contains the loss and timing fields; verify.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_test_dataset,
)

# Train the model
trainer.train()

# Evaluate the model and print the returned metrics dictionary
results = trainer.evaluate()
print("Evaluation results:", results)


  from .autonotebook import tqdm as notebook_tqdm


PyTorch version: 2.3.1+cu121
CUDA available: True


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  attn_output = torch.nn.functional.scaled_dot_product_attention(
  5%|▌         | 500/9375 [01:56<36:44,  4.03it/s]

{'loss': 0.3878, 'grad_norm': 27.762123107910156, 'learning_rate': 1.8933333333333334e-05, 'epoch': 0.16}


 11%|█         | 1000/9375 [04:05<35:01,  3.98it/s] 

{'loss': 0.3145, 'grad_norm': 12.772541999816895, 'learning_rate': 1.7866666666666666e-05, 'epoch': 0.32}


 16%|█▌        | 1500/9375 [06:16<33:43,  3.89it/s]  

{'loss': 0.3037, 'grad_norm': 23.96112632751465, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.48}


 21%|██▏       | 2000/9375 [08:28<32:20,  3.80it/s]  

{'loss': 0.2793, 'grad_norm': 22.48014259338379, 'learning_rate': 1.5733333333333334e-05, 'epoch': 0.64}


 27%|██▋       | 2500/9375 [10:39<29:19,  3.91it/s]  

{'loss': 0.2595, 'grad_norm': 6.8537373542785645, 'learning_rate': 1.4666666666666666e-05, 'epoch': 0.8}


 32%|███▏      | 3000/9375 [12:51<27:32,  3.86it/s]  

{'loss': 0.2568, 'grad_norm': 36.839447021484375, 'learning_rate': 1.3600000000000002e-05, 'epoch': 0.96}


                                                     
 33%|███▎      | 3126/9375 [17:42<135:00:15, 77.78s/it]

{'eval_loss': 0.23431654274463654, 'eval_runtime': 257.1525, 'eval_samples_per_second': 97.219, 'eval_steps_per_second': 12.152, 'epoch': 1.0}


 37%|███▋      | 3500/9375 [19:22<25:51,  3.79it/s]    

{'loss': 0.1949, 'grad_norm': 0.062145404517650604, 'learning_rate': 1.2533333333333336e-05, 'epoch': 1.12}


 43%|████▎     | 4000/9375 [21:36<23:38,  3.79it/s]  

{'loss': 0.161, 'grad_norm': 38.90977096557617, 'learning_rate': 1.1466666666666668e-05, 'epoch': 1.28}


 48%|████▊     | 4500/9375 [23:49<21:20,  3.81it/s]  

{'loss': 0.1499, 'grad_norm': 0.1815585345029831, 'learning_rate': 1.04e-05, 'epoch': 1.44}


 53%|█████▎    | 5000/9375 [26:03<19:25,  3.76it/s]  

{'loss': 0.1565, 'grad_norm': 0.05382585898041725, 'learning_rate': 9.333333333333334e-06, 'epoch': 1.6}


 59%|█████▊    | 5500/9375 [28:17<16:51,  3.83it/s]  

{'loss': 0.1642, 'grad_norm': 10.506442070007324, 'learning_rate': 8.266666666666667e-06, 'epoch': 1.76}


 64%|██████▍   | 6000/9375 [30:30<14:25,  3.90it/s]  

{'loss': 0.149, 'grad_norm': 0.47605034708976746, 'learning_rate': 7.2000000000000005e-06, 'epoch': 1.92}


                                                     
 67%|██████▋   | 6250/9375 [35:51<13:46,  3.78it/s]

{'eval_loss': 0.24927932024002075, 'eval_runtime': 254.5336, 'eval_samples_per_second': 98.219, 'eval_steps_per_second': 12.277, 'epoch': 2.0}


 69%|██████▉   | 6500/9375 [37:02<13:12,  3.63it/s]   

{'loss': 0.1099, 'grad_norm': 0.01797865703701973, 'learning_rate': 6.133333333333334e-06, 'epoch': 2.08}


 75%|███████▍  | 7000/9375 [39:07<10:42,  3.70it/s]

{'loss': 0.0739, 'grad_norm': 0.08309272676706314, 'learning_rate': 5.0666666666666676e-06, 'epoch': 2.24}


 80%|████████  | 7500/9375 [41:21<07:53,  3.96it/s]

{'loss': 0.0622, 'grad_norm': 63.336181640625, 'learning_rate': 4.000000000000001e-06, 'epoch': 2.4}


 85%|████████▌ | 8000/9375 [43:41<06:32,  3.51it/s]

{'loss': 0.0705, 'grad_norm': 0.018638920038938522, 'learning_rate': 2.9333333333333338e-06, 'epoch': 2.56}


 91%|█████████ | 8500/9375 [46:03<03:43,  3.91it/s]

{'loss': 0.0759, 'grad_norm': 0.48176220059394836, 'learning_rate': 1.8666666666666669e-06, 'epoch': 2.72}


 96%|█████████▌| 9000/9375 [48:18<01:32,  4.03it/s]

{'loss': 0.0745, 'grad_norm': 0.9126028418540955, 'learning_rate': 8.000000000000001e-07, 'epoch': 2.88}


                                                   
100%|██████████| 9375/9375 [54:34<00:00,  2.86it/s]


{'eval_loss': 0.3125171959400177, 'eval_runtime': 271.67, 'eval_samples_per_second': 92.023, 'eval_steps_per_second': 11.503, 'epoch': 3.0}
{'train_runtime': 3274.0532, 'train_samples_per_second': 22.907, 'train_steps_per_second': 2.863, 'train_loss': 0.17580392740885417, 'epoch': 3.0}


100%|██████████| 3125/3125 [04:24<00:00, 11.83it/s]

Evaluation results: {'eval_loss': 0.3125171959400177, 'eval_runtime': 262.777, 'eval_samples_per_second': 95.138, 'eval_steps_per_second': 11.892, 'epoch': 3.0}





In [3]:
import numpy as np

# Run the trainer over the encoded test split and collect labels / predictions
predictions = trainer.predict(encoded_test_dataset)
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=-1)

# Accuracy: number of predictions equal to the label, divided by the label count
num_correct = np.sum(y_pred == y_true)
accuracy = num_correct / len(y_true)
print(f"Accuracy: {accuracy:.4f}")

# 生成分类报告
def classification_report(y_true, y_pred, target_names):
    """Build a plain-text precision / recall / F1 report, one line per class.

    Args:
        y_true: Iterable of ground-truth labels.
        y_pred: Iterable of predicted labels, paired element-wise with
            ``y_true`` (pairing stops at the shorter of the two).
        target_names: Class display names; the name at position ``i`` is
            reported using the counts collected for label ``i``.

    Returns:
        A newline-joined string with precision, recall, f1 and support for
        each entry of ``target_names``. Any ratio whose denominator is zero
        is reported as 0.
    """
    support = {}       # label -> number of times it occurs in y_true
    predicted_as = {}  # label -> number of times it occurs in y_pred
    hits = {}          # label -> number of exact matches

    for actual, guess in zip(y_true, y_pred):
        support[actual] = support.get(actual, 0) + 1
        predicted_as[guess] = predicted_as.get(guess, 0) + 1
        if actual == guess:
            hits[actual] = hits.get(actual, 0) + 1

    def _ratio(numerator, denominator):
        # Guard against division by zero for classes that never occur.
        if denominator > 0:
            return numerator / denominator
        return 0

    lines = []
    for idx, name in enumerate(target_names):
        total = support.get(idx, 0)
        correct = hits.get(idx, 0)
        precision = _ratio(correct, predicted_as.get(idx, 0))
        recall = _ratio(correct, total)
        if (precision + recall) > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0
        lines.append(
            f"{name:15} precision: {precision:.4f} recall: {recall:.4f} f1: {f1:.4f} support: {total}"
        )

    return "\n".join(lines)

# Get the class label names from the training split's 'label' feature
label_names = dataset['train'].features['label'].names

# Build and print the classification report
report = classification_report(y_true, y_pred, label_names)
print(report)



100%|██████████| 3125/3125 [04:06<00:00, 12.69it/s]

Accuracy: 0.9412
neg             precision: 0.9399 recall: 0.9426 f1: 0.9412 support: 12500
pos             precision: 0.9424 recall: 0.9398 f1: 0.9411 support: 12500



