In [1]:
import os
# Point Hugging Face Hub traffic at the hf-mirror.com mirror.
# NOTE(review): presumably set before the Hugging Face imports below so that
# those libraries pick the value up when they are imported — confirm.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

import os
from datasets import load_dataset
from evaluate import load
from transformers import (
    BertForSequenceClassification,
    BertTokenizerFast,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    EarlyStoppingCallback,
    get_cosine_schedule_with_warmup

)
import numpy as np
import torch
from transformers.trainer import Trainer
from transformers.optimization import get_scheduler

In [3]:
def main(seed: int = 42, early_stopping_patience: int = 20):
    """Fine-tune BERT for 5-way text classification and print validation metrics.

    Reads ``train.jsonl`` and ``holdout.jsonl`` from the working directory
    (every record needs a ``text`` field and a ``label`` field whose value is
    one of the keys of ``label2id`` below), fine-tunes the checkpoint stored at
    ``model_name`` with a cosine learning-rate schedule, evaluates and saves
    every 200 steps, reloads the checkpoint with the best macro F1 when
    training ends, and prints accuracy / macro F1 on the validation split.

    Args:
        seed: Random seed. It is applied before the model is built, so the
            newly initialised classification head is reproducible, and it is
            also forwarded to ``TrainingArguments``.
        early_stopping_patience: Number of consecutive evaluations without an
            F1 improvement after which training is stopped early.

    Returns:
        The metrics dict produced by the final ``trainer.evaluate()`` call.

    Raises:
        KeyError: If a record carries a label that is not in ``label2id``.
    """
    # Local import keeps this block self-contained; transformers is already a
    # dependency of this file.
    from transformers import set_seed

    # Seed *before* the model is created: the Trainer only seeds in its own
    # constructor, which is too late for the randomly initialised classifier.
    set_seed(seed)

    # 1. Load the dataset (the JSONL files are assumed to be prepared already).
    dataset = load_dataset(
        "json",
        data_files={"train": "train.jsonl", "validation": "holdout.jsonl"},
    )

    # 2. Label vocabulary: map the string labels to integer class ids.
    label2id = {"财经": 0, "体育": 1, "娱乐": 2, "教育": 3, "科技": 4}
    id2label = {v: k for k, v in label2id.items()}

    def encode_labels(example):
        """Replace the string label of one record with its integer id."""
        example["label"] = label2id[example["label"]]
        return example

    dataset = dataset.map(encode_labels)

    # 3. Load tokenizer & model. The label maps are stored in the model config
    #    so saved checkpoints report class names instead of LABEL_0..LABEL_4.
    model_name = "/root/autodl-tmp/Day4/bert-base-chinese/"
    tokenizer = BertTokenizerFast.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id,
    )

    def tokenize_fn(batch):
        """Tokenize a batch of texts, truncating each to at most 256 tokens."""
        # Return every tokenizer field (input_ids, attention_mask, ...) rather
        # than input_ids alone, so the collator pads real masks.
        return tokenizer(batch["text"], truncation=True, max_length=256)

    tokenized_ds = dataset.map(tokenize_fn, batched=True, remove_columns=["text"])

    # 4. Data collator: pads each batch dynamically to its longest sequence.
    data_collator = DataCollatorWithPadding(tokenizer)

    # 5. Metrics: accuracy + macro F1.
    accuracy = load("accuracy")
    f1 = load("f1")

    def compute_metrics(eval_pred):
        """Turn (logits, labels) into accuracy and macro-averaged F1."""
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=-1)
        acc = accuracy.compute(predictions=preds, references=labels)["accuracy"]
        f1m = f1.compute(predictions=preds, references=labels, average="macro")["f1"]
        return {"accuracy": acc, "f1": f1m}

    # 6. Training configuration. Evaluation and checkpointing share the same
    #    200-step cadence, which load_best_model_at_end relies on.
    training_args = TrainingArguments(
        output_dir="./bert_day5",
        evaluation_strategy="steps",
        eval_steps=200,
        logging_strategy="steps",
        logging_steps=50,
        save_strategy="steps",
        save_steps=200,
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        num_train_epochs=10,
        weight_decay=0.01,
        learning_rate=2e-5,
        warmup_steps=500,             # number of warmup steps
        logging_dir="./runs",         # TensorBoard log directory
        report_to="tensorboard",      # report metrics to TensorBoard
        remove_unused_columns=False,  # keep all columns (original intent: callbacks can access all data)
        seed=seed,
    )

    class CosineTrainer(Trainer):
        """Trainer that always builds a cosine learning-rate schedule with warmup."""

        def create_scheduler(self, num_training_steps: int, optimizer=None):
            """Create the cosine scheduler once and return it on every call."""
            if self.lr_scheduler is None:
                self.lr_scheduler = get_scheduler(
                    name="cosine",
                    # Explicit None check instead of relying on truthiness.
                    optimizer=self.optimizer if optimizer is None else optimizer,
                    num_warmup_steps=self.args.get_warmup_steps(num_training_steps),
                    num_training_steps=num_training_steps,
                )
            return self.lr_scheduler

    # 7. Build the trainer. Early stopping triggers after
    #    `early_stopping_patience` consecutive evaluations (one every 200
    #    steps) in which the F1 score does not improve.
    trainer = CosineTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["validation"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=early_stopping_patience)],
    )

    # 8. Train.
    trainer.train()

    # 9. Evaluate the (best) model and print the validation metrics.
    results = trainer.evaluate()
    print("=== Validation Metrics ===")
    print(f"Accuracy: {results['eval_accuracy']:.4f}")
    print(f"Macro F1 : {results['eval_f1']:.4f}")
    return results

# Run the fine-tuning pipeline only when this code is executed as the main
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /root/autodl-tmp/Day4/bert-base-chinese/ and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Accuracy,F1
200,0.2072,0.105698,0.98,0.979816
400,0.1234,0.088575,0.982,0.981661
600,0.0949,0.101833,0.98,0.979982
800,0.1218,0.065459,0.985,0.984733
1000,0.0564,0.100233,0.976,0.975467
1200,0.0641,0.060267,0.983,0.982423
1400,0.0275,0.089795,0.982,0.981603
1600,0.0479,0.083844,0.985,0.985128
1800,0.0375,0.086206,0.985,0.985008
2000,0.0051,0.134042,0.981,0.980647


=== Validation Metrics ===
Accuracy: 0.9890
Macro F1 : 0.9889
