In [1]:
from transformers import BertModel, BertTokenizer, AutoConfig
import torch
import torch.utils.data as Data
from sklearn.metrics import f1_score
from transformers import Trainer, TrainingArguments

from models.BertLastFour_MeanMaxPool import BertLastFour_MeanMaxPool
from data_ag_news.data_process.data_process_bert import Dataset, get_collate_fn

In [2]:
# Name of the pretrained checkpoint shared by the tokenizer and the encoder.
model_ckpt = "bert-base-uncased"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer for the checkpoint and the project collate function built on it.
# NOTE(review): max_len=141 is forwarded to get_collate_fn; presumably the
# padded/truncated token length per sample — confirm in data_process_bert.
token = BertTokenizer.from_pretrained(model_ckpt)
collate_fn = get_collate_fn(token, max_len=141)

# Project Dataset wrappers around the two JSON-lines files.
dataset_test = Dataset('../datasets/test.jsonl')
dataset_train = Dataset('../datasets/train.jsonl')

# Sanity check: collate a single batch of 4 test samples and print it.
dataLoader = Data.DataLoader(dataset_test, batch_size=4, collate_fn=collate_fn)
for i in dataLoader:
    print(i)
    break  # only the first batch is needed

{'input_ids': tensor([[  101, 10069,  2005,  1056,  1050, 11550,  2044,  7566,  9209,  5052,
          3667,  2012,  6769,  2047,  8095,  2360,  2027,  2024,  1005,  9364,
          1005,  2044,  7566,  2007, 16654,  6687,  3813,  2976,  9587, 24848,
          1012,   102,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,  

In [3]:
# Load the checkpoint's configuration and switch on `output_hidden_states`
# before instantiating the encoder with it.
config_update = AutoConfig.from_pretrained(model_ckpt)
config_update.output_hidden_states = True
pretrained = BertModel.from_pretrained(model_ckpt, config=config_update)

# Freeze every encoder parameter (no gradient updates for the pretrained part).
# NOTE(review): this only disables gradients; whether dropout inside the
# encoder is active still depends on the module's train/eval mode — confirm
# that is intended.
pretrained.requires_grad_(False)

# Wrap the frozen encoder in the project's classifier, handing it the loss.
# The second positional argument is presumably the number of target classes
# (4) — confirm against BertLastFour_MeanMaxPool's signature.
criterion = torch.nn.CrossEntropyLoss()
model = BertLastFour_MeanMaxPool(pretrained, 4, criterion).to(device)
model

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertLastFour_MeanMaxPool(
  (pretrained): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [4]:
def compute_metrics(pred):
    """Compute the micro-averaged F1 score for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (the ground-truth labels, i.e. the
            ``labels`` input of the custom model's ``forward``) and
            ``predictions`` (the model outputs that follow the loss in the
            return value of ``forward``). ``predictions`` may be a single
            array of per-class scores or a tuple whose first element is that
            array.

    Returns:
        dict: ``{"f1 score": value}`` mapping the metric name to its value.
    """
    labels = pred.label_ids
    preds = pred.predictions
    # When the model returns more than one non-loss output, the predictions
    # arrive as a tuple; the class scores are its first element.
    if isinstance(preds, tuple):
        preds = preds[0]
    # Predicted class = index of the highest score along the last axis.
    preds_argmax = preds.argmax(-1)
    # Note: for single-label multiclass targets micro-averaged F1 is
    # numerically equal to accuracy.
    f1 = f1_score(labels, preds_argmax, average='micro')
    return {"f1 score": f1}  # metric name -> metric value


# Training configuration: 10 epochs, batches of 128 per device, with
# evaluation and logging once per epoch.
# NOTE(review): no save strategy is configured, so checkpointing follows the
# library default (the run log shows a checkpoint every 500 steps) — confirm
# that cadence and disk usage are intended.
training_args = TrainingArguments(
    seed=42,
    output_dir='output_dir',
    overwrite_output_dir=True,
    num_train_epochs=10.0,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    logging_strategy='epoch',
    evaluation_strategy="epoch",
    disable_tqdm=False,  # keep the tqdm progress display enabled
)

# Wire the model, data, collate function, tokenizer and metric into the Trainer.
# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    tokenizer=token,
    compute_metrics=compute_metrics,
)

trainer.train()  # train the model

***** Running training *****
  Num examples = 120000
  Num Epochs = 10
  Instantaneous batch size per device = 128
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 9380


Epoch,Training Loss,Validation Loss,F1 score
1,0.8748,0.847427,0.898684
2,0.8459,0.841223,0.904474
3,0.8417,0.836374,0.906579
4,0.8382,0.835117,0.908026
5,0.8349,0.834495,0.909079
6,0.8329,0.834758,0.908947
7,0.8307,0.830962,0.910921
8,0.8284,0.830273,0.912105
9,0.8261,0.829699,0.913421
10,0.8242,0.829006,0.912632


Saving model checkpoint to output_dir/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
tokenizer config file saved in output_dir/checkpoint-500/tokenizer_config.json
Special tokens file saved in output_dir/checkpoint-500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 7600
  Batch size = 128
Saving model checkpoint to output_dir/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
tokenizer config file saved in output_dir/checkpoint-1000/tokenizer_config.json
Special tokens file saved in output_dir/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to output_dir/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
tokenizer config file saved in output_dir/checkpoint-1500/tokenizer_config.json
Special tokens file saved in output_dir/checkpoint-1500/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 7600
  Batch size = 128
Saving 

TrainOutput(global_step=9380, training_loss=0.8377859933035714, metrics={'train_runtime': 3572.1775, 'train_samples_per_second': 335.93, 'train_steps_per_second': 2.626, 'total_flos': 0.0, 'train_loss': 0.8377859933035714, 'epoch': 10.0})