# 大语言模型Transformers库-Model组件实践

## 模型的加载

In [1]:
from transformers import AutoConfig, AutoModel, AutoTokenizer

# "hfl/rbt3" is a Hugging Face Hub model id, so from_pretrained() fetches the
# files from the Hub (the progress bars below come from that download).
# To force a fresh download even when a cached copy exists:
#   model = AutoModel.from_pretrained("hfl/rbt3", force_download=True)
# To load offline, pass a local directory that contains the model files
# instead of the Hub id, or add local_files_only=True to use only the cache.
model = AutoModel.from_pretrained("hfl/rbt3")
tokenizer = AutoTokenizer.from_pretrained("hfl/rbt3")

config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/156M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/19.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

## 模型的保存

In [None]:
# Save the model into its own target directory.
# NOTE(review): both directory names look like placeholders; replace them
# with real paths before running this cell.
model_save_path = "path_to_save_model"
model.save_pretrained(model_save_path)

# Save the tokenizer into a separate target directory. This call is the last
# expression of the cell, so its return value is what the cell displays.
tokenizer_save_path = "path_to_save_tokenizer"
tokenizer.save_pretrained(tokenizer_save_path)

## 模型加载参数
```text
BertConfig {
  "_name_or_path": "/root/代码/Model组件/rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.35.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}
```

In [2]:
# Load the base model from the "hfl/rbt3" checkpoint and display the
# configuration object attached to it (a BertConfig, as the output shows).
model = AutoModel.from_pretrained("hfl/rbt3")
model.config

BertConfig {
  "_name_or_path": "hfl/rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.41.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [3]:
# Load just the configuration for "hfl/rbt3" through AutoConfig and display
# it; the output matches the config shown via model.config.
config = AutoConfig.from_pretrained("hfl/rbt3")
config



BertConfig {
  "_name_or_path": "hfl/rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.41.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

## 模型调用

In [4]:
# Sample sentence used as model input in the following cells.
sen = "今天天气不错，我的心情也不错！"
tokenizer = AutoTokenizer.from_pretrained("hfl/rbt3")
# return_tensors="pt" makes the tokenizer return PyTorch tensors; the result
# holds input_ids, token_type_ids and attention_mask (see the output below).
inputs = tokenizer(sen, return_tensors="pt")
inputs

{'input_ids': tensor([[ 101,  791, 1921, 1921, 3698,  679, 7231, 8024, 2769, 4638, 2552, 2658,
          738,  679, 7231, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

### 不带Model Head的模型调用

In [5]:
# output_attentions=True is passed at load time, asking the model to return
# the attention weights in addition to its regular outputs.
# This model has no task head: as the displayed result shows, it returns
# last_hidden_state and pooler_output rather than logits or predictions.
# Attention weights are typically inspected for debugging or for analysing
# the model's internal behaviour.
# NOTE(review): the displayed tensors carry a grad_fn, i.e. the forward pass
# runs with gradient tracking; consider torch.no_grad() for pure inference.
model = AutoModel.from_pretrained("hfl/rbt3", output_attentions=True)
output = model(**inputs)
output



BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.1970,  0.6945,  1.0645,  ..., -0.0763,  0.2589, -0.5017],
         [-0.2034,  0.7565,  0.7339,  ...,  0.0791, -0.7068,  0.2239],
         [ 0.5075,  0.6311, -0.0454,  ..., -0.5820,  0.2189,  0.0670],
         ...,
         [ 0.3410,  0.2382, -0.1919,  ..., -0.0397,  0.2051,  0.0454],
         [ 0.4606,  0.3564, -0.4006,  ..., -0.3121,  0.3912, -0.1599],
         [ 0.1961,  0.6968,  1.0595,  ..., -0.0724,  0.2590, -0.5024]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 8.7897e-02, -9.9727e-01, -9.9996e-01, -9.4828e-01, -1.0888e-01,
         -4.5711e-02,  5.6735e-02,  5.0818e-01,  9.9614e-01,  9.9983e-01,
          3.4609e-02, -1.0000e+00, -4.3255e-02,  9.9985e-01, -9.9996e-01,
          9.9992e-01,  9.3540e-01,  9.9044e-01, -9.5642e-01, -6.8368e-02,
         -9.8755e-01, -5.2575e-01,  7.7211e-04,  9.8415e-01,  9.9882e-01,
         -9.8423e-01, -9.9975e-01,  2.0484e-01, -6.7945e-01, -9.999

### 带Model Head的模型调用

In [6]:
# NOTE(review): BertForSequenceClassification is imported but not used in the
# cells visible here; confirm whether a later cell needs it.
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification
# AutoModelForSequenceClassification loads the checkpoint with a sequence
# classification head, i.e. the model emits one set of class logits for the
# whole input sequence (e.g. sentiment or topic classification).
# The warning printed on load states that classifier.weight / classifier.bias
# are newly initialized, so the logits shown below are not meaningful until
# the model has been fine-tuned on a downstream task.
clz_model = AutoModelForSequenceClassification.from_pretrained("hfl/rbt3")

# Run the classifier on the tokenized sentence and display the result.
clz_model(**inputs)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at hfl/rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SequenceClassifierOutput(loss=None, logits=tensor([[ 0.5211, -0.2505]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [7]:
# Same load as the previous cell, but with num_labels=2 given explicitly.
# num_labels sets the output size of the classification head, i.e. how many
# class logits the model produces.
# The previous cell already yielded two logits without this argument, so the
# explicit value here only documents the intent. It does have to be set when
# the task has a different number of classes; otherwise the outputs cannot be
# mapped onto the label space.
clz_model = AutoModelForSequenceClassification.from_pretrained("hfl/rbt3", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at hfl/rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
