# 创建Transformer

In [1]:
from transformers import BertConfig, BertModel

In [2]:
# Build a BERT configuration object using the library's default settings
# (no arguments are passed, so nothing is overridden).
config = BertConfig()

In [3]:
# Instantiate the model from the configuration alone. No checkpoint is loaded
# here; a later cell in this notebook notes that a model built this way has
# randomly initialized weights.
model = BertModel(config)

In [4]:
# Print the configuration to inspect the hyperparameters the model was built with.
print(config)
#
# From the Hugging Face (HF) course:
# the hidden_size attribute defines the size of the hidden_states vector,
# and num_hidden_layers defines the number of layers the Transformer model has.

BertConfig {
  "_attn_implementation_autoset": true,
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.51.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



## 不同的加载方法

In [5]:
#
# Same code as the cells above.
# The point being made: a model created this way has randomly initialized weights.
from transformers import BertConfig, BertModel

config = BertConfig()
model = BertModel(config)

# Model is randomly initialized!

In [6]:
#
# Of course, what we actually want is a pretrained model.
from transformers import BertModel

model = BertModel.from_pretrained("bert-base-cased")
#
# From the Hugging Face (HF) course:
# In the code sample above we did not use BertConfig; instead we loaded a
# pretrained model via the bert-base-cased identifier.
# This is a model checkpoint that was trained by the authors of BERT themselves;
# you can find more details about it in its model card.

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

## 保存方法

In [7]:
# Write the model to a local directory so it can be reloaded later.
model.save_pretrained("directory_on_my_computer")
#
# Two files are saved: config.json and a weights file.
# NOTE(review): older transformers releases name the weights file
# pytorch_model.bin; recent releases (this notebook ran 4.51.3) write
# model.safetensors by default. Both hold the same data -- list the
# directory to confirm which one was produced.
#
# The weights file is known as the state dictionary:
# it contains all of the model's weights. The two files go hand in hand: the
# configuration is needed to know the model's architecture, while the model
# weights are the model's parameters.

# 使用 Transformer 模型进行推理



很好，加载和保存已经学完了，接下来开始推理。

In [11]:
# Example sentences for the inference demo.
# NOTE(review): this list is not referenced again in the visible cells; the
# next cell hard-codes token IDs, presumably the encoding of these sentences.
sequences = ["Hello!", "Cool.", "Nice!"]

In [12]:
# Hard-coded token IDs, one row per sequence. Every row has 4 IDs, starting
# with 101 and ending with 102 (presumably the [CLS] / [SEP] special tokens --
# TODO confirm with the tokenizer).
# NOTE(review): these IDs look like they were copied from the HF course and may
# belong to an uncased BERT vocabulary, whereas the checkpoint loaded earlier is
# bert-base-cased. Verify by encoding the sentences with the matching tokenizer.
encoded_sequences = [
    [101, 7592, 999, 102],
    [101, 4658, 1012, 102],
    [101, 3835, 999, 102],
]

In [13]:
import torch

# Convert the nested list of token IDs into a tensor. This works because the
# list is rectangular: 3 rows of 4 IDs each.
model_inputs = torch.tensor(encoded_sequences)

In [14]:
# Inference only: disable autograd so the forward pass does not record a
# computation graph (less memory and time; the returned tensors carry no grad_fn).
# `torch` is imported in the preceding cell.
with torch.no_grad():
    output = model(model_inputs)

In [15]:
# Display the forward-pass result. The recorded output below shows a
# BaseModelOutputWithPoolingAndCrossAttentions whose first field is
# last_hidden_state.
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 4.4496e-01,  4.8276e-01,  2.7797e-01,  ..., -5.4032e-02,
           3.9393e-01, -9.4770e-02],
         [ 2.4943e-01, -4.4093e-01,  8.1772e-01,  ..., -3.1917e-01,
           2.2992e-01, -4.1172e-02],
         [ 1.3668e-01,  2.2518e-01,  1.4502e-01,  ..., -4.6914e-02,
           2.8224e-01,  7.5566e-02],
         [ 1.1789e+00,  1.6739e-01, -1.8187e-01,  ...,  2.4671e-01,
           1.0441e+00, -6.1972e-03]],

        [[ 3.6436e-01,  3.2464e-02,  2.0258e-01,  ...,  6.0111e-02,
           3.2451e-01, -2.0996e-02],
         [ 7.1866e-01, -4.8725e-01,  5.1740e-01,  ..., -4.4012e-01,
           1.4553e-01, -3.7545e-02],
         [ 3.3223e-01, -2.3271e-01,  9.4876e-02,  ..., -2.5268e-01,
           3.2172e-01,  8.1122e-04],
         [ 1.2523e+00,  3.5754e-01, -5.1320e-02,  ..., -3.7840e-01,
           1.0526e+00, -5.6255e-01]],

        [[ 2.4042e-01,  1.4718e-01,  1.2110e-01,  ...,  7.6062e-02,
           3.3564e-01,  2

In [None]:
#
# The tutorial is not well written; it ends abruptly here.