In [10]:
from transformers import AutoModel, AutoTokenizer, ErnieModel

In [2]:
# Load the tokenizer published under the "nghuyong/ernie-health-zh" checkpoint
# and print, one per line, its model_input_names and all_special_tokens.
tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-health-zh")
print(tokenizer.model_input_names, tokenizer.all_special_tokens, sep="\n")

['input_ids', 'token_type_ids', 'attention_mask']
['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']


In [3]:
# The bare string below is a no-op note listing the main pre-trained
# checkpoints. English rendering of its first line: "Main pre-trained models:".
# It is not the last expression of the cell, so it is not displayed.
'''
主要预训练模型:
'nghuyong/ernie-3.0-medium-zh'   6-layer, 768-hidden, 12-heads, 75M parameters. Trained on Chinese text.
'nghuyong/ernie-3.0-base-zh'   12-layer, 768-hidden, 12-heads, 118M parameters. Trained on Chinese text.
'nghuyong/ernie-3.0-xbase-zh'    20-layer, 1024-hidden, 16-heads, 296M parameters. Trained on Chinese text.
'nghuyong/ernie-health-zh'
'''
# Load the "nghuyong/ernie-health-zh" checkpoint through AutoModel. The output
# recorded for this cell shows the result is an ErnieModel and that the load
# log reports the pooler dense weight/bias as newly initialized.
model = AutoModel.from_pretrained("nghuyong/ernie-health-zh")
# Bare last expression: the notebook displays the module tree of the model.
model

Some weights of ErnieModel were not initialized from the model checkpoint at nghuyong/ernie-health-zh and are newly initialized: ['ernie.pooler.dense.bias', 'ernie.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ErnieModel(
  (embeddings): ErnieEmbeddings(
    (word_embeddings): Embedding(22608, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): ErnieEncoder(
    (layer): ModuleList(
      (0): ErnieLayer(
        (attention): ErnieAttention(
          (self): ErnieSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): ErnieSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
   

In [4]:
# Display the configuration object attached to the ernie-health model
# (hidden size, layer count, vocabulary size, max_position_embeddings, ...).
model.config

ErnieConfig {
  "_name_or_path": "nghuyong/ernie-health-zh",
  "architectures": [
    "ErnieModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "embedding_size": 768,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 512,
  "model_type": "ernie",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "task_type_vocab_size": 3,
  "transformers_version": "4.23.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "use_task_id": false,
  "vocab_size": 22608
}

In [5]:
# List the qualified name of every parameter of the ernie-health model, one
# per line. Only the name is printed; `para` is not used in the loop body.
for name, para in model.named_parameters():
    print(name)

embeddings.word_embeddings.weight
embeddings.position_embeddings.weight
embeddings.token_type_embeddings.weight
embeddings.LayerNorm.weight
embeddings.LayerNorm.bias
encoder.layer.0.attention.self.query.weight
encoder.layer.0.attention.self.query.bias
encoder.layer.0.attention.self.key.weight
encoder.layer.0.attention.self.key.bias
encoder.layer.0.attention.self.value.weight
encoder.layer.0.attention.self.value.bias
encoder.layer.0.attention.output.dense.weight
encoder.layer.0.attention.output.dense.bias
encoder.layer.0.attention.output.LayerNorm.weight
encoder.layer.0.attention.output.LayerNorm.bias
encoder.layer.0.intermediate.dense.weight
encoder.layer.0.intermediate.dense.bias
encoder.layer.0.output.dense.weight
encoder.layer.0.output.dense.bias
encoder.layer.0.output.LayerNorm.weight
encoder.layer.0.output.LayerNorm.bias
encoder.layer.1.attention.self.query.weight
encoder.layer.1.attention.self.query.bias
encoder.layer.1.attention.self.key.weight
encoder.layer.1.attention.self.key

In [6]:
# Sample sentence (a Chinese ultrasound-report line) used to exercise the
# ernie-health tokenizer.
text = '浅表器官彩色多普勒超声检查（甲状腺+双侧颈部淋巴结）:右侧甲状腺囊性回声，C-TIRADS 2类。  '
# return_tensors='pt' asks the tokenizer for tensor outputs.
encoded_input = tokenizer(text, return_tensors='pt')
# Print, one per line: the full encoding, its keys, and the input_ids shape.
print(
    encoded_input,
    encoded_input.keys(),
    encoded_input['input_ids'].shape,
    sep="\n",
)

{'input_ids': tensor([[    2,  1276,   380,   710,  1257,   607,   294,    34,   899,  2103,
           247,   584,    72,    52,     1,   361,   107,   283,   894,   526,
           442,   373,   125,   680,   579,   150,     1,     8,   274,   442,
           361,   107,   283,   296,    49,   193,   584,     1,  1026,   246,
         14727,   249,   391,    10,     3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])
torch.Size([1, 45])


In [7]:
# Run the ernie-health model on the tokenized sentence. The ** expansion
# passes the encoding's entries (input_ids, token_type_ids, attention_mask)
# as keyword arguments.
# NOTE(review): the recorded output carries a grad_fn, so this call ran with
# autograd tracking enabled; if only inference is intended, consider running
# it under torch.no_grad() — confirm intent.
result = model(**encoded_input)
print(result.last_hidden_state)
print(result.last_hidden_state.shape)  # [1, 45, 768]; author's note: input_ids length is limited to a maximum of 512

tensor([[[-0.0706, -0.2264,  0.2430,  ...,  0.0048,  0.1487, -0.4339],
         [-0.0776,  0.3773, -0.2424,  ..., -0.0373, -0.1223, -0.1551],
         [-0.3959, -0.0680, -0.1069,  ...,  0.1916,  0.0488, -0.2020],
         ...,
         [ 0.5982,  0.5061, -0.1198,  ..., -0.1833, -0.2860,  0.1238],
         [ 0.3649, -0.0703, -0.1436,  ..., -0.0112,  0.0233, -0.0195],
         [-0.0706, -0.2264,  0.2430,  ...,  0.0048,  0.1487, -0.4339]]],
       grad_fn=<NativeLayerNormBackward0>)
torch.Size([1, 45, 768])


In [16]:
# Load the tokenizer published under the "nghuyong/ernie-3.0-medium-zh"
# checkpoint and print, one per line, its model_input_names and
# all_special_tokens.
tokenizer_ernie = AutoTokenizer.from_pretrained("nghuyong/ernie-3.0-medium-zh")
print(tokenizer_ernie.model_input_names, tokenizer_ernie.all_special_tokens, sep="\n")

Downloading:   0%|          | 0.00/187k [00:00<?, ?B/s]

['input_ids', 'token_type_ids', 'attention_mask']
['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']


In [17]:
# Load the "nghuyong/ernie-3.0-medium-zh" checkpoint with the ErnieModel class
# directly (the ernie-health model earlier in the notebook went through
# AutoModel). The load log recorded for this cell reports the pooler dense
# weight/bias as newly initialized.
model_ernie = ErnieModel.from_pretrained("nghuyong/ernie-3.0-medium-zh")
# Bare last expression: the notebook displays the module tree of the model.
model_ernie

Some weights of ErnieModel were not initialized from the model checkpoint at nghuyong/ernie-3.0-medium-zh and are newly initialized: ['ernie.pooler.dense.bias', 'ernie.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ErnieModel(
  (embeddings): ErnieEmbeddings(
    (word_embeddings): Embedding(40000, 768, padding_idx=0)
    (position_embeddings): Embedding(2048, 768)
    (token_type_embeddings): Embedding(4, 768)
    (task_type_embeddings): Embedding(16, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): ErnieEncoder(
    (layer): ModuleList(
      (0): ErnieLayer(
        (attention): ErnieAttention(
          (self): ErnieSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): ErnieSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        

In [18]:
# Display the configuration object attached to the ERNIE 3.0 medium model
# (layer count, vocabulary size, max_position_embeddings, use_task_id, ...).
model_ernie.config

ErnieConfig {
  "_name_or_path": "nghuyong/ernie-3.0-medium-zh",
  "architectures": [
    "ErnieForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "ernie",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "task_type_vocab_size": 16,
  "transformers_version": "4.23.1",
  "type_vocab_size": 4,
  "use_cache": true,
  "use_task_id": true,
  "vocab_size": 40000
}

In [19]:
# List the qualified name of every parameter of the ERNIE 3.0 medium model,
# one per line. Only the name is printed; `para` is not used in the loop body.
for name, para in model_ernie.named_parameters():
    print(name)

embeddings.word_embeddings.weight
embeddings.position_embeddings.weight
embeddings.token_type_embeddings.weight
embeddings.task_type_embeddings.weight
embeddings.LayerNorm.weight
embeddings.LayerNorm.bias
encoder.layer.0.attention.self.query.weight
encoder.layer.0.attention.self.query.bias
encoder.layer.0.attention.self.key.weight
encoder.layer.0.attention.self.key.bias
encoder.layer.0.attention.self.value.weight
encoder.layer.0.attention.self.value.bias
encoder.layer.0.attention.output.dense.weight
encoder.layer.0.attention.output.dense.bias
encoder.layer.0.attention.output.LayerNorm.weight
encoder.layer.0.attention.output.LayerNorm.bias
encoder.layer.0.intermediate.dense.weight
encoder.layer.0.intermediate.dense.bias
encoder.layer.0.output.dense.weight
encoder.layer.0.output.dense.bias
encoder.layer.0.output.LayerNorm.weight
encoder.layer.0.output.LayerNorm.bias
encoder.layer.1.attention.self.query.weight
encoder.layer.1.attention.self.query.bias
encoder.layer.1.attention.self.key.we

In [28]:
# Build a very long input by repeating one sentence 200 times and tokenize it
# with the ERNIE 3.0 tokenizer. No truncation or max_length argument is passed.
# NOTE(review): this rebinds `text` and `encoded_input`, which the earlier
# ernie-health cells also assign; re-running those cells afterwards will see
# whichever value was written last.
text = '当地时间10月30日晚间，巴西总统选举第二轮投票约93%的选票已统计完成。' * 200
encoded_input = tokenizer_ernie(text, return_tensors='pt')
print(encoded_input)
print(encoded_input.keys())
# NOTE(review): the recorded length of 6802 is larger than the
# max_position_embeddings value of 2048 shown in model_ernie.config — confirm
# whether this encoding can be fed to model_ernie without truncation.
print(encoded_input['input_ids'].shape)  # [1, 6802]; author's note: a multi-layer Transformer-XL is used as the backbone network

{'input_ids': tensor([[    1,   153,    31,  ...,    33, 12043,     2]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1]])}
dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])
torch.Size([1, 6802])
