## 1.model总结：
- 模型根据编解码的结构分为：
	- 编码器模型，例如BERT; 适合做命名实体识别、文本分类、阅读理解
	- 解码器模型，例如GPT、BLOOM、LLaMA；适合做文本生成
	- 编码器解码器模型，例如T5，GLM
- Model Head 对模型的输出进一步做映射，是一种任务头。分为：
	- *Model只返回模型本身的内容
	- *ForCausalLM，解码器模型
	- *ForMaskedLM，编码器模型
	- *ForSeq2SeqLM，编码器解码器模型
	- *ForSequenceClassification，分类


In [1]:
# 1. Saving and loading models
from transformers import AutoConfig, AutoModel, AutoTokenizer

In [2]:
# Load the BERT model from the local checkpoint directory.
# The recorded repr below shows AutoModel resolved this checkpoint to a BertModel.
model = AutoModel.from_pretrained("/gemini/pretrain3")
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(21128, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [3]:
# Download the model repository from the Hugging Face Hub.
# The organisation id is "hfl" (lowercase letter L), not "hf1" (digit one).
# With the misspelled id the recorded run never got past "Cloning into 'rbt3'..."
# and had to be interrupted.
! git clone "https://huggingface.co/hfl/rbt3"

Cloning into 'rbt3'...
^C


In [5]:
# Clone the repository but only fetch the LFS objects matching *.bin (the weight files).
# Prerequisite: the git-lfs extension must be installed; without it git reports
# "'lfs' is not a git command", as in the recorded output of this cell.
# The organisation id is "hfl" (lowercase letter L), not "hf1" (digit one).
! git lfs clone "https://huggingface.co/hfl/rbt3" --include="*.bin"

git: 'lfs' is not a git command. See 'git --help'.

The most similar command is
	log


In [7]:
# 2. Configuring the model's loading parameters
# Reload the same local checkpoint with default arguments; the following cells
# inspect the configuration that comes with it.
model = AutoModel.from_pretrained("/gemini/pretrain3")

In [8]:
# Model-level parameters: display the configuration object attached to the loaded model.
model.config

BertConfig {
  "_name_or_path": "/gemini/pretrain3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.37.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [10]:
# 3. Use AutoConfig to inspect the model's runtime parameters.
# Loads the configuration straight from the checkpoint directory; the recorded
# output is the same BertConfig that model.config displayed earlier.
config = AutoConfig.from_pretrained("/gemini/pretrain3")
config

BertConfig {
  "_name_or_path": "/gemini/pretrain3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.37.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [11]:
# Display the config's attribute_map; for this checkpoint the recorded output is an empty dict.
config.attribute_map

{}

In [21]:
# 4. Calling the model
# Tokenize a single sentence and request PyTorch tensors (return_tensors="pt").
# NOTE(review): the variable name `input` shadows Python's built-in input();
# later cells reference this name, so it is left unchanged here.
sen = "弱小的我有个大大的梦想"
tokenizer = AutoTokenizer.from_pretrained("/gemini/pretrain3")
input = tokenizer(sen, return_tensors="pt")
input

{'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769, 3300,  702, 1920, 1920, 4638, 3457, 2682,
          102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [25]:
import torch

# Choose the GPU when one is available, otherwise fall back to the CPU.
# `device` is now always defined; previously it was assigned only inside the
# CUDA branch, so the next cell failed with NameError on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the checkpoint and place the model on the selected device.
model = AutoModel.from_pretrained("/gemini/pretrain3").to(device)

In [26]:
# Output of the forward pass.
# Move the encoded batch to the device the model actually lives on. Reading
# model.device (instead of a separately tracked `device` variable) keeps this
# cell working even when `device` was never assigned, e.g. on a CPU-only machine.
input = input.to(model.device)
output = model(**input)
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.6285,  0.1057,  0.0249,  ...,  0.4600,  0.0488, -0.4260],
         [ 0.0029,  0.1078, -0.7839,  ..., -0.4183, -0.4560, -0.7294],
         [ 1.4415, -1.3841, -1.0020,  ...,  0.7041, -0.1755, -0.4673],
         ...,
         [ 0.4306,  0.0630, -0.1104,  ...,  0.4592,  0.7441, -0.2455],
         [ 0.1210,  0.0339, -0.3225,  ...,  0.4293,  0.3329, -0.1104],
         [ 0.2389, -0.0932, -0.1649,  ...,  0.3371, -0.3371, -0.4923]]],
       device='cuda:0', grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 9.9979e-01,  9.9998e-01,  9.9775e-01,  9.9517e-01,  9.6599e-01,
          9.5374e-01, -9.2898e-01, -9.8111e-01,  9.9483e-01, -9.9941e-01,
          1.0000e+00,  9.9860e-01,  2.4086e-01, -9.5973e-01,  9.9988e-01,
         -9.9992e-01, -5.2047e-01,  9.9701e-01,  9.8733e-01,  9.2102e-03,
          9.9988e-01, -1.0000e+00, -9.2539e-01, -3.3459e-01,  5.3460e-01,
          9.9752e-01,  9.5977e-01, -9.8703e-01, -9

In [33]:
# 5. Using a model with a task head (model head)
import torch
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification

# num_labels=10 sizes the classification head for 10 classes. The head weights
# are not in the checkpoint and are newly initialised (see the recorded warning),
# so the model must be fine-tuned before its predictions are meaningful.
# Use the GPU when present and fall back to the CPU otherwise, instead of
# hard-coding "cuda:0", which raises on machines without CUDA.
model = AutoModelForSequenceClassification.from_pretrained(
    "/gemini/pretrain3",
    num_labels=10,
).to("cuda:0" if torch.cuda.is_available() else "cpu")
model

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /gemini/pretrain3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [34]:
# Run the sequence-classification model on the tokenized sentence.
# The number of classes is adjustable through num_labels. This model was loaded
# with num_labels=10, so the recorded logits have 10 entries; this is not a
# binary classifier, which is what you get when num_labels is left unspecified.
output = model(**input)
output

SequenceClassifierOutput(loss=None, logits=tensor([[-0.0195, -0.1944,  0.6337,  0.7816,  0.3433,  0.9622,  0.4997,  0.7193,
          0.2064, -0.5637]], device='cuda:0', grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

[31mApplication will exit soon in 3600 seconds which is set by env"ORION_TASK_IDLE_TIME".[0m


: 