# 模型加載與保存

In [5]:
from transformers import AutoConfig, AutoModel, AutoTokenizer
# 本地
#location='../../pretrain/rbt3'
# 遠端
location='hfl/rbt3'
import torch
device='cuda:0' if torch.cuda.is_available() else 'cpu'

## 在線加載
```python
model = AutoModel.from_pretrained("hfl/rbt3", force_download=True).to(device)
```
## 模型下載
```python
!git clone "https://huggingface.co/hfl/rbt3"
!git lfs clone "https://huggingface.co/hfl/rbt3" --include="*.bin"
```
## 離線加載
```
model = AutoModel.from_pretrained("../../pretrain/rbt3")
```

## 模型加載參數

In [8]:
model = AutoModel.from_pretrained(location).to(device) #"../../pretrain/rbt3"

In [9]:
model.config

BertConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "hfl/rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.46.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [10]:
config = AutoConfig.from_pretrained(location)
config

BertConfig {
  "_name_or_path": "hfl/rbt3",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.46.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [11]:
config.output_attentions

False

In [12]:
from transformers import BertConfig

# 模型調用

In [22]:
# 如果直接就要使用,pipeline 就行 
from transformers import pipeline
gen=pipeline("text-classification",location,device=device)
sen = "弱小的我也有大夢想！"
gen(sen)
sen = "饭菜有些咸！"
gen(sen)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at hfl/rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[{'label': 'LABEL_0', 'score': 0.6710025072097778}]

In [13]:
sen = "弱小的我也有大夢想！"
tokenizer = AutoTokenizer.from_pretrained(location)
inputs = tokenizer(sen, return_tensors="pt")
inputs

{'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769,  738, 3300, 1920, 1918, 2682, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

## 不帶Model Head的模型調用

In [23]:
model = AutoModel.from_pretrained(location, output_attentions=True)

In [24]:
output = model(**inputs)
output



BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.8812,  0.9352,  0.5495,  ..., -0.5131,  0.9547,  0.1613],
         [-0.5624, -0.1943,  0.3573,  ..., -0.1176, -0.4833, -0.4479],
         [ 0.1194,  0.6461, -0.0019,  ..., -0.4652,  0.4643, -0.4450],
         ...,
         [ 0.2789,  0.2843, -0.2725,  ...,  0.0100,  0.5123, -0.0343],
         [ 0.4851,  0.2916, -0.3561,  ..., -0.2059,  0.3635, -0.1149],
         [ 0.8771,  0.9381,  0.5470,  ..., -0.5125,  0.9530,  0.1641]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-1.9194e-01, -8.3136e-01, -1.0000e+00, -9.8282e-01,  4.9045e-01,
          7.3040e-02,  1.7596e-01,  2.7832e-01,  9.9510e-01,  9.9998e-01,
         -7.5305e-02, -1.0000e+00, -4.0089e-02,  9.9940e-01, -1.0000e+00,
          9.9998e-01,  9.5928e-01,  9.7920e-01, -9.9934e-01, -1.4762e-01,
         -9.9442e-01, -7.9461e-01,  1.2178e-01,  9.7069e-01,  9.9981e-01,
         -9.9928e-01, -9.9999e-01,  2.4507e-01, -9.5591e-01, -9.999

In [25]:
output.last_hidden_state.size()

torch.Size([1, 12, 768])

In [26]:
len(inputs["input_ids"][0])

12

## 帶Model Head的模型調用

In [27]:
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification

In [28]:
clz_model = AutoModelForSequenceClassification.from_pretrained(location, num_labels=10)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at hfl/rbt3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
clz_model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.2932, -0.0113,  0.2361, -0.3064,  0.0161, -0.0471, -0.4812, -0.3675,
          0.4133,  0.5009]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
clz_model.config.num_labels