In [5]:
import subprocess
import os

# Source /etc/network_turbo in a bash subprocess and copy every environment
# line that mentions "proxy" into this kernel's os.environ, so that code run
# later in this process sees the same proxy variables.
# NOTE(review): the script is assumed to export proxy settings - confirm for
# the platform this notebook runs on.
result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
if result.returncode != 0:
    # A non-zero status means the script could not be sourced or grep matched
    # nothing. Report it instead of continuing silently without a proxy, but
    # do not raise: this step is best-effort.
    print(f'network_turbo: no proxy settings applied (exit code {result.returncode}) {result.stderr.strip()}')
for line in output.splitlines():
    if '=' in line:
        # Split on the first "=" only, because the value may itself contain "=".
        var, value = line.split('=', 1)
        os.environ[var] = value

# 模型加载与保存

In [6]:
from transformers import AutoConfig, AutoModel, AutoTokenizer


# 在线加载

In [7]:
# Online load: fetch the hfl/rbt3 checkpoint from the Hub, using ./rbt3 as the
# cache directory and forcing a fresh download.
model = AutoModel.from_pretrained(
    "hfl/rbt3",
    cache_dir='./rbt3',
    force_download=True,
)


# 离线加载

In [8]:
# Offline load from a local snapshot directory. The path must point at a
# folder that contains config.json and pytorch_model.bin.
model = AutoModel.from_pretrained(
    './rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c'
)

# 模型加载参数

In [9]:
# Load the model from the local snapshot so that its configuration can be
# inspected in the following cells.
model = AutoModel.from_pretrained(
    './rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c'
)


In [10]:
# Display the configuration object attached to the loaded model.
model.config

BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "dtype": "float32",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.57.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

In [11]:
# Load the configuration on its own from the same local snapshot directory
# and display it.
config = AutoConfig.from_pretrained(
    './rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c'
)
config

BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.57.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}

# 模型调用

In [12]:
# Tokenize one sample sentence into PyTorch tensors and display the encoding.
# `inputs` is reused by the model calls in later cells.
tokenizer = AutoTokenizer.from_pretrained("hfl/rbt3")
sen = '弱小的我也有大梦想！'
inputs = tokenizer(sen, return_tensors='pt')
inputs

{'input_ids': tensor([[ 101, 2483, 2207, 4638, 2769,  738, 3300, 1920, 3457, 2682, 8013,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

# 不带Model Head的模型调用

In [13]:
# Load the bare model (AutoModel, no task-specific head) from the local
# snapshot directory.
model = AutoModel.from_pretrained(
    './rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c'
)


In [14]:
# Run the bare model on the tokenized sentence and display the raw output
# (last_hidden_state and pooler_output).
output = model(**inputs)
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[ 0.6804,  0.6664,  0.7170,  ..., -0.4102,  0.7839, -0.0262],
         [-0.7378, -0.2748,  0.5034,  ..., -0.1359, -0.4331, -0.5874],
         [-0.0212,  0.5642,  0.1032,  ..., -0.3617,  0.4646, -0.4747],
         ...,
         [ 0.0853,  0.6679, -0.1757,  ..., -0.0942,  0.4664,  0.2925],
         [ 0.3336,  0.3224, -0.3355,  ..., -0.3262,  0.2532, -0.2507],
         [ 0.6761,  0.6688,  0.7154,  ..., -0.4083,  0.7824, -0.0224]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-1.2646e-01, -9.8619e-01, -1.0000e+00, -9.8325e-01,  8.0238e-01,
         -6.6268e-02,  6.6919e-02,  1.4784e-01,  9.9451e-01,  9.9995e-01,
         -8.3051e-02, -1.0000e+00, -9.8866e-02,  9.9980e-01, -1.0000e+00,
          9.9993e-01,  9.8291e-01,  9.5363e-01, -9.9948e-01, -1.3219e-01,
         -9.9733e-01, -7.7934e-01,  1.0720e-01,  9.8040e-01,  9.9953e-01,
         -9.9939e-01, -9.9997e-01,  1.4967e-01, -8.7627e-01, -9.999

In [15]:
# Shape of the final hidden states for the tokenized sentence.
output.last_hidden_state.shape

torch.Size([1, 12, 768])

In [16]:
# Number of token ids in the (single) encoded sentence, read from the
# second axis of the input_ids tensor.
inputs['input_ids'].shape[1]

12

# 带Model Head的模型调用

In [17]:
from transformers import AutoModelForSequenceClassification, BertForSequenceClassification


In [18]:
# Load the same local checkpoint with a sequence-classification head that has
# 10 labels. The classifier weights are not in the checkpoint, so they are
# newly initialized (see the warning printed by this cell).
clz_model = AutoModelForSequenceClassification.from_pretrained(
    './rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c',
    num_labels=10,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ./rbt3/models--hfl--rbt3/snapshots/0aa0527ff4170f29e1dfd3eb6ef60dc67e1bf75c and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Display the module structure of the classification model.
clz_model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-2): 3 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-1

In [20]:
# Forward the tokenized sentence through the classification model and display
# the result; only the tokenizer output is passed in (no labels).
clz_model(**inputs) 


SequenceClassifierOutput(loss=None, logits=tensor([[ 0.4536, -0.1164,  0.3103, -0.0170, -0.6026, -0.5078,  0.4890, -0.1149,
          0.3790,  0.3096]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [21]:
# Display the number of labels recorded in the classification model's config.
clz_model.config.num_labels


10