## 模型参数 torch 转换 paddle

In [6]:
import sys

# Make the local source checkouts importable: the transformers sources
# first, then the PaddleNLP repository.
sys.path.extend([
    '/workspace/transformers/src/',
    '/workspace/PaddleNLP',
])

In [7]:
import paddle
import torch
import numpy as np

# Input (torch) and output (paddle) checkpoint locations for both model sizes.
torch_base_model_path = 'pretrained_model/torch/base/pytorch_model.bin'
torch_large_model_path = 'pretrained_model/torch/large/pytorch_model.bin'

paddle_base_model_path = "pretrained_model/temp/paddle/base/model_state.pdparams"
paddle_large_model_path = "pretrained_model/temp/paddle/large/model_state.pdparams"

# Toggle which checkpoint is converted: True -> base, False -> large.
is_base = False

if is_base:
    torch_model_path = torch_base_model_path
    paddle_model_path = paddle_base_model_path
else:
    torch_model_path = torch_large_model_path
    paddle_model_path = paddle_large_model_path

# map_location='cpu' lets the checkpoint load on a host without the device it
# was saved from; every tensor is moved to CPU/numpy below anyway.
torch_state_dict = torch.load(torch_model_path, map_location='cpu')

paddle_state_dict = {}

# State_dict's keys mapping: from torch to paddle
keys_dict = {
    # about encoder layer
    'LayerNorm': 'layer_norm',
    'encoder.layer': 'encoder.layers'
}

# Key substrings marking parameters that are transposed during conversion
# (torch.nn.Linear keeps its weight as [out, in], paddle.nn.Linear as [in, out]).
transpose_markers = (
    'map_fc',
    'glyph_map',
    'linear',
    'proj',
    'dense.weight',
    'transform.weight',
    'seq_relationship.weight',
)

for torch_key, torch_tensor in torch_state_dict.items():
    # Rename the key; str.replace is a no-op when the substring is absent.
    paddle_key = torch_key
    for torch_name, paddle_name in keys_dict.items():
        paddle_key = paddle_key.replace(torch_name, paddle_name)

    weight = torch_tensor.cpu().numpy()

    name_matches = any(marker in paddle_key for marker in transpose_markers) or (
        'vocab' in paddle_key and 'weight' in paddle_key
    )
    # Transposing a 1-D array (e.g. a bias whose name contains 'proj') changes
    # nothing, so such tensors are neither transposed nor reported as transposed.
    if name_matches and weight.ndim > 1:
        print("transpose(permute) ---------->")
        weight = weight.transpose()

    paddle_state_dict[paddle_key] = paddle.to_tensor(weight)

    print("t: ", torch_key,"\t", torch_tensor.shape)
    print("p: ", paddle_key, "\t", paddle_state_dict[paddle_key].shape, "\n")

paddle.save(paddle_state_dict, paddle_model_path)

t:  embeddings.position_ids 	 torch.Size([1, 512])
p:  embeddings.position_ids 	 [1, 512] 

t:  embeddings.word_embeddings.weight 	 torch.Size([32000, 1024])
p:  embeddings.word_embeddings.weight 	 [32000, 1024] 

t:  embeddings.position_embeddings.weight 	 torch.Size([512, 1024])
p:  embeddings.position_embeddings.weight 	 [512, 1024] 

t:  embeddings.token_type_embeddings.weight 	 torch.Size([4, 1024])
p:  embeddings.token_type_embeddings.weight 	 [4, 1024] 

t:  embeddings.LayerNorm.weight 	 torch.Size([1024])
p:  embeddings.layer_norm.weight 	 [1024] 

t:  embeddings.LayerNorm.bias 	 torch.Size([1024])
p:  embeddings.layer_norm.bias 	 [1024] 

transpose(permute) ---------->
t:  embeddings.projection.weight 	 torch.Size([1024, 1024])
p:  embeddings.projection.weight 	 [1024, 1024] 

transpose(permute) ---------->
t:  embeddings.projection.bias 	 torch.Size([1024])
p:  embeddings.projection.bias 	 [1024] 

t:  encoder.layer.0.fourier.output.LayerNorm.weight 	 torch.Size([1024])
p:  e

## 对比前向精度

### Base

In [8]:
# torch
from transformers import FNetTokenizer, FNetModel

# The tokenizer and the model are both loaded from the same checkpoint id.
torch_checkpoint = "google/fnet-base"
torch_tokenizer = FNetTokenizer.from_pretrained(torch_checkpoint)
torch_model = FNetModel.from_pretrained(torch_checkpoint)

Some weights of the model checkpoint at google/fnet-base were not used when initializing FNetModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing FNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing FNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
# Sample sentence that the tokenizers in the following cells encode.
text = "Replace me by any text you'd like."

In [13]:
import torch

# Put the model in evaluation mode before running the comparison forward pass.
torch_model.eval()
torch_inputs = torch_tokenizer(text, return_tensors="pt")
# Inference only: no_grad avoids recording an autograd graph for this pass.
with torch.no_grad():
    torch_outputs = torch_model(**torch_inputs)

# First element of the model output (presumably the last hidden state; the
# printed label calls it "prediction_logits" — TODO confirm naming).
torch_logits = torch_outputs[0]
torch_array = torch_logits.cpu().detach().numpy()
print("torch_prediction_logits shape:{}".format(torch_array.shape))
print("torch_prediction_logits:{}".format(torch_array))

torch_prediction_logits shape:(1, 12, 768)
torch_prediction_logits:[[[ 4.4527473  -0.10137583 -0.21348645 ...  0.36847726 -0.23560826
   -0.25296995]
  [ 0.18779421 -0.39948907  0.23660113 ...  0.19996837  0.2783861
    0.27940997]
  [ 0.17300639  0.0606944  -0.47870204 ...  0.05200686 -1.295673
    0.578657  ]
  ...
  [ 0.17106001  0.00485597 -0.06762558 ... -0.3548019  -0.82001925
    0.01953557]
  [ 0.06859297  0.23336133 -0.57087415 ... -0.3022466  -0.58877695
   -0.11472143]
  [ 0.24348916  0.1635376   0.36463982 ...  0.5811312  -0.60914195
   -0.32033262]]]


In [14]:
# paddle
import paddle
import paddlenlp as ppnlp

# Directory that both from_pretrained calls below load from.
# NOTE(review): absolute path under /root — presumably the converted
# checkpoint was placed here manually; confirm before running elsewhere.
model_path = '/root/.paddlenlp/models/fnet-base/'
paddle_tokenizer = ppnlp.transformers.FNetTokenizer.from_pretrained(model_path)

paddle_model = ppnlp.transformers.FNetModel.from_pretrained(model_path)

In [15]:
# Put the model in evaluation mode before running the comparison forward pass.
paddle_model.eval()
paddle_inputs = paddle_tokenizer(text)
# Wrap every tokenizer field in a list before converting it to a tensor
# (presumably to add a leading batch dimension of 1 — TODO confirm).
paddle_inputs = {k:paddle.to_tensor([v]) for (k, v) in paddle_inputs.items()}
# Inference only: no_grad avoids gradient tracking for this pass.
with paddle.no_grad():
    paddle_outputs = paddle_model(**paddle_inputs)

# First element of the model output, converted to numpy for the comparison.
paddle_logits = paddle_outputs[0]
paddle_array = paddle_logits.numpy()
print("paddle_prediction_logits shape:{}".format(paddle_array.shape))
print("paddle_prediction_logits:{}".format(paddle_array))

paddle_prediction_logits shape:(1, 12, 768)
paddle_prediction_logits:[[[ 4.452679   -0.10139666 -0.21349066 ...  0.36848179 -0.23563094
   -0.25295028]
  [ 0.18779372 -0.39948893  0.23660143 ...  0.19996753  0.27838627
    0.27941138]
  [ 0.1730059   0.06069482 -0.47870392 ...  0.05200668 -1.2956748
    0.5786573 ]
  ...
  [ 0.17105973  0.00485592 -0.06762674 ... -0.3548022  -0.8200205
    0.01953554]
  [ 0.06859197  0.23335953 -0.57087404 ... -0.30224594 -0.58877856
   -0.11472122]
  [ 0.2434903   0.1635379   0.3646408  ...  0.5811306  -0.60914266
   -0.3203326 ]]]


In [16]:
import numpy as np

# Both frameworks must produce outputs of the same shape before an
# element-wise comparison makes sense.
shape_mismatch_msg = "the output logits should have the same shape, but got : {} and {} instead"
assert torch_array.shape == paddle_array.shape, shape_mismatch_msg.format(torch_array.shape, paddle_array.shape)

# Largest absolute element-wise deviation between the torch and paddle outputs.
diff = torch_array - paddle_array
print(np.abs(diff).max())

6.818771e-05


In [17]:
# Save the paddle model and its tokenizer into the same output directory.
base_model = 'pretrained_model/paddle/base/'
paddle_model.save_pretrained(base_model)
paddle_tokenizer.save_pretrained(base_model)

### Large

In [21]:
# torch
from transformers import FNetTokenizer, FNetModel

# The tokenizer and the model are both loaded from the same checkpoint id.
torch_checkpoint = "google/fnet-large"
torch_tokenizer = FNetTokenizer.from_pretrained(torch_checkpoint)
torch_model = FNetModel.from_pretrained(torch_checkpoint)

Some weights of the model checkpoint at google/fnet-large were not used when initializing FNetModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing FNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing FNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# Sample sentence that the tokenizers in the following cells encode.
text = "Replace me by any text you'd like."

In [23]:
import torch

# Put the model in evaluation mode before running the comparison forward pass.
torch_model.eval()
torch_inputs = torch_tokenizer(text, return_tensors="pt")
# Inference only: no_grad avoids recording an autograd graph for this pass.
with torch.no_grad():
    torch_outputs = torch_model(**torch_inputs)

# First element of the model output (presumably the last hidden state; the
# printed label calls it "prediction_logits" — TODO confirm naming).
torch_logits = torch_outputs[0]
torch_array = torch_logits.cpu().detach().numpy()
print("torch_prediction_logits shape:{}".format(torch_array.shape))
print("torch_prediction_logits:{}".format(torch_array))

torch_prediction_logits shape:(1, 12, 1024)
torch_prediction_logits:[[[-0.1167793  -0.44756714 -0.51739615 ... -0.19486761 -0.42286307
   -0.68805295]
  [-0.05607523  0.16956906  0.731847   ... -0.23074943 -0.02952154
    0.42456526]
  [-0.09154158  0.23390733  0.12532961 ... -0.39507478  0.2645798
    0.27237514]
  ...
  [ 0.29037276 -0.2935935  -0.72729474 ... -0.10259533  0.09559726
   -0.27696082]
  [ 0.07809267  0.3788107   0.45179468 ...  0.31190634  0.15828757
    0.17926018]
  [ 0.5804361  -0.18722543  0.3022339  ...  0.6312446   0.5981479
    0.42219177]]]


In [24]:
# paddle
import paddle
import paddlenlp as ppnlp

# Directory that both from_pretrained calls below load from.
# NOTE(review): absolute path under /root — presumably the converted
# checkpoint was placed here manually; confirm before running elsewhere.
model_path = '/root/.paddlenlp/models/fnet-large/'
paddle_tokenizer = ppnlp.transformers.FNetTokenizer.from_pretrained(model_path)

paddle_model = ppnlp.transformers.FNetModel.from_pretrained(model_path)

In [25]:
# Put the model in evaluation mode before running the comparison forward pass.
paddle_model.eval()
paddle_inputs = paddle_tokenizer(text)
# Wrap every tokenizer field in a list before converting it to a tensor
# (presumably to add a leading batch dimension of 1 — TODO confirm).
paddle_inputs = {k:paddle.to_tensor([v]) for (k, v) in paddle_inputs.items()}
# Inference only: no_grad avoids gradient tracking for this pass.
with paddle.no_grad():
    paddle_outputs = paddle_model(**paddle_inputs)

# First element of the model output, converted to numpy for the comparison.
paddle_logits = paddle_outputs[0]
paddle_array = paddle_logits.numpy()
print("paddle_prediction_logits shape:{}".format(paddle_array.shape))
print("paddle_prediction_logits:{}".format(paddle_array))

paddle_prediction_logits shape:(1, 12, 1024)
paddle_prediction_logits:[[[-0.11681072 -0.44754764 -0.51738214 ... -0.19486131 -0.42285317
   -0.68803596]
  [-0.05607542  0.16956775  0.7318458  ... -0.23074819 -0.02952065
    0.42456695]
  [-0.09154156  0.23390746  0.12533087 ... -0.3950725   0.26458147
    0.27237627]
  ...
  [ 0.29037338 -0.2935925  -0.7272925  ... -0.10259687  0.09559766
   -0.27695963]
  [ 0.07809278  0.37881133  0.45179617 ...  0.31190765  0.1582875
    0.17926173]
  [ 0.58043474 -0.18722285  0.30223408 ...  0.6312434   0.59814847
    0.4221893 ]]]


In [26]:
import numpy as np

# Both frameworks must produce outputs of the same shape before an
# element-wise comparison makes sense.
shape_mismatch_msg = "the output logits should have the same shape, but got : {} and {} instead"
assert torch_array.shape == paddle_array.shape, shape_mismatch_msg.format(torch_array.shape, paddle_array.shape)

# Largest absolute element-wise deviation between the torch and paddle outputs.
diff = torch_array - paddle_array
print(np.abs(diff).max())

0.000116825104


In [27]:
# Save the paddle model and its tokenizer into the same output directory.
# NOTE(review): the variable is named base_model but holds the *large*
# output directory in this cell.
base_model = 'pretrained_model/paddle/large/'
paddle_model.save_pretrained(base_model)
paddle_tokenizer.save_pretrained(base_model)