## 导入 huggingface model

### base

In [21]:
from transformers import FNetTokenizer, FNetModel

# Sample sentence used to smoke-test the pretrained checkpoint.
text = "Replace me by any text you'd like."

# Fetch the tokenizer and the FNetModel weights for the base checkpoint.
tokenizer = FNetTokenizer.from_pretrained("google/fnet-base")
model = FNetModel.from_pretrained("google/fnet-base")

# Tokenize to PyTorch tensors, padding/truncating to a fixed length of 512.
encoded_input = tokenizer(
    text,
    return_tensors='pt',
    padding='max_length',
    truncation=True,
    max_length=512,
)

# Single forward pass through the model.
output = model(**encoded_input)

Some weights of the model checkpoint at google/fnet-base were not used when initializing FNetModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing FNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing FNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# Save the base model and its tokenizer files into one local directory.
# The variable is named after the *base* checkpoint it points at; the
# `torch_large_model_path` name is reserved for the large checkpoint, which
# later cells assign themselves before use.
torch_base_model_path = 'pretrained_model/torch/base'
model.save_pretrained(torch_base_model_path)
# Last expression: the tuple of written tokenizer file paths is displayed.
tokenizer.save_pretrained(torch_base_model_path)

('pretrained_model/torch/base/tokenizer_config.json',
 'pretrained_model/torch/base/special_tokens_map.json',
 'pretrained_model/torch/base/spiece.model',
 'pretrained_model/torch/base/added_tokens.json')

### large

In [23]:
from transformers import FNetTokenizer, FNetModel

# Sample sentence used to smoke-test the pretrained checkpoint.
text = "Replace me by any text you'd like."

# Rebind `tokenizer` / `model` to the large checkpoint.
tokenizer = FNetTokenizer.from_pretrained("google/fnet-large")
model = FNetModel.from_pretrained("google/fnet-large")

# Tokenize to PyTorch tensors, padding/truncating to a fixed length of 512.
encoded_input = tokenizer(
    text,
    return_tensors='pt',
    padding='max_length',
    truncation=True,
    max_length=512,
)

# Single forward pass, then display the shapes of both outputs.
output = model(**encoded_input)
(output.last_hidden_state.shape, output.pooler_output.shape)

Some weights of the model checkpoint at google/fnet-large were not used when initializing FNetModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing FNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing FNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


(torch.Size([1, 512, 1024]), torch.Size([1, 1024]))

In [24]:
# Save the large model and its tokenizer files into one local directory.
# `torch_large_model_path` is assigned again with the same value in the
# PaddleNLP section before the Paddle tokenizer is loaded from it.
torch_large_model_path = 'pretrained_model/torch/large'
model.save_pretrained(torch_large_model_path)
# Last expression: the tuple of written tokenizer file paths is displayed.
tokenizer.save_pretrained(torch_large_model_path)

('pretrained_model/torch/large/tokenizer_config.json',
 'pretrained_model/torch/large/special_tokens_map.json',
 'pretrained_model/torch/large/spiece.model',
 'pretrained_model/torch/large/added_tokens.json')

In [25]:
# Display the module tree (repr) of whatever `model` is currently bound to.
model

FNetModel(
  (embeddings): FNetEmbeddings(
    (word_embeddings): Embedding(32000, 1024, padding_idx=3)
    (position_embeddings): Embedding(512, 1024)
    (token_type_embeddings): Embedding(4, 1024)
    (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
    (projection): Linear(in_features=1024, out_features=1024, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): FNetEncoder(
    (layer): ModuleList(
      (0): FNetLayer(
        (fourier): FNetFourierTransform(
          (self): FNetBasicFourierTransform()
          (output): FNetBasicOutput(
            (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
          )
        )
        (intermediate): FNetIntermediate(
          (dense): Linear(in_features=1024, out_features=4096, bias=True)
        )
        (output): FNetOutput(
          (dense): Linear(in_features=4096, out_features=1024, bias=True)
          (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)

## PaddleNLP

In [1]:
import os
import sys

# Location of the local PaddleNLP checkout to import from. Defaults to the
# original hard-coded path; set the PADDLENLP_ROOT environment variable to
# point at a checkout elsewhere without editing the notebook.
PADDLENLP_ROOT = os.environ.get('PADDLENLP_ROOT', '/workspace/fnet_paddle/PaddleNLP')

# Guard the append so re-running this cell does not pile up duplicate
# entries on sys.path.
if PADDLENLP_ROOT not in sys.path:
    sys.path.append(PADDLENLP_ROOT)

In [2]:
import paddlenlp as ppnlp

In [4]:
# Point at the directory where the large checkpoint's tokenizer files were
# saved by the Hugging Face tokenizer earlier in this notebook.
torch_large_model_path = 'pretrained_model/torch/large'
# Build PaddleNLP's FNetTokenizer from that same directory.
p_tokenizer = ppnlp.transformers.FNetTokenizer.from_pretrained(torch_large_model_path)

In [6]:
# Smoke-test the Paddle tokenizer on a short string and display the encoding.
p_tokenizer('hello, my bro')

{'input_ids': [4, 109, 6286, 16680, 275, 1145, 5],
 'token_type_ids': [0, 0, 0, 0, 0, 0, 0]}

In [50]:
# Fourier transform sanity check on a small, deterministic input.
import numpy as np
import paddle

# 4x4x4 integer grid whose entries equal their index along axis 1.
x = np.mgrid[0:4, 0:4, 0:4][1]
xp = paddle.to_tensor(x)

# N-dimensional FFT restricted to axes 1 and 2, brought back to NumPy so it
# can be compared against the PyTorch result later.
paddle_fft = paddle.fft.fftn(xp, axes=(1, 2))
fftn_xp = paddle_fft.numpy()
print(fftn_xp)

[[[24.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+8.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.-8.j  0.+0.j  0.+0.j  0.-0.j]]

 [[24.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+8.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.-8.j  0.+0.j  0.+0.j  0.-0.j]]

 [[24.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+8.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.-8.j  0.+0.j  0.+0.j  0.-0.j]]

 [[24.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+8.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.+0.j  0.+0.j  0.+0.j  0.-0.j]
  [-8.-8.j  0.+0.j  0.+0.j  0.-0.j]]]


In [83]:
import torch

# Run the same FFT (dims 1 and 2) in PyTorch on the identical NumPy input.
tp = torch.tensor(x)
fftn_tp = torch.fft.fftn(tp, dim=(1, 2)).numpy()

# Compare the two frameworks within floating-point tolerance. Exact `==` on
# FFT output is fragile: independent implementations can differ in the last
# few bits even when both are correct.
np.allclose(fftn_tp, fftn_xp)

True

In [128]:
from functools import partial
# Paddle FFT with the transform axes fixed to (1, 2); call as fourier_transform(tensor).
fourier_transform = partial(paddle.fft.fftn, axes=(1, 2))

In [129]:
# PyTorch counterpart of `fourier_transform`: FFT with dims fixed to (1, 2).
# Relies on `partial` and `torch` having been imported by earlier cells.
t_fourier_transform = partial(torch.fft.fftn, dim=(1, 2))

In [136]:
# Random standard-normal input of shape (3, 3, 748) for comparing the two
# FFT implementations. A locally seeded generator makes the values identical
# on every run (so the comparison is reproducible) without touching NumPy's
# global random state.
hidden = np.random.default_rng(0).standard_normal((3, 3, 748))

In [138]:
# Confirm the shape of the random input.
hidden.shape

(3, 3, 748)

In [147]:
# PyTorch path: convert `hidden` to a complex64 tensor, apply the FFT over
# dims (1, 2), and keep only the real part (`.real` is an attribute here).
t = t_fourier_transform(torch.tensor(hidden, dtype=torch.complex64)).real

In [161]:
# Paddle path: convert `hidden` to a complex64 tensor, apply the FFT over
# axes (1, 2), and keep only the real part. Note that `.real()` is invoked as
# a method call here, unlike the attribute access used in the PyTorch cell.
p = fourier_transform(paddle.to_tensor(hidden, dtype=paddle.complex64)).real()

In [165]:
# Show the first slice of the PyTorch result (compare by eye with `p[0]`).
t[0]

tensor([[-14.9004,   6.6395,  -9.6454,  ...,  15.2369,  -9.6454,   6.6395],
        [-12.1572, -53.5400,  45.1136,  ...,  30.6507, -26.7674, -44.2849],
        [-12.1572, -44.2849, -26.7674,  ...,  22.1350,  45.1136, -53.5401]])

In [163]:
# Show the first slice of the Paddle result (compare by eye with `t[0]`).
p[0]

Tensor(shape=[3, 748], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
       [[-14.90035820,  6.63952351 , -9.64538479 , ...,  15.23688889,
         -9.64538574 ,  6.63953114 ],
        [-12.15722370, -53.54005432,  45.11361313, ...,  30.65068436,
         -26.76741409, -44.28493881],
        [-12.15722370, -44.28491592, -26.76739693, ...,  22.13502502,
          45.11362839, -53.54005432]])

In [166]:
# Display the whole Paddle result tensor.
p

Tensor(shape=[3, 3, 748], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
       [[[-14.90035820,  6.63952351 , -9.64538479 , ...,  15.23688889,
          -9.64538574 ,  6.63953114 ],
         [-12.15722370, -53.54005432,  45.11361313, ...,  30.65068436,
          -26.76741409, -44.28493881],
         [-12.15722370, -44.28491592, -26.76739693, ...,  22.13502502,
           45.11362839, -53.54005432]],

        [[ 43.08150482,  61.31409454,  17.09560966, ..., -2.66833973 ,
           17.09561729,  61.31409454],
         [ 15.10789871, -34.19815063, -16.27117729, ..., -11.98955345,
           13.75884056, -12.07743549],
         [ 15.10789871, -12.07742691,  13.75884533, ...,  11.01449394,
          -16.27115822, -34.19815826]],

        [[-28.05401421,  17.24009705, -14.18903923, ..., -2.34581757 ,
          -14.18904305,  17.24008560],
         [ 5.37189198 , -9.05312157 , -14.34867573, ...,  6.90930939 ,
           21.56071091, -36.16657257],
         [ 5.37189198 , -36.1665420