In [1]:
r"""
Graphormer module alignment"""

'\nGraphormer module alignment'

In [9]:
# select device
# Restrict this process to the physical GPU with index 1. The variable is set here, before
# torch and mindspore are imported in the next cell -- presumably so that both frameworks
# only ever see that one device.
# NOTE(review): confirm that GPU index 1 exists on the machine running this notebook.
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [10]:
import torch, mindspore
import numpy as np

import transformers.models.graphormer.modeling_graphormer as ptm
import transformers.models.graphormer.configuration_graphormer as ptc

import mindnlp.transformers.models.graphormer.modeling_graphormer as msm
import mindnlp.transformers.models.graphormer.configuration_graphormer as msc

In [11]:
# init config
# Build one configuration per framework, both from the constructors' default arguments.
# NOTE(review): the comparison below assumes the two default configs describe the same
# architecture -- confirm the defaults agree between mindnlp and transformers.
ms_config = msc.GraphormerConfig()
pt_config = ptc.GraphormerConfig()

In [12]:
# init model
# Instantiate the MindSpore and the PyTorch GraphormerModel from their respective configs.
# The MindSpore parameters are overwritten later, in the "load parameters" cell, so the
# initial values created here are presumably irrelevant for the comparison.
ms_model = msm.GraphormerModel(ms_config)
pt_model = ptm.GraphormerModel(pt_config)

In [34]:
def convert_ms_param_name_to_pt(param_name: str):
    """Translate a MindSpore parameter name into its PyTorch spelling.

    Args:
        param_name: Parameter name as reported by the MindSpore model.

    Returns:
        The name with ``embedding_table`` replaced by ``weight``; otherwise, for
        names containing ``layer_norm``, with ``layer_norm.gamma`` /
        ``layer_norm.beta`` replaced by ``layer_norm.weight`` /
        ``layer_norm.bias``. Any other name is returned unchanged.
    """
    # Embedding tables take precedence: such names never get the layer-norm rewrite.
    if 'embedding_table' in param_name:
        return param_name.replace('embedding_table', 'weight')

    if 'layer_norm' not in param_name:
        return param_name

    layer_norm_renames = (
        ('layer_norm.gamma', 'layer_norm.weight'),
        ('layer_norm.beta', 'layer_norm.bias'),
    )
    converted = param_name
    for ms_part, pt_part in layer_norm_renames:
        converted = converted.replace(ms_part, pt_part)
    return converted

In [35]:
# Check that, after name conversion, both models expose exactly the same parameter names.
ms_keys = {convert_ms_param_name_to_pt(name) for name in ms_model.parameters_dict()}
pt_keys = {name for name, _ in pt_model.named_parameters()}

# diff1: names only the MindSpore model has; diff2: names only the PyTorch model has.
diff1 = sorted(ms_keys.difference(pt_keys))
diff2 = sorted(pt_keys.difference(ms_keys))
if not diff1 and not diff2:
    print('Parameter names matched')
else:
    print(diff1)
    print(diff2)

Parameter names matched


In [36]:
# load parameters
# Copy every PyTorch weight into the MindSpore parameter of the same (converted) name so
# that both models compute with identical values.
# The lookup table is built here so the cell does not rely on a `pt_params` variable left
# over from an earlier kernel session.
pt_params = dict(pt_model.named_parameters())
for key, param in ms_model.parameters_and_names():
    key = convert_ms_param_name_to_pt(key)
    if key not in pt_params:
        # Fail with the offending name instead of an AttributeError on None.
        raise KeyError(f"no PyTorch parameter matches MindSpore parameter '{key}'")
    # detach() drops the autograd graph; cpu() makes the numpy conversion device-independent.
    param.set_data(mindspore.Tensor(pt_params[key].detach().cpu().numpy()))

In [37]:
# set eval mode
# Put both models in inference mode before comparing outputs; the model repr below lists
# Dropout(p=0.1) modules, which presumably would otherwise make the two outputs diverge.
ms_model.set_train(False)
# Kept as the last expression of the cell, so its return value (the model repr) is displayed.
pt_model.eval()

GraphormerModel(
  (graph_encoder): GraphormerGraphEncoder(
    (dropout_module): Dropout(p=0.1, inplace=False)
    (graph_node_feature): GraphormerGraphNodeFeature(
      (atom_encoder): Embedding(4609, 768, padding_idx=0)
      (in_degree_encoder): Embedding(512, 768, padding_idx=0)
      (out_degree_encoder): Embedding(512, 768, padding_idx=0)
      (graph_token): Embedding(1, 768)
    )
    (graph_attn_bias): GraphormerGraphAttnBias(
      (edge_encoder): Embedding(1537, 32, padding_idx=0)
      (edge_dis_encoder): Embedding(131072, 1)
      (spatial_pos_encoder): Embedding(512, 32, padding_idx=0)
      (graph_token_virtual_distance): Embedding(1, 32)
    )
    (layers): ModuleList(
      (0): GraphormerGraphEncoderLayer(
        (dropout_module): Dropout(p=0.1, inplace=False)
        (activation_dropout_module): Dropout(p=0.1, inplace=False)
        (activation_fn): GELUActivation()
        (self_attn): GraphormerMultiheadAttention(
          (attention_dropout_module): Dropout(p=

In [49]:
# Config fields used as sizes/upper bounds for the random inputs in the "prepare data" cell
# (batch_size and graph_size are not read from the config; they are chosen in that cell).
# Only the last bare expression is echoed as the cell output; the attribute accesses before
# it are evaluated and their values discarded.
ms_config.num_atoms
ms_config.num_edges
ms_config.num_spatial
ms_config.num_in_degree
ms_config.num_out_degree
ms_config.multi_hop_max_dist
ms_config.num_classes

1

In [50]:
def ids_tensor_np(shape, vocab_size):
    """Create a random integer id array with every value in ``[0, vocab_size)``.

    Args:
        shape: Output shape, as a list or tuple of ints.
        vocab_size: Number of distinct ids; must be at least 1.

    Returns:
        A ``numpy.ndarray`` of dtype ``int64`` with the requested shape.

    Raises:
        ValueError: If ``vocab_size`` is smaller than 1 (raised by NumPy).
    """
    # numpy's randint lives in np.random and its upper bound is exclusive, so passing
    # `vocab_size` (not `vocab_size - 1`) covers every valid id and still works when
    # vocab_size == 1.
    return np.random.randint(0, vocab_size, size=tuple(shape), dtype=np.int64)

In [41]:
# prepare data
batch_size = 10
graph_size = 20

# Random integer inputs, one entry per Graphormer input field. Each array is filled by
# ids_tensor_np with ids bounded by the corresponding config value.
input_dict = dict(
    attn_bias=ids_tensor_np([batch_size, graph_size + 1, graph_size + 1], ms_config.num_atoms),
    attn_edge_type=ids_tensor_np([batch_size, graph_size, graph_size, 1], ms_config.num_edges),
    spatial_pos=ids_tensor_np([batch_size, graph_size, graph_size], ms_config.num_spatial),
    in_degree=ids_tensor_np([batch_size, graph_size], ms_config.num_in_degree),
    out_degree=ids_tensor_np([batch_size, graph_size], ms_config.num_out_degree),
    input_nodes=ids_tensor_np([batch_size, graph_size, 1], ms_config.num_atoms),
    # multi_hop_max_dist is read from the config: there is no `self` at notebook top level.
    input_edges=ids_tensor_np(
        [batch_size, graph_size, graph_size, ms_config.multi_hop_max_dist, 1],
        ms_config.num_edges,
    ),
    labels=ids_tensor_np([batch_size], ms_config.num_classes),
)

# NOTE(review): `x` is a float placeholder of shape (4, 64, 512) that is independent of
# `input_dict`; confirm which of the two the forward-pass cell is meant to consume.
x = np.random.randn(4, 64, 512)
ms_x = mindspore.Tensor(x, dtype=mindspore.float32) # dtype depends on model
pt_x = torch.tensor(x, dtype=torch.float32)         # sometimes maybe int not float

In [42]:
# output
# Run the same input through the MindSpore and the PyTorch model.
# NOTE(review): `ms_x` / `pt_x` are the float placeholder tensors from the "prepare data"
# cell, not the Graphormer fields collected in `input_dict` -- confirm that this
# single-tensor call matches the forward signature of both models.
ms_out = ms_model(ms_x)
pt_out = pt_model(pt_x)

In [43]:
# shape & loss
# Output entry 1 is left out of both passes: it is None (NoneType) and has no shape/values.
compared = (0, 2)

# First pass: the corresponding outputs must have identical shapes.
for idx in compared:
    assert ms_out[idx].shape == pt_out[idx].shape

# Second pass: the values must agree within rtol = atol = 1e-5.
for idx in compared:
    assert np.allclose(
        ms_out[idx].asnumpy(),
        pt_out[idx].detach().numpy(),
        rtol=1e-5,
        atol=1e-5,
    )

print("PASS")

PASS


In [49]:
def judge(o1, o2, loss = 1e-3, prefix = '-'):
    """Recursively compare a MindSpore output with its PyTorch counterpart and print the result.

    Args:
        o1: MindSpore-side value: a tuple (possibly nested), a ``mindspore.Tensor``,
            or any other object.
        o2: PyTorch-side value with the same nesting as ``o1``.
        loss: Tolerance passed to ``np.allclose`` as both ``rtol`` and ``atol``.
        prefix: Marker string; one ``'-'`` is appended per recursion level.

    Returns:
        None. One line is printed per leaf: ``<prefix><bool>`` for tensors, and
        ``<type(o1)>-<type(o2)>:<o1==o2>`` for every other leaf.
    """
    depth_marker = prefix + '-'

    if isinstance(o1, tuple):
        # Walk the tuple by index so that o2 is indexed exactly like o1.
        for position, left in enumerate(o1):
            judge(left, o2[position], loss=loss, prefix=depth_marker)
        return

    if isinstance(o1, mindspore.Tensor):
        is_close = np.allclose(o1.asnumpy(), o2.detach().numpy(), rtol=loss, atol=loss)
        print(f"{depth_marker}{is_close}")
        return

    # Non-tensor leaves (e.g. None) are compared with plain equality.
    print(f"{type(o1)}-{type(o2)}:{o1==o2}")

In [50]:
# Recursively compare both model outputs with judge's default tolerance (loss=1e-3);
# one line is printed per compared element.
judge(ms_out, pt_out)

---True
<class 'NoneType'>-<class 'NoneType'>:True
---True
