In [1]:
r"""
Graphormer module alignment"""

'\nGraphormer module alignment'

In [1]:
# select device
import os 
#os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import torch
import mindspore as ms
import numpy as np

import transformers.models.graphormer.modeling_graphormer as ptm
import transformers.models.graphormer.configuration_graphormer as ptc

import mindnlp.transformers.models.graphormer.modeling_graphormer as msm
import mindnlp.transformers.models.graphormer.configuration_graphormer as msc

  from .autonotebook import tqdm as notebook_tqdm
2023-11-15 15:37:35,950 - modelscope - INFO - PyTorch version 1.12.0+cu116 Found.
2023-11-15 15:37:35,952 - modelscope - INFO - Loading ast index from /home/hubo/.cache/modelscope/ast_indexer
2023-11-15 15:37:35,975 - modelscope - INFO - Loading done! Current index file version is 1.9.4, with md5 d9a86a06b3e111926fdc559cc3e2c156 and a total number of 945 components indexed


In [3]:
# init config
ms_config = msc.GraphormerConfig()
pt_config = ptc.GraphormerConfig()

In [4]:
# init model
ms_model = msm.GraphormerModel(ms_config)
pt_model = ptm.GraphormerModel(pt_config)

In [5]:
def ms_param_name_to_pt(param_name: str):
    if 'embedding_table' in param_name:
        param_name = param_name.replace('embedding_table', 'weight')
    elif 'layer_norm'in param_name:
        param_name = param_name.replace('layer_norm.gamma', 'layer_norm.weight')
        param_name = param_name.replace('layer_norm.beta', 'layer_norm.bias')
    return param_name

In [6]:
# print ms_model parameters' name
ms_keys = ms_model.parameters_dict().keys()
ms_keys = set([ms_param_name_to_pt(k) for k in ms_keys])

pt_params = dict(pt_model.named_parameters())
pt_keys = set(set(pt_params.keys()))
diff1 = sorted(ms_keys - pt_keys)
diff2 = sorted(pt_keys - ms_keys)
if len(diff1) or len(diff2):
    print(diff1)
    print(diff2)
else:
    print('Parameter names matched')

Parameter names matched


In [7]:
# load parameters
for key, param in ms_model.parameters_and_names():
    key = ms_param_name_to_pt(key)
    param.set_data(ms.Tensor(pt_params.get(key).detach().numpy()))

In [8]:
# set eval mode
ms_model.set_train(False)
pt_model.eval()

GraphormerModel(
  (graph_encoder): GraphormerGraphEncoder(
    (dropout_module): Dropout(p=0.1, inplace=False)
    (graph_node_feature): GraphormerGraphNodeFeature(
      (atom_encoder): Embedding(4609, 768, padding_idx=0)
      (in_degree_encoder): Embedding(512, 768, padding_idx=0)
      (out_degree_encoder): Embedding(512, 768, padding_idx=0)
      (graph_token): Embedding(1, 768)
    )
    (graph_attn_bias): GraphormerGraphAttnBias(
      (edge_encoder): Embedding(1537, 32, padding_idx=0)
      (edge_dis_encoder): Embedding(131072, 1)
      (spatial_pos_encoder): Embedding(512, 32, padding_idx=0)
      (graph_token_virtual_distance): Embedding(1, 32)
    )
    (layers): ModuleList(
      (0): GraphormerGraphEncoderLayer(
        (dropout_module): Dropout(p=0.1, inplace=False)
        (activation_dropout_module): Dropout(p=0.1, inplace=False)
        (activation_fn): GELUActivation()
        (self_attn): GraphormerMultiheadAttention(
          (attention_dropout_module): Dropout(p=

In [9]:
def ids_tensor_np(shape, vocab_size):
    print(vocab_size)
    return np.random.randint(0, vocab_size-1, shape)

In [10]:
def np_inputs_to_ms(inputs):
    inputs_ms = dict()
    for key, value in inputs.items():
        if key == 'attn_bias':
            inputs_ms[key] = ms.tensor(value, dtype=ms.float32)
        else:
            inputs_ms[key] = ms.tensor(value, dtype=ms.int64)
    return inputs_ms

def np_inputs_to_pt(inputs):
    inputs_pt = dict()
    for key, value in inputs.items():
        if key == 'attn_bias':
            inputs_pt[key] = torch.tensor(value, dtype=torch.float32)
        else:
            inputs_pt[key] = torch.tensor(value, dtype=torch.int64)
    return inputs_pt
        

In [11]:
# prepare data
batch_size=10
graph_size=20

inputs = dict(
    input_nodes = ids_tensor_np([batch_size, graph_size, 1], ms_config.num_atoms),
    input_edges = ids_tensor_np([batch_size, graph_size, graph_size, ms_config.multi_hop_max_dist, 1], ms_config.num_edges),
    attn_bias = ids_tensor_np([batch_size, graph_size + 1, graph_size + 1], ms_config.num_atoms),
    in_degree = ids_tensor_np([batch_size, graph_size], ms_config.num_in_degree),
    out_degree = ids_tensor_np([batch_size, graph_size], ms_config.num_out_degree),
    spatial_pos = ids_tensor_np([batch_size, graph_size, graph_size], ms_config.num_spatial),
    attn_edge_type = ids_tensor_np([batch_size, graph_size, graph_size, 1], ms_config.num_edges)
)


inputs_ms = np_inputs_to_ms(inputs)
inputs_pt = np_inputs_to_pt(inputs)

4608
1536
4608
512
512
512
1536


In [12]:
# output
ms_out = ms_model(**inputs_ms)
pt_out = pt_model(**inputs_pt)

In [20]:
ms_out.keys()

odict_keys(['last_hidden_state', 'hidden_states'])

In [23]:
# Check that the outputs have same shape
assert ms_out['last_hidden_state'].shape == pt_out['last_hidden_state'].shape
assert len(ms_out['hidden_states']) == len(pt_out['hidden_states'])

for k in range(len(ms_out['hidden_states'])):
    assert ms_out['hidden_states'][k].shape == pt_out['hidden_states'][k].shape

In [46]:
def judge(o1, o2, loss = 1e-3, prefix = '-'):
    prefix += '-'
    if (isinstance(o1, tuple)):
        for i in range(len(o1)):
            judge(o1[i], o2[i], loss=loss, prefix=prefix)
    elif (isinstance(o1, ms.Tensor)):
        print(f"{prefix}{np.allclose(o1.asnumpy(), o2.detach().numpy(), loss, loss)}")
    else:
        print(f"{type(o1)}-{type(o2)}:{o1==o2}")



In [47]:
# Check that the outputs difference are within tolerable range
loss = 5e-3

judge(ms_out['hidden_states'], pt_out['hidden_states'], loss)

for k in range(len(ms_out['hidden_states'])):
    judge(ms_out['hidden_states'][k], pt_out['hidden_states'][k], loss)


<class 'list'>-<class 'list'>:False
--True
--True
--True
--True
--True
--True
--True
--True
--True
--True
--True
--True
--True
