In [1]:
import torch
import torchvision

In [2]:
import arrayfire as af
import array

In [3]:
def toArrayFire(x):
    """Build an ArrayFire array from the host data of a tensor.

    The input is detached, made contiguous and viewed as a NumPy array. The
    address of that NumPy buffer, the NumPy shape in reversed order and the
    NumPy dtype character are then handed to ``af.Array``. A zero-dimensional
    input is given the shape ``(1,)``.

    Parameters
    ----------
    x : object supporting ``detach().contiguous().numpy()``
        Presumably a CPU ``torch.Tensor`` (``.numpy()`` is called directly,
        with no device transfer) -- confirm against callers.

    Returns
    -------
    The object returned by ``af.Array`` for that buffer.
    """
    host = x.detach().contiguous().numpy()
    # Axis order is reversed before handing the buffer over; presumably this
    # accounts for ArrayFire's column-major layout vs NumPy's row-major one
    # -- TODO confirm.
    dims = host.shape[::-1] if host.ndim else (1,)
    return af.Array(host.ctypes.data, dims, host.dtype.char)

def saveStateDict(module, filepath):
    """Write a module's parameters and running statistics to ArrayFire files.

    Parameters are visited in ``module.named_parameters()`` order and saved to
    ``filepath`` with ``True`` passed as the fourth (append) argument of
    ``af.array.save_array``:

    * A parameter whose name contains ``'in_proj'`` is split into three chunks
      along dim 0 and saved as ``name + 'q'``, ``name + 'k'`` and
      ``name + 'v'``.
    * Any other parameter with at least one dimension is saved under its own
      name; if the name contains both ``'fc'`` and ``'weight'`` the ArrayFire
      array is transposed first.
    * Zero-dimensional parameters are skipped.

    Afterwards every ``state_dict`` entry whose name contains ``'running'`` is
    saved to ``filepath + 'running'``.

    Progress (names, sizes, and the number of non-``in_proj`` parameters
    written) is printed along the way.

    Parameters
    ----------
    module : object exposing ``named_parameters()`` and ``state_dict()``.
    filepath : str
        Destination file for the parameters; the running statistics go to the
        same path with ``'running'`` appended.
    """
    saved = 0  # counts only the non-in_proj parameters, as reported by print() below
    for name, param in module.named_parameters():
        print(name, "\t", param.size())
        if 'in_proj' in name:
            print(param.shape)
            q, k, v = param.chunk(3, dim=0)
            print('in_proj!')
            # Each chunk is saved under its own suffix. The previous code
            # wrote the k chunk under the 'v' key, so v was never exported.
            for suffix, part in (('q', q), ('k', k), ('v', v)):
                af.array.save_array(name + suffix, toArrayFire(part), filepath, True)
            continue
        if len(param.size()) > 0:
            af_array = toArrayFire(param)
            if 'fc' in name and 'weight' in name:
                af_array = af.array.transpose(af_array)
            af.array.save_array(name, af_array, filepath, True)
            saved = saved + 1
    print(saved)
    # Fetch the state dict once instead of rebuilding it for every lookup.
    state = module.state_dict()
    for name, value in state.items():
        if 'running' in name:
            print(name)
            af.array.save_array(name, toArrayFire(value), filepath + 'running', True)

In [4]:
from models.backbone import *
from models.position_encoding import *
from models.matcher import *

In [5]:
from models.transformer import *

In [6]:
# Problem sizes for a small decoder-layer run.
batch_size = 1
embedding_size = 12
src_len = 5
tgt_len = 10
# Random inputs shaped (sequence length, batch, embedding).
# NOTE(review): no RNG seed is set in the visible cells, so these values (and
# therefore the exported arrays) differ on every run.
queries = torch.rand(tgt_len, batch_size, embedding_size)
memory = torch.rand(src_len, batch_size, embedding_size)
# Dropout is disabled; the second positional argument (1) is presumably the
# number of attention heads -- confirm against models.transformer.
model = TransformerDecoderLayer(embedding_size, 1, dropout=0.0, dim_feedforward=128)
# Call the module itself rather than .forward() so any registered hooks run.
output = model(queries, memory)

tensor([[[ 0.0490,  0.0873,  0.3679,  0.2326, -0.3538,  0.4081, -0.4808,
          -0.0980, -0.2038, -0.0947,  0.3269, -0.0382]],

        [[ 0.0479,  0.0888,  0.3667,  0.2319, -0.3535,  0.4089, -0.4807,
          -0.0980, -0.2073, -0.0962,  0.3247, -0.0381]],

        [[ 0.0478,  0.0899,  0.3655,  0.2316, -0.3535,  0.4091, -0.4792,
          -0.0987, -0.2110, -0.0977,  0.3198, -0.0385]],

        [[ 0.0486,  0.0893,  0.3671,  0.2326, -0.3523,  0.4077, -0.4804,
          -0.0984, -0.2056, -0.0970,  0.3229, -0.0399]],

        [[ 0.0478,  0.0899,  0.3664,  0.2310, -0.3533,  0.4095, -0.4801,
          -0.0990, -0.2121, -0.0979,  0.3207, -0.0383]],

        [[ 0.0484,  0.0892,  0.3672,  0.2315, -0.3535,  0.4095, -0.4806,
          -0.0987, -0.2113, -0.0977,  0.3225, -0.0384]],

        [[ 0.0491,  0.0873,  0.3678,  0.2329, -0.3536,  0.4079, -0.4805,
          -0.0978, -0.2030, -0.0947,  0.3267, -0.0386]],

        [[ 0.0481,  0.0894,  0.3669,  0.2322, -0.3536,  0.4092, -0.4812,
          

In [16]:
# Dump the decoder-layer inputs, its parameters and its output into a single
# ArrayFire array file.
filepath = '/private/home/padentomasello/scratch/pytorch_testing/transformer_decoder_layer.array'
# The first write passes False as the append flag; every later write passes True.
af.array.save_array('queries', toArrayFire(queries), filepath, False)
af.array.save_array('memory', toArrayFire(memory), filepath, True)
i = 2  # running count of arrays written so far (queries and memory come first)
params = {}  # q/k/v chunks held back until the matching in_proj bias is seen
for name, param in model.named_parameters():
    if 'in_proj' in name:
        q, k, v = param.chunk(3, dim=0)
        # '0' marks the weight and '1' the bias, so that sorting the keys
        # yields q-weight, q-bias, k-weight, k-bias, v-weight, v-bias.
        hack = '1' if 'in_proj_bias' in name else '0'
        params['0q_' + hack + name] = q
        params['1k_' + hack + name] = k
        params['2v_' + hack + name] = v
        if 'in_proj_bias' in name:
            # The bias closes the group: flush all pending chunks in sorted order.
            # NOTE(review): chunks are only flushed here, so this relies on an
            # in_proj bias following each in_proj weight.
            for key in sorted(params):
                af_array = toArrayFire(params[key])
                if 'weight' in key:
                    af_array = af.array.transpose(af_array)
                print(key, i, params[key].shape)
                print(af.array.save_array(key, af_array, filepath, True))
                i = i + 1
            params = {}
    elif len(param.size()) > 0:
        af_array = toArrayFire(param)
        # Transpose matching weights exactly once. Separate checks per tag
        # would transpose twice (and so cancel out) for a name that contains
        # more than one of the tags.
        if 'weight' in name and any(tag in name for tag in ('fc', 'proj', 'linear')):
            af_array = af.array.transpose(af_array)
        print(name, i, param.shape)
        print(af.array.save_array(name, af_array, filepath, True))
        i = i + 1
af.array.save_array('output', toArrayFire(output), filepath, True)

0q_0in_proj_weight 2 torch.Size([128, 128])
2
0q_1in_proj_bias 3 torch.Size([128])
3
1k_0in_proj_weight 4 torch.Size([128, 128])
4
1k_1in_proj_bias 5 torch.Size([128])
5
2v_0in_proj_weight 6 torch.Size([128, 128])
6
2v_1in_proj_bias 7 torch.Size([128])
7
out_proj.weight 8 torch.Size([128, 128])
8
out_proj.bias 9 torch.Size([128])
9


10

In [8]:
# Show the decoder layer's output tensor as this cell's result.
output

tensor([[[ 0.8993,  0.6522,  0.0140, -0.5446, -1.3320,  2.0483,  0.3816,
          -1.0369, -1.3780, -0.8465,  0.5010,  0.6418]],

        [[-0.0706,  0.8539,  1.7758,  0.7532, -1.3990,  0.7595, -1.7490,
          -0.4076,  0.6180,  0.2756, -0.2960, -1.1138]],

        [[-0.3730, -0.6208,  2.3744,  1.5793, -0.6512,  0.5313, -0.9931,
          -0.0413, -0.4942,  0.2699, -0.9931, -0.5882]],

        [[-0.1228, -0.7571,  1.7146,  0.7535, -0.9930,  1.1182, -1.9891,
           0.0999,  0.0316, -0.2605,  1.0828, -0.6781]],

        [[-0.7519, -0.3129,  1.9925, -0.5571, -2.1034,  1.1056, -0.3702,
           0.6410,  0.2674,  0.3839,  0.3998, -0.6947]],

        [[-0.4930, -0.1527,  1.9099,  1.5434, -1.8207,  0.9104, -0.7728,
           0.3439, -0.7771, -0.1039, -0.1245, -0.4627]],

        [[ 0.1966,  0.6162,  1.5532,  0.6287, -1.0405,  1.5832, -1.5561,
           0.3241, -1.3947, -0.7082, -0.4102,  0.2077]],

        [[-0.0186,  0.0143,  1.4590,  1.9462, -1.8433,  0.5386,  0.0257,
          