In [1]:
import numpy as np
import torch

### Definições de pesos e features

In [104]:
node_feature = np.array([[0.5, 0.9, 0.6],[0.9, 0.2, 0.7],[0.3, 0.2, 0.5],[0.2, 0.2, 0.1]])

In [105]:
node_feature

array([[0.5, 0.9, 0.6],
       [0.9, 0.2, 0.7],
       [0.3, 0.2, 0.5],
       [0.2, 0.2, 0.1]])

In [106]:
edge_feature = np.array([[0.7, 0.2],[0.3, 0.1],[0.4, 0.7]])

In [107]:
edge_feature

array([[0.7, 0.2],
       [0.3, 0.1],
       [0.4, 0.7]])

In [108]:
weight_node = np.array([[0.2, -0.1, 0.3, 0.5, 0.1], [0.5, 0.4, -0.7, 0.3, -0.8], [0.2, 0.1, -0.2, -0.4, 0.3]])

In [109]:
weight_node

array([[ 0.2, -0.1,  0.3,  0.5,  0.1],
       [ 0.5,  0.4, -0.7,  0.3, -0.8],
       [ 0.2,  0.1, -0.2, -0.4,  0.3]])

In [110]:
weight_node.shape

(3, 5)

In [111]:
weight_edge = np.array([[0.1, -0.3, 0.5, 0.2], [-0.4, 0.1, 0.9, 0.4]])

In [112]:
weight_edge

array([[ 0.1, -0.3,  0.5,  0.2],
       [-0.4,  0.1,  0.9,  0.4]])

In [113]:
parameter_vector_node = np.array([0.8, 0.2, -0.3, -0.4, 0.5, -0.1, 0.2, 0.4, -1.0, -0.4])
parameter_vector_edge = np.array([0.3, -0.5, 0.9, -0.7, 0.3, 0.6, 0.2, -0.3, 0.2])

In [114]:
parameter_vector_node

array([ 0.8,  0.2, -0.3, -0.4,  0.5, -0.1,  0.2,  0.4, -1. , -0.4])

In [115]:
parameter_vector_edge

array([ 0.3, -0.5,  0.9, -0.7,  0.3,  0.6,  0.2, -0.3,  0.2])

In [116]:
node_to_node_adj_matrix = np.array([[1.0, 0.0, 0.0, 0.0],[1.0, 1.0, 0.0, 1.0],[0.0, 1.0, 1.0, 0.0],[0.0, 0.0, 0.0, 1.0]])

In [117]:
node_to_node_adj_matrix

array([[1., 0., 0., 0.],
       [1., 1., 0., 1.],
       [0., 1., 1., 0.],
       [0., 0., 0., 1.]])

In [118]:
node_to_edge_adj_matrix = np.array([[1.0, 1.0, 0.0, 0.0],[0.0, 1.0, 1.0, 0.0],[0.0, 1.0, 0.0, 1.0]])

In [119]:
node_to_edge_adj_matrix

array([[1., 1., 0., 0.],
       [0., 1., 1., 0.],
       [0., 1., 0., 1.]])

In [120]:
edge_to_node_adj_matrix = np.array([[0.0, 0.0, 0.0],[1.0, 0.0, 1.0],[0.0, 1.0, 0.0], [0.0, 0.0, 0.0]])

In [121]:
edge_to_node_adj_matrix

array([[0., 0., 0.],
       [1., 0., 1.],
       [0., 1., 0.],
       [0., 0., 0.]])

In [122]:
edge_to_edge_adj_matrix = np.array([[1.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0, 1.0]])

In [123]:
edge_to_edge_adj_matrix

array([[1., 0., 0.],
       [1., 1., 1.],
       [0., 0., 1.]])

# Node-Level Attention Layer (node step)

## Etapa 1 - Geração de embeddings

In [124]:
node_embeds = torch.matmul(torch.FloatTensor(node_feature), torch.FloatTensor(weight_node))

In [125]:
node_embeds

tensor([[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
        [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
        [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
        [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]])

In [126]:
edge_embeds = torch.matmul(torch.FloatTensor(edge_feature), torch.FloatTensor(weight_edge))

In [127]:
edge_embeds

tensor([[-0.0100, -0.1900,  0.5300,  0.2200],
        [-0.0100, -0.0800,  0.2400,  0.1000],
        [-0.2400, -0.0500,  0.8300,  0.3600]])

## Etapa 3 - Fazer o concat

In [128]:
w1 = node_embeds.tile([1, node_embeds.shape[0]]).reshape([node_embeds.shape[0], node_embeds.shape[0], node_embeds.shape[1]])

In [129]:
w1

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900]],

        [[ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400]],

        [[ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200]],

        [[ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]]])

In [130]:
w2 = node_embeds.tile([node_embeds.shape[0], 1]).reshape([node_embeds.shape[0], node_embeds.shape[0], node_embeds.shape[1]])

In [131]:
w2

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]]])

In [132]:
concat_result_n = torch.cat((w1, w2), dim=2)

In [133]:
concat_result_n

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900,  0.6700,  0.3700,
          -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900,  0.4200,  0.0600,
          -0.0100,  0.2300,  0.1400],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900,  0.2600,  0.1000,
          -0.1500,  0.0100,  0.0200],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900,  0.1600,  0.0700,
          -0.1000,  0.1200, -0.1100]],

        [[ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400,  0.6700,  0.3700,
          -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400,  0.4200,  0.0600,
          -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400,  0.2600,  0.1000,
          -0.1500,  0.0100,  0.0200],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400,  0.1600,  0.0700,
          -0.1000,  0.1200, -0.1100]],

        [[ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200,  0.6700,  0.3700,
          -0.6000,  0.2800, -0.4900

### Multiplicar resultado da concatenação pelo vetor de parâmetros

In [136]:
parameter_vector_node = torch.FloatTensor(parameter_vector_node)

In [137]:
attention_output_n = torch.matmul(concat_result_n, parameter_vector_node)

In [138]:
attention_output_n

tensor([[ 0.1160,  0.1130,  0.3490,  0.3150],
        [ 0.0120,  0.0090,  0.2450,  0.2110],
        [-0.0380, -0.0410,  0.1950,  0.1610],
        [-0.2480, -0.2510, -0.0150, -0.0490]])

### LeakyReLU

In [141]:
edge_activation = torch.nn.LeakyReLU()

In [142]:
attention_output_n = edge_activation(attention_output_n)

In [143]:
attention_output_n

tensor([[ 1.1600e-01,  1.1300e-01,  3.4900e-01,  3.1500e-01],
        [ 1.2000e-02,  9.0000e-03,  2.4500e-01,  2.1100e-01],
        [-3.8000e-04, -4.1000e-04,  1.9500e-01,  1.6100e-01],
        [-2.4800e-03, -2.5100e-03, -1.5000e-04, -4.9000e-04]])

### Softmax

In [144]:
importance_coeficients = (torch.exp(attention_output_n)/(torch.exp(attention_output_n)*node_to_node_adj_matrix).sum(axis=1)[:, None])*node_to_node_adj_matrix

In [145]:
importance_coeficients

tensor([[1.0000, 0.0000, 0.0000, 0.0000],
        [0.3108, 0.3099, 0.0000, 0.3793],
        [0.0000, 0.4513, 0.5487, 0.0000],
        [0.0000, 0.0000, 0.0000, 1.0000]], dtype=torch.float64)

### Fazer a conta de acordo com a importância

In [147]:
denominator = node_to_node_adj_matrix.sum(axis=1)

In [148]:
denominator

array([1., 3., 2., 1.])

In [149]:
embed_propagated = node_embeds.tile([node_embeds.shape[0], 1]).reshape([node_embeds.shape[0], node_embeds.shape[0], node_embeds.shape[1]])

In [151]:
embed_propagated

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]],

        [[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]]])

In [152]:
suma = importance_coeficients.reshape([importance_coeficients.shape[1], importance_coeficients.shape[0], 1]) * embed_propagated

In [153]:
suma

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.0000,  0.0000, -0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.0000,  0.0000, -0.0000]],

        [[ 0.2083,  0.1150, -0.1865,  0.0870, -0.1523],
         [ 0.1302,  0.0186, -0.0031,  0.0713,  0.0434],
         [ 0.0000,  0.0000, -0.0000,  0.0000,  0.0000],
         [ 0.0607,  0.0265, -0.0379,  0.0455, -0.0417]],

        [[ 0.0000,  0.0000, -0.0000,  0.0000, -0.0000],
         [ 0.1895,  0.0271, -0.0045,  0.1038,  0.0632],
         [ 0.1427,  0.0549, -0.0823,  0.0055,  0.0110],
         [ 0.0000,  0.0000, -0.0000,  0.0000, -0.0000]],

        [[ 0.0000,  0.0000, -0.0000,  0.0000, -0.0000],
         [ 0.0000,  0.0000, -0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.0000,  0.0000,  0.0000],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]]], dtype=torch.float64)

In [154]:
final_embeds_n = suma.sum(axis=1)/denominator[:, None]

In [155]:
final_embeds_n

tensor([[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
        [ 0.1330,  0.0534, -0.0758,  0.0679, -0.0502],
        [ 0.1661,  0.0410, -0.0434,  0.0546,  0.0371],
        [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]], dtype=torch.float64)

In [156]:
final_activation = torch.nn.LeakyReLU()

In [157]:
final_embeds_n = final_activation(final_embeds_n)  

In [158]:
final_embeds_n

tensor([[ 6.7000e-01,  3.7000e-01, -6.0000e-03,  2.8000e-01, -4.9000e-03],
        [ 1.3303e-01,  5.3383e-02, -7.5841e-04,  6.7941e-02, -5.0214e-04],
        [ 1.6610e-01,  4.0974e-02, -4.3409e-04,  5.4643e-02,  3.7078e-02],
        [ 1.6000e-01,  7.0000e-02, -1.0000e-03,  1.2000e-01, -1.1000e-03]],
       dtype=torch.float64)

# Node-Level Attention Layer (edge step)

In [159]:
edge_embeds

tensor([[-0.0100, -0.1900,  0.5300,  0.2200],
        [-0.0100, -0.0800,  0.2400,  0.1000],
        [-0.2400, -0.0500,  0.8300,  0.3600]])

### Etapa 3 - fazer o concat

In [160]:
w1 = node_embeds.tile([1, edge_embeds.shape[0]]).reshape([node_embeds.shape[0], edge_embeds.shape[0], node_embeds.shape[1]])

In [161]:
w1

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900]],

        [[ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400]],

        [[ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200]],

        [[ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100],
         [ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100]]])

In [162]:
edge_embeds

tensor([[-0.0100, -0.1900,  0.5300,  0.2200],
        [-0.0100, -0.0800,  0.2400,  0.1000],
        [-0.2400, -0.0500,  0.8300,  0.3600]])

In [163]:
w2 = edge_embeds.tile([node_embeds.shape[0], 1]).reshape([node_embeds.shape[0], edge_embeds.shape[0], edge_embeds.shape[1]])

In [164]:
w2

tensor([[[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]]])

In [166]:
concat_result_e = torch.cat((w1, w2), dim=2)

In [167]:
concat_result_e

tensor([[[ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900, -0.0100, -0.1900,
           0.5300,  0.2200],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900, -0.0100, -0.0800,
           0.2400,  0.1000],
         [ 0.6700,  0.3700, -0.6000,  0.2800, -0.4900, -0.2400, -0.0500,
           0.8300,  0.3600]],

        [[ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400, -0.0100, -0.1900,
           0.5300,  0.2200],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400, -0.0100, -0.0800,
           0.2400,  0.1000],
         [ 0.4200,  0.0600, -0.0100,  0.2300,  0.1400, -0.2400, -0.0500,
           0.8300,  0.3600]],

        [[ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200, -0.0100, -0.1900,
           0.5300,  0.2200],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200, -0.0100, -0.0800,
           0.2400,  0.1000],
         [ 0.2600,  0.1000, -0.1500,  0.0100,  0.0200, -0.2400, -0.0500,
           0.8300,  0.3600]],

        [[ 0.1600,  0.0700, -0.1000,  0.1200, -0.1100, -0.0100, -0.1900,
   

### Multiplicar resultado da concatenação pelo vetor de parâmetros

In [168]:
parameter_vector_edge = torch.FloatTensor(parameter_vector_edge)

In [169]:
attention_output_e = torch.matmul(concat_result_e, parameter_vector_edge)

In [171]:
attention_output_e

tensor([[-1.0260, -0.9410, -1.1980],
        [-0.1910, -0.1060, -0.3630],
        [-0.2670, -0.1820, -0.4390],
        [-0.3530, -0.2680, -0.5250]])

### LeakyReLU

In [172]:
edge_activation = torch.nn.LeakyReLU()

In [173]:
attention_output_e = edge_activation(attention_output_e)

In [174]:
attention_output_e

tensor([[-0.0103, -0.0094, -0.0120],
        [-0.0019, -0.0011, -0.0036],
        [-0.0027, -0.0018, -0.0044],
        [-0.0035, -0.0027, -0.0052]])

### Softmax

In [176]:
edge_to_node_adj_matrix

array([[0., 0., 0.],
       [1., 0., 1.],
       [0., 1., 0.],
       [0., 0., 0.]])

In [178]:
importance_coeficients = torch.nan_to_num((torch.exp(attention_output_e)/(torch.exp(attention_output_e)*edge_to_node_adj_matrix).sum(axis=1)[:, None])*edge_to_node_adj_matrix)

  importance_coeficients = torch.nan_to_num((torch.exp(attention_output_e)/(torch.exp(attention_output_e)*edge_to_node_adj_matrix).sum(axis=1)[:, None])*edge_to_node_adj_matrix)


In [179]:
importance_coeficients

tensor([[0.0000, 0.0000, 0.0000],
        [0.5004, 0.0000, 0.4996],
        [0.0000, 1.0000, 0.0000],
        [0.0000, 0.0000, 0.0000]], dtype=torch.float64)

### Fazer a conta de acordo com a importância

In [180]:
denominator = edge_to_node_adj_matrix.sum(axis=1)

In [181]:
denominator

array([0., 2., 1., 0.])

In [182]:
embed_propagated = edge_embeds.tile([node_embeds.shape[0], 1]).reshape([node_embeds.shape[0], edge_embeds.shape[0], edge_embeds.shape[1]])

In [183]:
embed_propagated

tensor([[[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]],

        [[-0.0100, -0.1900,  0.5300,  0.2200],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.2400, -0.0500,  0.8300,  0.3600]]])

In [184]:
suma = importance_coeficients.reshape([importance_coeficients.shape[0], importance_coeficients.shape[1], 1]) * embed_propagated

In [185]:
suma

tensor([[[-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000,  0.0000,  0.0000]],

        [[-0.0050, -0.0951,  0.2652,  0.1101],
         [-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.1199, -0.0250,  0.4146,  0.1798]],

        [[-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.0100, -0.0800,  0.2400,  0.1000],
         [-0.0000, -0.0000,  0.0000,  0.0000]],

        [[-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000,  0.0000,  0.0000]]], dtype=torch.float64)

In [194]:
final_embeds_e = torch.nan_to_num(suma.sum(axis=1)/denominator[:, None])

  final_embeds_e = torch.nan_to_num(suma.sum(axis=1)/denominator[:, None])


In [195]:
final_embeds_e

tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [-0.0625, -0.0600,  0.3399,  0.1450],
        [-0.0100, -0.0800,  0.2400,  0.1000],
        [ 0.0000,  0.0000,  0.0000,  0.0000]], dtype=torch.float64)

In [196]:
final_embeds_e = final_activation(final_embeds_e)  

In [197]:
final_embeds_e

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-6.2451e-04, -6.0030e-04,  3.3994e-01,  1.4497e-01],
        [-1.0000e-04, -8.0000e-04,  2.4000e-01,  1.0000e-01],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],
       dtype=torch.float64)

In [198]:
final_embeds_e

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-6.2451e-04, -6.0030e-04,  3.3994e-01,  1.4497e-01],
        [-1.0000e-04, -8.0000e-04,  2.4000e-01,  1.0000e-01],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],
       dtype=torch.float64)

In [199]:
output = torch.cat([final_embeds_n, final_embeds_e], dim=1)

In [200]:
output

tensor([[ 6.7000e-01,  3.7000e-01, -6.0000e-03,  2.8000e-01, -4.9000e-03,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 1.3303e-01,  5.3383e-02, -7.5841e-04,  6.7941e-02, -5.0214e-04,
         -6.2451e-04, -6.0030e-04,  3.3994e-01,  1.4497e-01],
        [ 1.6610e-01,  4.0974e-02, -4.3409e-04,  5.4643e-02,  3.7078e-02,
         -1.0000e-04, -8.0000e-04,  2.4000e-01,  1.0000e-01],
        [ 1.6000e-01,  7.0000e-02, -1.0000e-03,  1.2000e-01, -1.1000e-03,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],
       dtype=torch.float64)