In [172]:
import torch
import torch.nn as nn
import torch.nn.functional as func
#from torch.nn.utils.rnn import pack_padded_sequence

class GatedRecurrentUnitOutputAttentionNet(nn.Module):
    """GRU whose per-timestep outputs are pooled with a learned attention.

    Every GRU output step is scored by a linear layer, the scores are
    soft-maxed over the sequence axis, and the attention-weighted sum of the
    GRU outputs is passed through dropout and a final linear layer.

    Args:
        input_size: Number of features per timestep (forwarded to ``nn.GRU``).
        output_size: Number of values predicted per sample.
        window_size: Stored as ``self.seq_len``; not used by ``forward``.
        hidden_size: GRU hidden state size.
        num_layers: Number of stacked GRU layers.
        target_type_string: ``'Regression'`` (``nn.MSELoss``) or
            ``'Classification'`` (``nn.CrossEntropyLoss``); selects
            ``self.loss_function``.
        bias: Forwarded to ``nn.GRU``.
        batch_first: Forwarded to ``nn.GRU``; also decides which axis of the
            GRU output is treated as the sequence axis in ``forward``.
        bidirectional: Forwarded to ``nn.GRU``; doubles the feature width
            seen by the attention and prediction layers.
        dropout_hidden: Dropout between stacked GRU layers.
        dropout_output: Dropout applied to the pooled vector before the
            prediction layer.

    Raises:
        ValueError: If ``target_type_string`` is not one of the two supported
            values (previously this silently left ``loss_function`` unset).
    """

    def __init__(self, input_size, output_size,
                 window_size, hidden_size, num_layers,
                 target_type_string='Regression',
                 bias=True, batch_first=True,
                 bidirectional=False,
                 dropout_hidden=0, dropout_output=0):
        super().__init__()
        self.bidirectional = bidirectional
        self.seq_len = window_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          bias=bias,
                          batch_first=batch_first,
                          dropout=dropout_hidden,
                          bidirectional=bidirectional)
        self.output_dropout_layer = nn.Dropout(dropout_output)

        # The GRU emits num_directions * hidden_size features per timestep.
        num_directions = 2 if bidirectional else 1
        feature_size = hidden_size * num_directions
        self.attention_layer = nn.Linear(feature_size, 1)
        self.predict_layer = nn.Linear(feature_size, output_size)

        self.target_type_string = target_type_string
        if target_type_string == 'Regression':
            self.loss_function = nn.MSELoss()
        elif target_type_string == 'Classification':
            self.loss_function = nn.CrossEntropyLoss()
        else:
            # Fail fast instead of leaving `loss_function` undefined.
            raise ValueError(
                "target_type_string must be 'Regression' or 'Classification', "
                "got {!r}".format(target_type_string))

    def forward(self, input):
        """Return predictions of shape ``(batch, output_size)``.

        Args:
            input: ``(batch, seq_len, input_size)`` when the GRU was built
                with ``batch_first=True``, otherwise
                ``(seq_len, batch, input_size)``.
        """
        # gru_out: (batch, seq_len, num_directions * hidden_size) when
        # batch_first, else (seq_len, batch, num_directions * hidden_size).
        gru_out, _ = self.gru(input)

        # Attention must normalise and pool over the *sequence* axis, whose
        # position depends on the GRU layout. Hard-coding dim=1 pooled over
        # the batch axis whenever batch_first=False.
        seq_dim = 1 if self.gru.batch_first else 0

        attn_scores = self.attention_layer(gru_out)            # one score per step
        attn_weights = func.softmax(attn_scores, dim=seq_dim)  # sums to 1 over steps

        # Weighted sum over the sequence -> (batch, num_directions * hidden_size).
        context = torch.sum(gru_out * attn_weights, dim=seq_dim)

        context = self.output_dropout_layer(context)
        # Result is a column of predictions: (batch, output_size).
        return self.predict_layer(context)

In [179]:
# Hyperparameters passed to GatedRecurrentUnitOutputAttentionNet.

# Network dimensions
input_size = 2      # features per timestep
output_size = 1     # values predicted per sample
hidden_size = 5     # GRU hidden state width
num_layers = 3      # stacked GRU layers
window_size = 2     # stored by the model as seq_len

# Selects the loss function: 'Regression' or 'Classification'
target_type_string = 'Regression'

# GRU options
bias = True
batch_first = True
bidirectional = False

# Dropout rates (0 disables dropout)
dropout_hidden = 0
dropout_output = 0

In [180]:
# Build the network from the hyperparameters defined earlier. Keyword
# arguments make the parameter mapping explicit and order-independent.
gru = GatedRecurrentUnitOutputAttentionNet(
    input_size=input_size,
    output_size=output_size,
    window_size=window_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    target_type_string=target_type_string,
    bias=bias,
    batch_first=batch_first,
    bidirectional=bidirectional,
    dropout_hidden=dropout_hidden,
    dropout_output=dropout_output,
)

In [181]:
# Nested list of shape (1, 3, 2); with the batch_first layout used here that
# is one sample of three timesteps with two features each.
# NOTE(review): `inputs` is reassigned by the following cell before it is used.
inputs = [
    [
        [5.9300e-04, 1.0390e-01],
        [5.9300e-04, 1.0390e-01],
        [8.2390e-04, 2.8946e-01],
    ],
]

In [182]:
# Nested list of shape (4, 2, 2): four samples, two timesteps per sample,
# two features per timestep. Each sample repeats the same feature pair at
# both timesteps.
inputs = [
    [[7.9018e-04, 1.2779e-01], [7.9018e-04, 1.2779e-01]],
    [[5.2380e-04, 1.9846e-01], [5.2380e-04, 1.9846e-01]],
    [[8.2390e-04, 2.8946e-01], [8.2390e-04, 2.8946e-01]],
    [[5.9300e-04, 1.0390e-01], [5.9300e-04, 1.0390e-01]],
]

In [183]:
# Check the tensor layout of `inputs` before feeding it to the model.
torch.tensor(inputs, dtype=torch.float32).shape

torch.Size([4, 2, 2])

In [184]:
# Run one forward pass on the sample batch; returns one prediction row per sample.
gru(torch.tensor(inputs, dtype=torch.float32))

gru_out: tensor([[[-0.1287, -0.0386,  0.0901,  0.0267, -0.0070],
         [-0.1909, -0.1221,  0.1289,  0.0328,  0.0081]],

        [[-0.1286, -0.0387,  0.0893,  0.0262, -0.0067],
         [-0.1901, -0.1219,  0.1266,  0.0313,  0.0087]],

        [[-0.1284, -0.0389,  0.0882,  0.0254, -0.0063],
         [-0.1889, -0.1214,  0.1238,  0.0294,  0.0094]],

        [[-0.1288, -0.0385,  0.0904,  0.0269, -0.0071],
         [-0.1912, -0.1222,  0.1296,  0.0333,  0.0079]]],
       grad_fn=<TransposeBackward0>)
gru_out size: 4 2 5
sm_attn: tensor([[[0.4851],
         [0.5149]],

        [[0.4852],
         [0.5148]],

        [[0.4854],
         [0.5146]],

        [[0.4851],
         [0.5149]]], grad_fn=<SoftmaxBackward>)
sm_attn size: 4 2 1
mul: tensor([[[-0.0624, -0.0187,  0.0437,  0.0130, -0.0034],
         [-0.0983, -0.0629,  0.0663,  0.0169,  0.0042]],

        [[-0.0624, -0.0188,  0.0433,  0.0127, -0.0032],
         [-0.0978, -0.0627,  0.0652,  0.0161,  0.0045]],

        [[-0.0623, -0.0189,  

tensor([[-0.0346],
        [-0.0344],
        [-0.0341],
        [-0.0346]], grad_fn=<AddmmBackward>)

In [163]:
# First of three 10-element vectors summed element-wise into `g` further down.
# Presumably hand-copied from a printed tensor -- TODO confirm the source.
d = [
    -0.0149, -0.0224, -0.0442, -0.0559, 0.0038,
    0.1103, 0.0143, -0.0553, 0.1497, -0.0139,
]

In [164]:
# Second of three 10-element vectors summed element-wise into `g` further down.
# Presumably hand-copied from a printed tensor -- TODO confirm the source.
e = [
    -0.0266, -0.0397, -0.0583, -0.0963, 0.0111,
    0.0910, 0.0135, -0.0536, 0.1376, -0.0254,
]

In [165]:
# Third of three 10-element vectors summed element-wise into `g` further down.
# Presumably hand-copied from a printed tensor -- TODO confirm the source.
f = [
    -0.0278, -0.0543, -0.0618, -0.1240, 0.0149,
    0.0585, 0.0118, -0.0443, 0.1054, -0.0272,
]

In [171]:
# Element-wise sum of the vectors d, e and f.
# zip pairs the entries directly instead of indexing through a hard-coded
# range(10), so the cell does not depend on the vectors having exactly ten
# entries (zip stops at the shortest vector if the lengths ever differ).
g = [x + y + z for x, y, z in zip(d, e, f)]
print(g)

[-0.0693, -0.1164, -0.1643, -0.2762, 0.0298, 0.2598, 0.039599999999999996, -0.1532, 0.3927, -0.0665]


In [126]:
# 10-element vector that the next cell scales by a constant factor.
# Presumably hand-copied from a printed tensor -- TODO confirm the source.
a = [
    -0.1694, 0.0919, -0.3994, 0.2005, -0.1659,
    -0.2772, -0.2371, -0.3158, -0.1634, -0.1596,
]

In [127]:
# Scale every entry of `a` by 0.3350 and display the result.
# NOTE(review): 0.3350 looks like a single attention weight read off a
# printed tensor -- confirm where it comes from.
b = [0.3350 * entry for entry in a]
b

[-0.056749,
 0.0307865,
 -0.133799,
 0.0671675,
 -0.0555765,
 -0.092862,
 -0.07942850000000001,
 -0.10579300000000001,
 -0.054739,
 -0.053466]

In [None]:
# Reference values at 4-decimal precision, presumably copied from a printed
# tensor for comparison with `b` -- TODO confirm the source.
# The original cell held only the bare numbers with an indented continuation
# line, which raises IndentationError when executed; binding them to a named
# list keeps the values and makes the cell runnable.
b_reference = [
    -0.0567, 0.0308, -0.1338, 0.0672, -0.0556,
    -0.0928, -0.0794, -0.1058, -0.0547, -0.0535,
]
b_reference