In [1]:
import import_ipynb
import attention
import torch
import torch.nn as nn

class attLSTM(nn.Module):
    """LSTM encoder followed by an attention layer.

    Raw features are projected by a linear layer with a tanh non-linearity,
    fed through an ``nn.LSTM``, and the per-step LSTM outputs are passed to
    ``attention.Attention``. ``forward`` returns the attention output
    concatenated with the final hidden state, flattened per sample.
    ``self.regressor`` is built here but is not applied inside ``forward``.

    Args:
        input_dim: Stored on the instance for interface compatibility. The
            LSTM consumes the projected features, so its input width is
            taken from ``feature_linear`` rather than from this value.
        hidden_dim: Width of the feature projection and of the LSTM state.
        output_dim: Number of outputs produced by ``self.regressor``.
        num_layers: Number of stacked LSTM layers.
        batch_size: Fixed batch size; the initial LSTM state and the final
            ``view`` in ``forward`` are both sized with it.
        dropout: Dropout probability of the regressor; also forwarded to
            ``attention.Attention``.
        use_bn: If true, the regressor starts with a ``BatchNorm1d`` layer.
        attn_head: Forwarded to ``attention.Attention``.
        attn_size: Forwarded to ``attention.Attention``; added to
            ``hidden_dim`` to obtain the regressor input width.
        activation: Name of a class in ``torch.nn``; an instance is stored
            as ``self.activation``.
        feature_dim: Width of the raw input features (last dimension of the
            tensor given to ``forward``). Defaults to 11.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn, attn_head,
                 attn_size, activation="ReLU", feature_dim=11):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.attn_head = attn_head
        self.attn_size = attn_size
        self.dropout = dropout
        self.use_bn = use_bn
        self.feature_dim = feature_dim
        self.activation = getattr(nn, activation)()
        # Device on which the initial LSTM state is first allocated.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        self.feature_linear = nn.Linear(self.feature_dim, self.hidden_dim)
        self.tanh = nn.Tanh()
        # The LSTM is fed the projected features, so its input width must
        # equal the projection width (not the raw `input_dim`).
        self.lstm = nn.LSTM(self.feature_linear.out_features, self.hidden_dim, self.num_layers)
        self.attention = attention.Attention(self.batch_size, self.attn_head, self.attn_size, self.hidden_dim,
                                             self.hidden_dim, self.dropout)
        self.regression_input_size = self.attn_size + self.hidden_dim
        self.regressor = self.make_regressor()
        # One fixed, randomly initialised (h_0, c_0) pair reused by every
        # forward pass.
        self.init_hidden_ = self.init_hidden()

    def init_hidden(self):
        """Return a Xavier-normal initialised ``(h_0, c_0)`` tuple.

        Both tensors have shape ``(num_layers, batch_size, hidden_dim)`` and
        are allocated on ``self.device``.
        """
        shape = (self.num_layers, self.batch_size, self.hidden_dim)
        h = torch.empty(shape, device=self.device)
        c = torch.empty(shape, device=self.device)
        return (nn.init.xavier_normal_(h),
                nn.init.xavier_normal_(c))

    def make_regressor(self):
        """Build the small MLP head: optional BatchNorm1d, Dropout, Linear.

        The final linear layer maps ``regression_input_size`` features to
        ``output_dim`` outputs.
        """
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.regression_input_size))
        layers.append(nn.Dropout(self.dropout))
        # Map the concatenated features to the requested number of outputs.
        layers.append(nn.Linear(self.regression_input_size, self.output_dim))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Encode ``x`` and apply attention over the LSTM outputs.

        Args:
            x: Tensor whose last dimension is ``feature_dim``. Dimensions 0
                and 1 are swapped before the LSTM, which is sequence-first
                and uses an initial state sized for ``batch_size``, so ``x``
                is expected as ``(batch_size, seq_len, feature_dim)``.

        Returns:
            Tuple ``(es, attn_weights, attn_applied)``: ``es`` is the
            attention output concatenated with the final hidden state and
            reshaped to ``(batch_size, -1)``; the other two values are
            returned unchanged from ``self.attention``.
        """
        # Run on whatever device the parameters actually live on, so the
        # input and the pre-built initial state can never mismatch them.
        device = self.feature_linear.weight.device
        features = x.float().to(device)
        input_x = self.tanh(self.feature_linear(features)).transpose(0, 1).float()
        init_state = tuple(state.to(device) for state in self.init_hidden_)

        # lstm_out holds the LSTM output at every time step; self.hidden is
        # the (hidden state, cell state) tuple after the last step.
        lstm_out, self.hidden = self.lstm(input_x, init_state)

        attn_applied, attn_weights = self.attention(lstm_out, lstm_out)

        # NOTE(review): self.hidden[0] has num_layers as its leading
        # dimension; presumably attn_applied has a matching leading
        # dimension - confirm against attention.Attention.
        es = torch.cat([attn_applied, self.hidden[0]], dim=2).view(self.batch_size, -1)

        return es, attn_weights, attn_applied

importing Jupyter notebook from attention.ipynb
