In [1]:
import import_ipynb
import attention
import torch
import torch.nn as nn



class att_LSTM(nn.Module):
    """LSTM encoder followed by an attention layer over its outputs.

    The forward pass projects the raw input through a fixed
    ``Linear(11, 10)`` + ``tanh`` layer, runs the result through an
    ``nn.LSTM`` and then calls ``attention.Attention`` with the per-step
    LSTM outputs and the final hidden state.

    The recurrent state is kept on the instance as ``self.hidden`` (a
    ``(h, c)`` tuple) and is carried over from one ``forward`` call to the
    next. Callers that want a fresh state can assign
    ``model.hidden = model.init_hidden()``.

    Args:
        input_dim: Feature size fed to the LSTM. NOTE(review): ``forward``
            always feeds the LSTM the output of ``Linear(11, 10)``, so this
            presumably has to be 10 -- confirm against the caller.
        hidden_dim: LSTM hidden size.
        output_dim: Stored on the instance; not used by any layer built in
            this class.
        num_layers: Number of stacked LSTM layers.
        batch_size: Batch size used for the initial hidden state.
        dropout: Dropout probability passed to the attention layer and used
            in the regressor.
        use_bn: If true, the regressor starts with a ``BatchNorm1d`` layer.
        attn_head: Forwarded to ``attention.Attention``.
        attn_size: Forwarded to ``attention.Attention``; also part of the
            regressor input width.
        activation: Name of a class in ``torch.nn`` (e.g. ``"ReLU"``); it is
            instantiated and stored as ``self.activation``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn, attn_head,
                 attn_size, activation="ReLU"):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.attn_head = attn_head
        self.attn_size = attn_size
        self.dropout = dropout
        self.use_bn = use_bn
        self.activation = getattr(nn, activation)()

        # PyTorch's built-in LSTM (sequence-first layout, batch_first=False).
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.attention = attention.Attention(self.attn_head, self.attn_size, self.hidden_dim,
                                             self.hidden_dim, self.dropout)
        # The regressor is built here but not applied in forward(); its input
        # width is attn_size + hidden_dim.
        self.regression_input_size = self.attn_size + self.hidden_dim
        self.regressor = self.make_regressor()
        # Fixed input projection: 11 raw features -> 10 LSTM input features.
        self.linear1 = nn.Linear(11, 10)
        self.tanh1 = nn.Tanh()
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=None):
        """Create a Xavier-normal initialised ``(h, c)`` state for the LSTM.

        Args:
            batch_size: Batch dimension of the state. Defaults to the
                ``batch_size`` given to the constructor.

        Returns:
            Tuple ``(h, c)`` of tensors shaped
            ``(num_layers, batch_size, hidden_dim)``, allocated on the
            default device (``forward`` moves them to the input's device).
        """
        if batch_size is None:
            batch_size = self.batch_size
        h = torch.empty(self.num_layers, batch_size, self.hidden_dim)
        c = torch.empty(self.num_layers, batch_size, self.hidden_dim)
        return (nn.init.xavier_normal_(h),
                nn.init.xavier_normal_(c))

    def make_regressor(self):
        """Build the small feed-forward head stored as ``self.regressor``.

        Layers, in order: optional ``BatchNorm1d`` (when ``use_bn``),
        ``Dropout`` and a single ``Linear`` layer.

        Returns:
            ``nn.Sequential`` mapping ``regression_input_size`` features to
            ``hidden_dim`` features.
        """
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.regression_input_size))
        layers.append(nn.Dropout(self.dropout))

        # NOTE(review): the original comment described this as projecting to
        # the output dimension, but the layer projects to hidden_dim and
        # output_dim is unused. Left as-is so parameter shapes (and existing
        # checkpoints) do not change -- confirm the intended width.
        layers.append(nn.Linear(self.regression_input_size, self.hidden_dim))
        return nn.Sequential(*layers)

    def _prepare_hidden(self, x):
        """Return the stored state, made safe to feed to the LSTM with ``x``.

        ``x`` is the sequence-first LSTM input, so ``x.size(1)`` is the batch
        size.
        """
        hidden = self.hidden
        batch = x.size(1)
        if hidden[0].size(1) != batch:
            # E.g. the last, smaller batch of an epoch: the stored state
            # cannot be reused, so start from a fresh one of the right size.
            hidden = self.init_hidden(batch)
        # detach(): do not backpropagate into graphs of earlier forward calls
        # (otherwise a second backward() fails on the already-freed graph).
        # to(): the state is a plain attribute, so Module.to()/cuda() never
        # moves it; follow the input tensor instead.
        return tuple(h.detach().to(device=x.device, dtype=x.dtype) for h in hidden)

    def forward(self, x):
        """Encode ``x`` and attend over the LSTM outputs.

        Args:
            x: Input tensor whose last dimension is 11. Assumed to be laid
                out as ``(batch, seq_len, 11)``; dims 0 and 1 are swapped
                before the LSTM -- TODO confirm against the data loader.

        Returns:
            The pair ``(hidden_context, attn_weights)`` exactly as returned
            by ``attention.Attention``.

        Side effects:
            ``self.hidden`` is replaced by the LSTM's final ``(h, c)`` state.
        """
        x = self.tanh1(self.linear1(x.float())).transpose(0, 1).float()
        # lstm_out holds the LSTM output at every time step; self.hidden[0]
        # is the final hidden state and self.hidden[1] the final cell state.
        lstm_out, self.hidden = self.lstm(x, self._prepare_hidden(x))
        hidden_context, attn_weights = self.attention(lstm_out, self.hidden[0], lstm_out)
        return hidden_context, attn_weights



importing Jupyter notebook from attention.ipynb
