# In [2]:
import import_ipynb
import attention
import torch
import torch.nn as nn
import torch.nn.functional as F

class Conv_attLSTM(nn.Module):
    """1-D convolution -> bidirectional LSTM -> attention -> sigmoid score.

    The input is passed through a pointwise ``Conv1d`` stack, the result is
    fed time-major into a bidirectional LSTM, an attention module summarises
    the LSTM outputs, and the summary concatenated with the last LSTM output
    is projected to one sigmoid-activated value per sample.

    Args:
        input_dim: LSTM input size; also the number of channels produced by
            the convolution stack (the two must match for the LSTM to accept
            the convolution output).
        hidden_dim: LSTM hidden size per direction.
        output_dim: Stored on the instance; not used by this class itself.
        num_layers: Number of stacked LSTM layers.
        batch_size: Batch size used to shape the fixed initial LSTM state and
            passed on to the attention module.
        dropout: Dropout probability for the convolution stack; also passed
            to the attention module.
        use_bn: If true, add ``BatchNorm1d`` after the convolution.
        attn_head: Passed to ``attention.Attention``.
        attn_size: Passed to ``attention.Attention``.
        activation: Name of an ``nn`` activation class. It is instantiated and
            stored as ``self.activation``.
            NOTE(review): the convolution stack always uses ``nn.ReLU`` and
            never reads ``self.activation`` -- confirm whether that is
            intended.
        conv_in_channels: Number of input feature channels expected by the
            convolution (defaults to 11, the previously hard-coded value).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn, attn_head,
                 attn_size, activation="ReLU", conv_in_channels=11):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.attn_head = attn_head
        self.attn_size = attn_size
        self.dropout = dropout
        self.use_bn = use_bn
        self.conv_in_channels = conv_in_channels
        self.activation = getattr(nn, activation)()
        # Default placement for the initial LSTM state; forward() follows the
        # device of the module's weights, so this is only a starting point.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.lstm = nn.LSTM(input_size=self.input_dim, hidden_size=self.hidden_dim,
                            num_layers=self.num_layers, bidirectional=True)
        self.attention = attention.Attention(self.batch_size, self.attn_head, self.attn_size, self.hidden_dim,
                                             self.hidden_dim, self.dropout)
        # Drawn once here and reused by every forward() call. It is a plain
        # tuple attribute, so it is neither trained nor part of state_dict().
        self.init_hidden_ = self.init_hidden()
        self.convolusion = self.make_convolusion()
        # 4 * hidden_dim = width of cat([attn_applied, lstm_out[-1]]) in forward().
        self.last_linear = nn.Linear(4 * hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def init_hidden(self):
        """Return a Xavier-normal ``(h_0, c_0)`` pair for the LSTM.

        Each tensor has shape ``(2 * num_layers, batch_size, hidden_dim)``;
        the factor 2 accounts for the two directions of the LSTM. Tensors are
        created on ``self.device``.
        """
        shape = (2 * self.num_layers, self.batch_size, self.hidden_dim)
        h = torch.empty(*shape).to(self.device)
        c = torch.empty(*shape).to(self.device)
        return (nn.init.xavier_normal_(h),
                nn.init.xavier_normal_(c))

    def make_convolusion(self):
        """Build the convolutional front end applied before the LSTM.

        Layers, in order: ``Conv1d(conv_in_channels -> input_dim, kernel 1)``,
        optional ``BatchNorm1d``, ``ReLU``, ``MaxPool1d(5, stride=1,
        padding=2)`` (keeps the sequence length), ``Dropout``, ``Tanh``.

        Returns:
            An ``nn.Sequential`` mapping ``(batch, conv_in_channels, time)``
            to ``(batch, input_dim, time)``.
        """
        # Fall back to the historical value when the attribute is absent
        # (e.g. an instance unpickled from before the argument existed).
        in_channels = getattr(self, "conv_in_channels", 11)
        # The conv output feeds the LSTM directly, so its channel count must
        # equal the LSTM input size rather than a fixed 64.
        out_channels = self.input_dim

        layers = [nn.Conv1d(in_channels, out_channels, kernel_size=1)]
        if self.use_bn:
            layers.append(nn.BatchNorm1d(out_channels))
        layers.append(nn.ReLU())
        layers.append(nn.MaxPool1d(5, stride=1, padding=2))
        layers.append(nn.Dropout(self.dropout))
        layers.append(nn.Tanh())
        return nn.Sequential(*layers)

    def forward(self, x):
        """Score a batch of sequences.

        Example shapes (batch 128, 10 time steps, 11 features,
        input_dim = hidden_dim = 64):

            x            : [128, 10, 11]   (batch, time step, feature)
            conv input   : [128, 11, 10]
            conv output  : [128, 64, 10]
            lstm input   : [10, 128, 64]   (time-major)
            lstm output  : [10, 128, 128]  (last dim = concat of both directions)
            attn_applied : [128, 128]
            es           : [128, 256]

        Args:
            x: Tensor laid out as (batch, time step, feature). The batch size
                must match the ``batch_size`` given at construction because
                the initial LSTM state is shaped with it.

        Returns:
            Tuple ``(yhat, attn_weights, attn_applied)``: ``yhat`` holds one
            sigmoid output per sample with shape ``(batch,)``; the other two
            values are returned unchanged from the attention module.

        Side effects:
            Stores the LSTM's final ``(h_n, c_n)`` tuple in ``self.hidden``.
        """
        # Follow the device the weights actually live on instead of assuming
        # self.device, so the module works after .to()/.cpu()/.cuda().
        device = self.last_linear.weight.device

        conv_input = x.to(device).float().transpose(1, 2)
        conv_output = self.convolusion(conv_input)

        # (batch, channel, time) -> (time, batch, channel) for the LSTM.
        lstm_input = conv_output.permute(2, 0, 1)
        initial_state = tuple(state.to(device) for state in self.init_hidden_)
        # lstm_out holds the LSTM output at every time step; self.hidden is
        # the (hidden state, cell state) tuple for all layers.
        lstm_out, self.hidden = self.lstm(lstm_input, initial_state)

        attn_applied, attn_weights = self.attention(lstm_out, lstm_out)

        # lstm_out[-1] is the output at the last time step.
        es = torch.cat([attn_applied, lstm_out[-1]], dim=1)
        # Squeeze only the feature axis so a batch of one keeps shape (1,).
        lin_es = self.last_linear(es).squeeze(-1)
        yhat = self.sigmoid(lin_es)

        return yhat, attn_weights, attn_applied
        