## Library

In [30]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
from torch.utils.data import DataLoader,TensorDataset
from sklearn.preprocessing import MinMaxScaler

## Data

In [31]:
# Load the rail dataset into a DataFrame.
# NOTE(review): the active path is absolute and machine-specific; the
# commented-out line below is a repository-relative alternative.
# df = pd.read_csv('../../data/Rail_data.csv')
df = pd.read_csv(filepath_or_buffer='C:/Users/AML2/Desktop/TIL/data/Rail_data.csv')

# Min-max scaling of the feature columns is currently disabled.
# scaler = MinMaxScaler()
# scaled_col = ['air_temp','TSI','azimuth','altitude','solar_rad','High_solar_rad', 'casi', 'humidity', 'rain', 'wind_speed','wind_direction','rail_direction']
# df[scaled_col]= scaler.fit_transform(df[scaled_col])

# Min-max scaling of the 'rail_temp' column is currently disabled as well.
# scaler1 = MinMaxScaler()
# df['rail_temp'] = scaler1.fit_transform(df['rail_temp'].values.reshape(-1,1))

# Columns 0..11 become the inputs X; column 12 becomes the target y.
X, y = df.iloc[:, :12].values, df.iloc[:, 12].values

def sequence_data(X, y, sequence_size):
    """Slice X and y into overlapping windows of ``sequence_size`` consecutive rows.

    Window ``i`` covers rows ``i`` .. ``i + sequence_size - 1`` of both inputs.
    ``len(X) - sequence_size`` windows are produced, so with 7000 rows and a
    window of 5 there are 6995 windows.

    NOTE(review): the target window spans the same rows as the input window;
    it is not shifted to the following step. Confirm this is what the
    forecasting setup intends.

    Args:
        X: indexable sequence of feature rows (e.g. a 2-D NumPy array).
        y: indexable sequence of target values aligned row-by-row with X.
        sequence_size: number of consecutive rows in each window.

    Returns:
        Tuple ``(x_seq, y_seq)`` of float32 tensors. For a 2-D X and a 1-D y
        the shapes are ``(n_windows, sequence_size, n_features)`` and
        ``(n_windows, sequence_size)``.
    """
    n_windows = len(X) - sequence_size
    x_seq = [X[idx:idx + sequence_size] for idx in range(n_windows)]
    y_seq = [y[idx:idx + sequence_size] for idx in range(n_windows)]

    # Collapse each list into a single ndarray before handing it to torch:
    # torch.tensor() on a Python list of ndarrays converts element by element
    # and is very slow for tens of thousands of windows.
    return (torch.tensor(np.asarray(x_seq), dtype=torch.float32),
            torch.tensor(np.asarray(y_seq), dtype=torch.float32))

# Number of leading windows used for training; the remainder is the test set.
split = 48052
# Rows per window.
sequence_length = 5
X_seq, y_seq = sequence_data(X, y, sequence_length)

# Chronological split: no shuffling, the first `split` windows train the model.
X_train_seq, X_test_seq = X_seq[:split], X_seq[split:]
y_train_seq, y_test_seq = y_seq[:split], y_seq[split:]

train_DS = TensorDataset(X_train_seq, y_train_seq)
test_DS = TensorDataset(X_test_seq, y_test_seq)

# Both loaders iterate in order with mini-batches of 16 windows.
train_DL = DataLoader(train_DS, batch_size=16)
test_DL = DataLoader(test_DS, batch_size=16)




In [32]:
# Sanity-check the shapes of the training windows (inputs, then targets).
print(X_train_seq.shape, y_train_seq.shape, sep='\n')

torch.Size([48052, 5, 12])
torch.Size([48052, 5])


## Model

In [34]:
class Encoder(nn.Module):
    """Bidirectional GRU encoder over embedded integer indices.

    NOTE(review): ``nn.Embedding`` looks up integer indices, while the tensors
    built earlier in this notebook are float32 feature rows -- confirm how the
    features are meant to enter the encoder.

    Args:
        input_dim: number of rows in the embedding table.
        emb_dim: size of each embedding vector.
        enc_hid_dim: GRU hidden size per direction.
        dec_hid_dim: size of the initial decoder hidden state produced here.
        dropout: dropout probability applied to the embeddings.
    """

    def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
        super().__init__()

        # index -> emb_dim vector
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=emb_dim)
        # emb_dim -> enc_hid_dim per direction (forward and backward)
        self.rnn = nn.GRU(input_size=emb_dim, hidden_size=enc_hid_dim, bidirectional=True)
        # concatenated final states of both directions -> dec_hid_dim
        self.fc = nn.Linear(in_features=2 * enc_hid_dim, out_features=dec_hid_dim)
        # randomly zeroes activations during training as regularisation
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Encode ``x`` of shape (x len, batch size).

        Returns:
            outputs: (x len, batch size, enc_hid_dim * 2) per-step GRU outputs.
            hidden: (batch size, dec_hid_dim) initial decoder state.
        """
        # (x len, batch size) -> (x len, batch size, emb_dim)
        embedded = self.embedding(x)
        embedded = self.dropout(embedded)

        outputs, final_states = self.rnn(embedded)

        # The last two entries are the final forward and backward states.
        last_forward, last_backward = final_states[-2], final_states[-1]
        merged = torch.cat((last_forward, last_backward), dim=1)
        hidden = torch.tanh(self.fc(merged))
        return outputs, hidden
    
class Attention(nn.Module):
    """Additive attention over the encoder outputs.

    Scores each encoder step against the current decoder hidden state with a
    small feed-forward network (no dot product) and normalises the scores with
    a softmax over the source positions.

    Args:
        enc_hid_dim: encoder GRU hidden size per direction (the encoder outputs
            are expected to carry ``enc_hid_dim * 2`` features).
        dec_hid_dim: decoder hidden size.
    """

    def __init__(self, enc_hid_dim, dec_hid_dim):
        super().__init__()

        self.attn = nn.Linear((enc_hid_dim*2) + dec_hid_dim, dec_hid_dim)
        # Projects each energy vector to a single scalar score. nn.Linear
        # requires out_features; it must be 1 so that squeeze(2) in forward()
        # yields a (batch size, x len) score matrix.
        self.v = nn.Linear(dec_hid_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch size, x len).

        Args:
            hidden: (batch size, dec_hid_dim) current decoder state.
            encoder_outputs: (x len, batch size, enc_hid_dim * 2).
        """
        x_len = encoder_outputs.shape[0]

        # Repeat the decoder state once per source position.
        hidden = hidden.unsqueeze(1).repeat(1, x_len, 1)
        # hidden: (batch size, x len, dec_hid_dim)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        # encoder_outputs: (batch size, x len, enc_hid_dim * 2)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        # energy: (batch size, x len, dec_hid_dim)
        attention = self.v(energy).squeeze(2)
        # attention: (batch size, x len)
        return F.softmax(attention, dim=1)
    
class Decoder(nn.Module):
    """Single-step GRU decoder with attention over the encoder outputs.

    Args:
        output_dim: size of the embedding table and of the prediction vector.
        emb_dim: size of each embedding vector.
        enc_hid_dim: encoder GRU hidden size per direction.
        dec_hid_dim: decoder GRU hidden size.
        dropout: dropout probability applied to the embeddings.
        attention: module called as ``attention(hidden, encoder_outputs)`` that
            returns weights of shape (batch size, x len).
    """

    def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
        super().__init__()

        self.output_dim = output_dim
        self.attention = attention
        self.embedding = nn.Embedding(output_dim, emb_dim)
        # The hidden size must be dec_hid_dim: forward() feeds this GRU a
        # (1, batch size, dec_hid_dim) state and fc_out expects dec_hid_dim
        # features from its output.
        self.rnn = nn.GRU((enc_hid_dim*2) + emb_dim, dec_hid_dim)

        self.fc_out = nn.Linear((enc_hid_dim*2) + dec_hid_dim + emb_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: (batch size,) indices for the current step.
            hidden: (batch size, dec_hid_dim) previous decoder state.
            encoder_outputs: (x len, batch size, enc_hid_dim * 2).

        Returns:
            prediction: (batch size, output_dim).
            hidden: (batch size, dec_hid_dim) updated decoder state.
        """
        input = input.unsqueeze(0)
        # input: (1, batch size)
        embedded = self.dropout(self.embedding(input))
        # embedded: (1, batch size, emb_dim)
        a = self.attention(hidden, encoder_outputs)
        # a: (batch size, x len)
        a = a.unsqueeze(1)
        # a: (batch size, 1, x len)
        encoder_outputs = encoder_outputs.permute(1, 0, 2)
        # encoder_outputs: (batch size, x len, enc_hid_dim * 2)
        # Batched matrix product: (1 x x len) @ (x len x enc_hid_dim * 2)
        weighted = torch.bmm(a, encoder_outputs)
        # weighted: (batch size, 1, enc_hid_dim * 2)
        weighted = weighted.permute(1, 0, 2)
        # weighted: (1, batch size, enc_hid_dim * 2)
        rnn_input = torch.cat((embedded, weighted), dim=2)
        # rnn_input: (1, batch size, emb_dim + enc_hid_dim * 2)

        output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
        # output: (1, batch size, dec_hid_dim)
        # hidden: (1, batch size, dec_hid_dim)
        # Single layer, single step: the output and the final state coincide.
        assert (output == hidden).all()

        embedded = embedded.squeeze(0)  # (batch size, emb_dim)
        output = output.squeeze(0)      # (batch size, dec_hid_dim)
        weighted = weighted.squeeze(0)  # (batch size, enc_hid_dim * 2)

        # Concatenating along dim=1 only requires the batch dimension to
        # match; the feature widths may differ and simply add up.
        prediction = self.fc_out(torch.cat((output, weighted, embedded), dim=1))
        # prediction: (batch size, output_dim)
        return prediction, hidden.squeeze(0)
    
class Seq2Seq(nn.Module):
    """Ties an encoder and a step-wise decoder together with teacher forcing.

    Args:
        encoder: module returning ``(encoder_outputs, hidden)`` for an input x.
        decoder: module exposing ``output_dim`` and called as
            ``decoder(input, hidden, encoder_outputs)``.
        device: device on which the output buffer is allocated.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()

        self.device = device
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x, y, teacher_forcing_ratio = 0.5):
        """Decode ``y.shape[0] - 1`` steps and return the stacked step outputs.

        Args:
            x: source batch laid out as (src len, batch size).
            y: target batch laid out as (trg len, batch size).
            teacher_forcing_ratio: probability of feeding the ground-truth
                value as the next decoder input; e.g. 0.75 means teacher
                forcing is used 75% of the time.

        Returns:
            Tensor of shape (trg len, batch size, decoder.output_dim). Row 0 is
            left as zeros because decoding starts at step 1.
        """
        n_steps, n_batch = y.shape[0], x.shape[1]
        n_outputs = self.decoder.output_dim

        # Buffer that collects the decoder output of every step.
        outputs = torch.zeros(n_steps, n_batch, n_outputs, device=self.device)

        encoder_outputs, hidden = self.encoder(x)

        # The first decoder input is the first target step.
        decoder_input = y[0, :]

        for t in range(1, n_steps):
            step_output, hidden = self.decoder(decoder_input, hidden, encoder_outputs)
            outputs[t] = step_output

            # Decide per step whether to feed the truth or the model's own guess.
            use_truth = random.random() < teacher_forcing_ratio
            best_guess = step_output.argmax(1)
            decoder_input = y[t] if use_truth else best_guess

        return outputs

In [None]:
# Model hyperparameters.
# NOTE(review): input_dim / output_dim are filled in from the data layout
# (number of feature columns per step, and a single target column). Encoder and
# Decoder pass these values to nn.Embedding, which interprets them as table
# sizes for integer indices -- confirm the intended meaning before training.
input_dim = X_train_seq.shape[-1]
output_dim = 1
enc_emb_dim = 256
dec_emb_dim = 256
enc_hid_dim = 512
dec_hid_dim = 512
enc_dropout = 0.5
dec_dropout = 0.5

attn = Attention(enc_hid_dim, dec_hid_dim)
enc = Encoder(input_dim, enc_emb_dim, enc_hid_dim, dec_hid_dim, enc_dropout)
dec = Decoder(output_dim, dec_emb_dim, enc_hid_dim, dec_hid_dim, dec_dropout, attn)

In [None]:
# def init_weights(m):
#     for name, param in m.named_parameters():
#         if 'weight' in name:
#             nn.init.normal_(param.data, mean=0, std=0.01)
#         else:
#             nn.init.constant_(param.data, 0)
            
# model.apply(init_weights)
            

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(enc, dec, device).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)

loss_graph = []  # mean training loss per epoch
n = len(train_DL)  # number of mini-batches per epoch

for epoch in range(100):
    running_loss = 0
    for data in train_DL:
        seq, target = data[0].to(device), data[1].to(device)

        # Seq2Seq.forward(x, y, teacher_forcing_ratio) needs the target
        # sequence as its second argument; calling model(seq) raises TypeError.
        # NOTE(review): the loader yields batch-first tensors, while
        # Seq2Seq.forward reads x.shape[1] as the batch size and y.shape[0] as
        # the target length, and its output carries a trailing output_dim axis
        # that `target` lacks -- confirm the layout and loss shapes.
        out = model(seq, target)
        loss = criterion(out, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    loss_graph.append(running_loss/n)
    if epoch % 20 == 0:
        print("[epoch: %d] loss : %.4f" %(epoch,running_loss/n))

In [None]:
# Predict over the training and test windows together, in order.
concatdata = torch.utils.data.ConcatDataset([train_DS, test_DS])
data_loader = DataLoader(dataset=concatdata, batch_size= 16)

model.eval()  # disable dropout for inference
with torch.no_grad():
    pred = []
    for data in data_loader:
        seq, target = data[0].to(device), data[1].to(device)
        # Seq2Seq.forward requires the target argument (it supplies the first
        # decoder input and the number of steps). A ratio of 0 turns teacher
        # forcing off so later steps are driven by the model's own outputs.
        # NOTE(review): tensors here are batch-first while Seq2Seq.forward
        # indexes them as (len, batch) -- confirm the expected layout.
        out = model(seq, target, teacher_forcing_ratio=0)
        pred += out.cpu().tolist()