In [31]:
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
from torch.utils.data import DataLoader,TensorDataset
from sklearn.preprocessing import MinMaxScaler

In [32]:
sequence_length = 10 # 원하는 sequence_length 설정
batch_size = 36 # 원하는 batch_size 설정 >> 6시간 기준
hidden_size = 16 # 원하는 hidden_size 설정
input_size = 11 # 사용하는 feature의 수
output_size = 1

In [33]:
# df = pd.read_csv('../../data/Rail_data.csv')
df = pd.read_csv('../../data/Rail_data.csv')

# scaler = MinMaxScaler()
# scaled_col = ['air_temp','TSI','azimuth','altitude','solar_rad','High_solar_rad', 'casi', 'humidity', 'rain', 'wind_speed','wind_direction','rail_direction']
# df[scaled_col]= scaler.fit_transform(df[scaled_col])

# scaler1 = MinMaxScaler()
# df['rail_temp'] = scaler1.fit_transform(df['rail_temp'].values.reshape(-1,1))

df = df.astype({'solar_rad': 'float64'})
df = df.astype({'High_solar_rad': 'float64'})
df = df.astype({'casi': 'float64'})
df = df.astype({'humidity': 'float64'})
df = df.astype({'wind_speed': 'float64'})
df = df.drop(['rail_direction'], axis=1)
# int type >> float type

X = df.iloc[:,:11].values
y = df.iloc[:,11].values


def sequence_data(X,y, sequence_size): # 원하는 sequence에 따라 데이터 분리
    x_seq = []
    y_seq = []
    for idx in range(len(X) - sequence_size): #len(X)가 7000이고 seq_size가 5라면?
        x_seq.append(X[idx:idx + sequence_size]) # sequence_lengh개씩 특성들을 모두 묶음 >> shape: 5, 11
        y_seq.append(y[idx + sequence_size])     # x에 따른 온도들을 묶음 >> shape: 5, 1
        
    return torch.tensor(x_seq, dtype=torch.float32), torch.tensor(y_seq, dtype=torch.float32).view(-1,1)


X_seq, y_seq = sequence_data(X, y, sequence_length) # 원하는 sequence_length에 따라 데이터 묶기

X_train, X_test = X_seq[:int(len(X_seq)*0.7)], X_seq[int(len(X_seq)*0.7):]
y_train, y_test = y_seq[:int(len(y_seq)*0.7)], y_seq[int(len(y_seq)*0.7):]

train_DS = TensorDataset(X_train, y_train)
test_DS = TensorDataset(X_test, y_test)

train_DL = DataLoader(train_DS, batch_size = batch_size*2)
test_DL = DataLoader(test_DS, batch_size = batch_size*2)
# batch_size에 따라 데이터 처리

In [34]:
data = next(iter(train_DL))
print(data[0].shape)
print(data[1].shape)

torch.Size([72, 10, 11])
torch.Size([72, 1])


In [35]:
class LSTMEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, device):
        super(LSTMEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.device = device
        
        self.LSTM = nn.LSTM(input_size, hidden_size, batch_first=True).to(self.device)
        # hidden_size를 가진 hidden_state 출력
        
    def forward(self, x): 
        _, enc_hid = self.LSTM(x) 
        # hidden: 1, 36, 16(1, batch, hidden_size) >> (1, 16)짜리가 36개(각각의 state마다의 hidden_state를 포함)
        
        return enc_hid
    
        
class LSTMDecoder(nn.Module):
    def __init__(self, hidden_size,  device):
        super(LSTMDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.device = device
        
        self.LSTM = nn.LSTM(hidden_size, hidden_size, batch_first=True).to(self.device)
        
    def forward(self, x):
        _, dec_hid = self.LSTM(x)
     
        return dec_hid
        
        # prediction을 다시 input으로 decoder 투입
        # prediction이 input으로 들어가서 다시 attention한 후에 next prediction을 출력
        # attention 후에 decoder 내부에서 기존의 attention처럼 진행하는 것이 가능한가?
        # prediction의 hidden_state를 이용해서 next prediction을 하는 것이 예측에 도움을 줄 수 있는가? 
        
class Attention(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, sequence_length, device):
        super(Attention, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.sequence_length = sequence_length
        self.device = device 
        self.encoder = LSTMEncoder(input_size, hidden_size, device).to(self.device)
        self.decoder = LSTMDecoder(hidden_size, hidden_size, device).to(self.device)
        
        self.to_query = nn.Linear(hidden_size, hidden_size).to(self.device)
        self.to_key = nn.Linear(hidden_size, hidden_size).to(self.device)
        self.to_value = nn.Linear(hidden_size, hidden_size).to(self.device)
        
        
        self.LSTM = nn.LSTM(hidden_size, output_size, batch_first = True).to(self.device)
        self.fc = nn.Linear(input_size, hidden_size)
        self.fc0 = nn.Linear(hidden_size * 2, hidden_size)
        # hidden * 2 >> Concatenated hidden, hidden >> original hidden
        self.fc1 = nn.Linear(sequence_length * hidden_size, output_size)    
        
    def forward(self, x):
        # enc_x, dec_x로 나누기
        enc_x, dec_x = x[:int(len(x)*0.5)], x[int(len(x)*0.5):] 
        
        _, enc_hid = self.encoder(enc_x)
        
        
        enc_hid = torch.tensor(enc_hid)
        
        
        dec_input = self.fc(dec_x)
        # 36, 10, 16
        
        for _ in range(sequence_length):
            
            _, dec_hid = self.decoder(dec_input)
            
            query = self.to_query(dec_hid) # 1, batch, hidden_size   
            key = self.to_key(enc_hid)  # 1, batch, hidden_size
            value = self.to_value(enc_hid) # 1, batch, hidden_size
            
            query = query.permute(1, 2, 0).contiguous() # batch, hidden_size, 1
            key = key.permute(1, 0, 2).contiguous() # batch, 1, hidden_size
            value = value.permute(1, 2, 0).contiguous() # batch, hidden_size, 1
            
            
            attention_score = query @ key # batch, hidden_size, 1 @ batch, 1, hidden_size
            attention_score = attention_score.softmax(dim = -1) # batch, hidden_size, hidden_size

            attention_value = attention_score @ value # batch, hidden_size, 1
            # hidden_size, hidden_size @ hidden_size, 1 >> hidden_size,1
            attention_value = attention_value.permute(2, 0, 1).contiguous() # 1, batch, hidden_size
            
            new_hidden = torch.tanh(self.fc0(torch.cat((attention_value, dec_hid),dim=2))) # 1, batch, hidden_size
            cell_state = torch.zeros_like(new_hidden)
            
            out, _  = self.LSTM(dec_input, (new_hidden, cell_state)) # batch, sequence_length, hidden_size
            out = out.reshape(out.shape[0], -1) # batch, sequence_length * hidden_size
            out = self.fc1(out) # batch, 1
            
            # dec_input = out.unsqueeze(1) # batch, 1, 1
            # dec_input = dec_input.repeat(1, hidden_size, 1) # batch, hidden_size, 1
            # dec_input = dec_input.
            # dec_input = dec_input.permute(2, 0, 1).contiguous() # 1, batch, hidden_size
            
            
        return out

In [36]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') #gpu 활성화 확인
model = Attention(input_size, hidden_size, output_size, sequence_length, device).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)

loss_graph = []
n = len(train_DL)

for epoch in range(100):
    running_loss = 0
    for data in train_DL:
        seq, target = data[0].to(device), data[1].to(device)
        target = target[int(len(target)*0.5):] 
        
        
        out = model(seq)
        loss = criterion(out, target)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    loss_graph.append(running_loss/n)
    if epoch % 20 == 0:
        print("[epoch: %d] loss : %.4f" %(epoch,running_loss/n))
        



  enc_hid = torch.tensor(enc_hid)


[epoch: 0] loss : 66.2569
[epoch: 20] loss : 92.6599
[epoch: 40] loss : 92.6599
