In [1]:
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from torch import nn, Tensor, TupleType
from torch.utils.data import TensorDataset, DataLoader, Dataset, random_split
from torch.autograd import Variable
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split  

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [2]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()       
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # div_term = torch.exp(
        #     torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        # )
        div_term = 1 / (10000 ** ((2 * np.arange(d_model)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term[0::2])
        pe[:, 1::2] = torch.cos(position * div_term[1::2])

        pe = pe.unsqueeze(0).transpose(0, 1) # [5000, 1, d_model],so need seq-len <= 5000
        #pe.requires_grad = False
        self.register_buffer('pe', pe)

    def forward(self, x):
        # print(self.pe[:x.size(0), :].repeat(1,x.shape[1],1).shape ,'---',x.shape)
        # dimension 1 maybe inequal batchsize
        return x + self.pe[:x.size(0), :].repeat(1,x.shape[1],1)

In [3]:
class TimeSeriesTransformer(nn.Module):
    
    def __init__(self, 
        input_size: int,
        dec_seq_len: int,
        batch_first: bool,
        out_seq_len: int=58,
        dim_val: int=512,  
        n_encoder_layers: int=4,
        n_decoder_layers: int=4,
        n_heads: int=8,
        dropout_encoder: float=0.2, 
        dropout_decoder: float=0.2,
        dropout_pos_enc: float=0.1,
        dim_feedforward_encoder: int=2048,
        dim_feedforward_decoder: int=2048,
        num_predicted_features: int=1
        ):

        super(TimeSeriesTransformer, self).__init__()

        self.dec_seq_len = dec_seq_len

        self.encoder_input_layer = nn.Linear(
        in_features=input_size, 
        out_features=dim_val 
        )

        self.decoder_input_layer = nn.Linear(
        in_features=num_predicted_features,
        out_features=dim_val
        )  
        
        self.linear_mapping = nn.Linear(
        in_features=dim_val, 
        out_features=num_predicted_features
        )

        # Create positional encoder
        self.positional_encoding_layer = PositionalEncoding(
            d_model=dim_val,
            #dropout=dropout_pos_enc
            )

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_val, 
            nhead=n_heads,
            dim_feedforward=dim_feedforward_encoder,
            dropout=dropout_encoder,
            batch_first=batch_first
            )
        
        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=n_encoder_layers, 
            norm=None
            )
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dim_feedforward=dim_feedforward_decoder,
            dropout=dropout_decoder,
            batch_first=batch_first
            )
        self.decoder = nn.TransformerDecoder(
            decoder_layer=decoder_layer,
            num_layers=n_decoder_layers, 
            norm=None
            )
        
    def forward(self, src: Tensor, tgt: Tensor, src_mask: Tensor=None, 
                tgt_mask: Tensor=None) -> Tensor:
        
        src = self.encoder_input_layer(src)
        src = self.positional_encoding_layer(src)   
        src = self.encoder( # src shape: [batch_size, enc_seq_len, dim_val]
            src=src
            )
        
        decoder_output = self.decoder_input_layer(tgt)
        decoder_output = self.decoder(
            tgt=decoder_output,
            memory=src,
            tgt_mask=tgt_mask,
            memory_mask=src_mask
            )
        decoder_output = self.linear_mapping(decoder_output)

        return decoder_output

In [4]:
# from typing import Tuple
# def get_src_tgt(
#         self,
#         sequence: torch.Tensor, 
#         enc_seq_len: int, 
#         target_seq_len: int
#         ) -> Tuple[torch.tensor, torch.tensor, torch.tensor]:

#         """
#         Generate the src (encoder input), tgt (decoder input) and tgt_y (the target)
#         sequences from a sequence. 
#         Args:
#             sequence: tensor, a 1D tensor of length n where 
#                     n = encoder input length + target sequence length  
#             enc_seq_len: int, the desired length of the input to the transformer encoder
#             target_seq_len: int, the desired length of the target sequence (the 
#                             one against which the model output is compared)
#         Return: 
#             src: tensor, 1D, used as input to the transformer model
#             tgt: tensor, 1D, used as input to the transformer model
#             tgt_y: tensor, 1D, the target sequence against which the model output
#                 is compared when computing loss. 
        
#         """
#         assert len(sequence) == enc_seq_len + target_seq_len, "Sequence length does not equal (input length + target length)"

#         src = sequence[:enc_seq_len] 
        
#         tgt = sequence[enc_seq_len-1:len(sequence)-1]
#         tgt = tgt[:, 0]

#         if len(tgt.shape) == 1:
#             tgt = tgt.unsqueeze(-1)
        
#         assert len(tgt) == target_seq_len, "Length of tgt does not match target sequence length"

#         tgt_y = sequence[-target_seq_len:]
#         tgt_y = tgt_y[:, 0]
#         assert len(tgt_y) == target_seq_len, "Length of tgt_y does not match target sequence length"


#         return src, tgt, tgt_y.squeeze(-1)

In [5]:
def generate_square_subsequent_mask(dim1: int, dim2: int) -> Tensor:
    """
    Generates an upper-triangular matrix of -inf, with zeros on diag.
    Source:
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    Args:
        dim1: int, for both src and tgt masking, this must be target sequence
              length
        dim2: int, for src masking this must be encoder sequence length (i.e. 
              the length of the input sequence to the model), 
              and for tgt masking, this must be target sequence length 
    Return:
        A Tensor of shape [dim1, dim2]
    """
    return torch.triu(torch.ones(dim1, dim2) * float('-inf'), diagonal=1)

In [6]:
class StrideDataset(Dataset):
    def __init__(self, file_path, enc_seq_len, target_seq_len, x_size, stride=5):
        df = pd.read_csv(file_path)
        self.x = df.iloc[x_size:, 6:10].values
        self.y = df.iloc[x_size:, 13:14].values
        
        self.length = len(df) - x_size
        
        print(self.length)

        num_samples = (self.length - enc_seq_len - target_seq_len) // stride + 1 #stride씩 움직일 때 생기는 총 sample 개수
        
        src = np.zeros([enc_seq_len, num_samples])
        tgt = np.zeros([target_seq_len, num_samples])

        self.x = self.x.squeeze()

        for i in np.arange(num_samples):
            start_x = stride*i
            end_x = start_x + enc_seq_len
            src[:,i] = self.data[start_x:end_x]

            start_y = stride*i + enc_seq_len
            end_y = start_y + target_seq_len
            tgt[:,i] = self.data[start_y:end_y]

        src = src.reshape(src.shape[0], src.shape[1], 1).transpose((1,0,2))
        tgt = tgt.reshape(tgt.shape[0], tgt.shape[1], 1).transpose((1,0,2))
        self.src = src
        self.tgt = tgt
        
        self.len = len(src)

    def __getitem__(self, i):
        return self.src[i], self.tgt[i,:-1], self.tgt[i,1:]
    
    def __len__(self):
        return self.len

In [7]:
class CustomDataset(Dataset):
    def __init__(self, file_path, x_size):
        df = pd.read_csv(file_path)

        self.x = df.iloc[x_size:, 6:10].values
        #self.x = np.reshape(x, (x.shape[0], 1, x.shape[1]))

        self.y = df.iloc[x_size:, 13:14].values
        
        self.length = len(df) - x_size

    #getitem이거 왜씀?
    def __getitem__(self, index):
        # x = torch.FloatTensor([self.x[index]])
        # y = torch.FloatTensor([self.y[index]])
        # return x, y
        feature = torch.FloatTensor([self.x[index]])
        label = torch.FloatTensor(self.y[index])

        return feature, label

    def __len__(self):
        return self.length

In [8]:
train_dataset = StrideDataset("DST_80.csv", 300, 75, 1919, stride=50)
dataset_d = CustomDataset("DST_80.csv",1919)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=False, drop_last=False)

10639


In [9]:
# Input length
enc_seq_len = 300
# Output length
output_sequence_length = 75

tgt_mask = generate_square_subsequent_mask(
    dim1=output_sequence_length-1,
    dim2=output_sequence_length-1
   ).to(device)

src_mask = generate_square_subsequent_mask(
    dim1=output_sequence_length-1,
    dim2=enc_seq_len
    ).to(device)

In [10]:
print(tgt_mask.shape)
print(src_mask.shape)

torch.Size([74, 74])
torch.Size([74, 300])


In [11]:
dim_val = 512 # This can be any value divisible by n_heads. 512 is used in the original transformer paper.
n_heads = 8 # The number of attention heads (aka parallel attention layers). dim_val must be divisible by this number
n_decoder_layers = 4 # Number of times the decoder layer is stacked in the decoder
n_encoder_layers = 4 # Number of times the encoder layer is stacked in the encoder
input_size = 1 # The number of input variables. 1 if univariate forecasting.
dec_seq_len = 75 # length of input given to decoder. Can have any integer value.
enc_seq_len = 300 # length of input given to encoder. Can have any integer value.
output_sequence_length = 75 # Length of the target sequence, i.e. how many time steps should your forecast cover
#max_seq_len = enc_seq_len # What's the longest sequence the model will encounter? Used to make the positional encoder

model = TimeSeriesTransformer(
    dim_val=dim_val,
    input_size=input_size, 
    dec_seq_len=dec_seq_len,
    batch_first = True,
    out_seq_len=output_sequence_length, 
    n_decoder_layers=n_decoder_layers,
    n_encoder_layers=n_encoder_layers,
    n_heads=n_heads).to(device)

loss_func = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [12]:
epoch = 100

model.to(device)
model.train()

progress = tqdm(range(epoch))

for i in progress:
    batchloss = 0.0

    for (src, tgt, tgt_y) in train_dataloader:
        optimizer.zero_grad()

        result = model(src.float().to(device), tgt.float().to(device), src_mask, tgt_mask)
        loss = loss_func(result, tgt_y.float().to(device))

        loss.backward()
        optimizer.step()
        batchloss += loss
        
    progress.set_description("{:0.5f}".format(batchloss.cpu().item() / len(train_dataloader)))#??

553.62360: 100%|██████████| 100/100 [01:12<00:00,  1.37it/s]


In [13]:
# model.eval()
# epoch_loss = 0
# n = 0

# result_arr = []

# with torch.no_grad():
#     for (src, tgt, tgt_y) in train_dataloader:
#         result = model(src.float().to(device), tgt.float().to(device), src_mask, tgt_mask)
#         loss = loss_func(result, tgt_y.float().to(device))

#         epoch_loss += loss.item()
#         n = n+1
#         result_arr.append(result.float().cpu())

# print(n)
# result_arr = np.array(result_arr)
# result_arr[0].dtype
# # result_arr = torch.Tensor(result_arr)



In [14]:
# print(result_arr.dtype)
# print(result_arr[2])

In [15]:
# plt.figure(figsize=(10,6))
# plt.plot(dataset_d.y, label='Actual Data')
# plt.plot(result_arr, label='Predicted Data')
# plt.title('SoC prediction')
# #plt.xlim(0,2000)
# plt.legend()
# plt.show()

In [16]:
# input = torch.tensor(train_dataset.src).to(device)
# output  = torch.tensor(train_dataset.tgt).to(device)

# model.to(device)
# train_predict = model(input, output, src_mask, tgt_mask)
# predicted = train_predict.data.numpy()

# Y = torch.Tensor(dataset_d.y)


# # predicted = predicted.squeeze()
# # predicted.shape