In [1]:
import pandas as pd
from tqdm import tqdm
import re
from datetime import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
## Encoder
class Encoder(nn.Module):
    """LSTM encoder that reduces an input sequence to its final recurrent state.

    :param input_size: number of features per time step
    :param hidden_size: size of the LSTM hidden state
    :param num_layers: number of stacked LSTM layers
    :param dropout: dropout probability applied between stacked layers
        (0.1 by default); ignored when ``num_layers`` is 1
    """

    def __init__(self, input_size=4096, hidden_size=1024, num_layers=2, dropout=0.1):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.LSTM only applies dropout *between* stacked layers. With a single
        # layer a non-zero value has no effect other than raising a
        # UserWarning, so 0 is passed in that case.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=effective_dropout,
                            bidirectional=False)

    def forward(self, x):
        """Run the LSTM over ``x`` and return only its final state.

        :param x: batched input of shape (batch_size, seq_length, input_size)
        :return: the ``(h_n, c_n)`` tuple produced by ``nn.LSTM``; each tensor
            has shape (num_layers, batch_size, hidden_size)
        """
        # The per-step outputs are not needed; only the final state is kept.
        _, hidden = self.lstm(x)
        return hidden
    
## Decoder
class Decoder(nn.Module):
    """LSTM decoder followed by a linear projection to the output dimension.

    :param input_size: number of features per time step fed to the LSTM
    :param hidden_size: size of the LSTM hidden state
    :param output_size: number of features produced per time step
    :param num_layers: number of stacked LSTM layers
    :param dropout: dropout probability applied between stacked layers
        (0.1 by default); ignored when ``num_layers`` is 1
    """

    def __init__(self, input_size=4096, hidden_size=1024, output_size=4096,
                 num_layers=2, dropout=0.1):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        # nn.LSTM only applies dropout *between* stacked layers. With a single
        # layer a non-zero value has no effect other than raising a
        # UserWarning, so 0 is passed in that case.
        effective_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=effective_dropout,
                            bidirectional=False)

        # Not used by forward(); kept so the attribute remains available.
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Advance the LSTM from ``hidden`` over ``x`` and project the outputs.

        :param x: batched input of shape (batch_size, seq_length, input_size)
        :param hidden: initial ``(h_0, c_0)`` state tuple for the LSTM
        :return: ``(prediction, hidden)`` where ``prediction`` has shape
            (batch_size, seq_length, output_size) and ``hidden`` is the
            updated ``(h_n, c_n)`` state tuple
        """
        output, hidden = self.lstm(x, hidden)
        # The linear layer acts on the last dimension, i.e. on every time step.
        prediction = self.fc(output)

        return prediction, hidden
    
## RNN Auto Encoder
class RNNAutoEncoder(nn.Module):
    """Sequence auto-encoder built from an ``Encoder`` and a ``Decoder``.

    The encoder turns the input sequence into a recurrent state; the decoder
    is then unrolled step by step from that state to regenerate a sequence of
    the same length.
    """

    def __init__(self,
                 input_dim: int,
                 latent_dim: int,
                 window_size: int=1,
                 **kwargs) -> None:
        """
        :param input_dim: number of variables (tags) per time step
        :param latent_dim: size of the compressed (latent) representation
        :param window_size: sequence length; stored on the instance but not
            otherwise used by this class
        :param kwargs: may contain ``num_layers`` (default 1); any other key
            is ignored
        """

        super(RNNAutoEncoder, self).__init__()

        self.latent_dim = latent_dim
        self.input_dim = input_dim
        self.window_size = window_size

        num_layers = kwargs.pop("num_layers", 1)

        self.encoder = Encoder(
            input_size=input_dim,
            hidden_size=latent_dim,
            num_layers=num_layers,
        )
        self.reconstruct_decoder = Decoder(
            input_size=input_dim,
            output_size=input_dim,
            hidden_size=latent_dim,
            num_layers=num_layers,
        )

    def forward(self, src: torch.Tensor, **kwargs):
        """Encode ``src`` and regenerate a sequence of the same length.

        :param src: tensor of shape (batch_size, sequence_length, var_length)
        :param kwargs: ignored
        :return: list ``[reconstruct_output, src]``; ``reconstruct_output``
            has the same shape as ``src``
        """
        batch_size, sequence_length, var_length = src.size()

        # Encoder state is the starting state of the decoder.
        hidden = self.encoder(src)

        # The decoder starts from an all-zero input and is fed its own
        # previous prediction at every following step.
        step_input = torch.zeros((batch_size, 1, var_length),
                                 dtype=torch.float, device=src.device)
        steps = []
        for _ in range(sequence_length):
            step_input, hidden = self.reconstruct_decoder(step_input, hidden)
            steps.append(step_input)

        # The generated steps are returned in reversed time order.
        reconstruct_output = torch.flip(torch.cat(steps, dim=1), dims=[1])

        return [reconstruct_output, src]

    def loss_function(self,
                      *args,
                      **kwargs) -> torch.Tensor:
        """Mean squared error between a reconstruction and its input.

        :param args: ``args[0]`` is the reconstruction, ``args[1]`` the
            original input; this matches the list returned by ``forward``
        :param kwargs: ignored
        :return: scalar loss tensor
        """
        recons, target = args[0], args[1]

        ## MSE loss (mean squared error)
        return F.mse_loss(recons, target)

In [3]:
from dataset import CurrentDataset

In [4]:
# Glob pattern selecting the CSV files in the "normal" folders of the training split.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
path = '/home/workspace/iot_ai_model/dataset/current/train/**/normal/*.csv'
# CurrentDataset comes from the project's dataset module (not shown here);
# presumably it expands the glob pattern itself — TODO confirm.
dataset = CurrentDataset(path)

In [5]:
# Number of samples in the training dataset.
len(dataset)

68100

In [6]:
# Auto-encoder over 3 input variables with a 500-dimensional latent state and
# 2 stacked LSTM layers, moved to the GPU.
model = RNNAutoEncoder(
    input_dim=3,
    latent_dim=500,
    num_layers=2,
).cuda()

  "num_layers={}".format(dropout, num_layers))


In [7]:
# Shuffled mini-batch loader for training: 128 samples per batch, 2 worker
# processes, pinned host memory, and the last partial batch is kept.
trn_dataloader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=128,
    drop_last=False,
    pin_memory=True,
    num_workers=2,
)

In [8]:
# Training setup: epoch count, reconstruction loss (MSE) and Adam optimiser.
epochs = 20
criterion = F.mse_loss
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train the auto-encoder to reconstruct its own input.
model.train()
for epoch in range(epochs):
    progress = tqdm(trn_dataloader, desc=f'epoch {epoch + 1}/{epochs}')
    # Each batch is a 3-tuple; only the middle element is fed to the model.
    # The first element is bound to `_paths` so the notebook-level `path`
    # (the dataset glob string) is not overwritten by the loop.
    for _paths, cur, _target in progress:
        # The loader uses pinned memory, so the host-to-GPU copy can be async.
        cur = cur.cuda(non_blocking=True)
        # The model returns [reconstruction, input]; only the first is needed.
        output, _ = model(cur)
        # (prediction, target) order; MSE is symmetric so the value is unchanged.
        loss = criterion(output, cur)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        progress.set_postfix_str(f'loss {loss.item()}')

 50%|████▉     | 265/533 [05:18<05:21,  1.20s/it, loss 117.00360107421875]

In [25]:
# Shape of the first entry along dim 0 of `output`, the reconstruction kept
# from the most recent model call.
# NOTE(review): this cell's execution count is out of order, so the recorded
# output may come from an earlier experiment — re-run to confirm.
output[0].shape

torch.Size([1, 2000, 3])

In [None]:
# Training run with per-epoch loss history, checkpointing and early stopping.
epochs = 20
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Batches are moved to whatever device the model's parameters live on.
device = next(model.parameters()).device
model.train()
hist_loss = []
best_loss = float('inf')
early_stopping_count = 0
for epoch in range(epochs):
    cost = .0
    # Each batch is a 3-tuple; only the middle element is fed to the model.
    for _paths, x, _target in tqdm(trn_dataloader):
        x = x.to(device)
        # The model returns [reconstruction, input]; only the first is needed.
        pred, _ = model(x)

        ## MSE loss (mean squared error), in (prediction, target) order
        loss = criterion(pred, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        cost += loss.item()
    avg_loss = cost / len(trn_dataloader)
    hist_loss.append(avg_loss)
    print('epoch:', epoch, 'loss:', avg_loss)

    # Early stopping: a checkpoint is written whenever the epoch loss beats
    # the best value so far (this includes the first epoch); training stops
    # after 3 consecutive epochs without such an improvement.
    if avg_loss < best_loss:
        best_loss = avg_loss
        early_stopping_count = 0
        # NOTE(review): this is a Google-Drive-on-Colab path, unlike the
        # dataset path used earlier — confirm it exists where this runs.
        torch.save(model.state_dict(), f'/content/drive/MyDrive/colab/cloudflow/model_state_{epoch}.pt')
    else:
        early_stopping_count += 1
    if early_stopping_count >= 3:
        break
