In [4]:
# 필요한 라이브러리 설정
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import models
from pytorch_model_summary import summary

import numpy as np
from sklearn.preprocessing import MinMaxScaler

import os
from os.path import join
from pickle import load
from tqdm import tqdm
import random
import datetime

In [8]:
######### Configuration ########
# Experiment-level settings
exp_name = "torch_20220511"  # experiment name, or today's date
modelVersion = "Dense_1st_torch"
nameDataset = "IWALQQ_AE_1st"
dataType = "angle"  # alternative value: moBWHT

#################################
# The values below are swept as a grid!
#################################
list_learningRate = dict(enumerate((0.006, 0.008, 0.01)))  # opt1
list_batch_size = dict(enumerate((128,)))  # opt2
list_lossFunction = dict(enumerate(("MAE",)))  # opt2

totalFold = 5  # total number of folds (5-fold cross validation)
epochs = 1000  # total number of epochs
log_interval = 10  # frequency for saving the log file
count = 0  # on SCC: index used for grid-training

In [26]:
# Use the GPU whenever CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [27]:
# 내 모델을 구현하기 위한 세부 sub module
class VariationalEncoder(nn.Module):
    """Two-layer bidirectional LSTM encoder producing a sampled latent vector.

    The input sequence goes through two stacked bidirectional LSTMs. The
    features of the last time step are projected to a mean ``mu`` and a
    log standard deviation, and a latent sample ``z = mu + sigma * eps`` with
    ``eps ~ N(0, 1)`` is returned (reparameterisation trick).

    Args:
        seq_len: Sequence length; stored on the instance, not used in ``forward``.
        n_features: Number of features per time step (LSTM input size).
        embedding_dim: Size of the latent vector.
        device: Kept for backward compatibility. Noise is now drawn with
            ``torch.randn_like`` so it always lives on the same device as the
            activations, whatever device the module is moved to.

    Attributes:
        kl: KL divergence KL(N(mu, sigma) || N(0, 1)) summed over the batch and
            latent dimensions for the most recent ``forward`` call (0 before
            the first call).
    """

    def __init__(self, seq_len, n_features, embedding_dim, device):
        super(VariationalEncoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = (
            embedding_dim, 2 * embedding_dim
        )
        self.rnn1 = nn.LSTM(
          input_size=n_features,
          hidden_size=self.hidden_dim,
          num_layers=1,
          batch_first=True,
          bidirectional=True
        )
        self.rnn2 = nn.LSTM(
          input_size=self.hidden_dim * 2,
          hidden_size=embedding_dim,
          num_layers=1,
          batch_first=True,
          bidirectional=True
        )
        # Input size is 2 * embedding_dim because rnn2 is bidirectional.
        self.mu = torch.nn.Linear(self.embedding_dim * 2, self.embedding_dim)
        # Despite its name this layer outputs log(sigma); sigma = exp(output).
        self.sigma = torch.nn.Linear(self.embedding_dim * 2, self.embedding_dim)
        self.kl = 0

    def forward(self, x):
        """Encode ``x`` (batch, time, n_features) into a latent sample (batch, embedding_dim)."""
        x, _ = self.rnn1(x)
        x, _ = self.rnn2(x)
        # NOTE(review): at the last time step the reverse direction of a
        # bidirectional LSTM has only processed one element; kept as in the
        # original design.
        last_step = x[:, -1, :]
        mu = self.mu(last_step)
        log_sigma = self.sigma(last_step)
        sigma = torch.exp(log_sigma)
        # randn_like samples on the same device/dtype as mu.
        z = mu + sigma * torch.randn_like(mu)
        # KL(N(mu, sigma) || N(0, 1)) = 0.5 * (sigma^2 + mu^2 - 1) - log(sigma).
        # log_sigma is used directly instead of log(exp(.)) for numerical stability.
        self.kl = (0.5 * (sigma ** 2 + mu ** 2 - 1.0) - log_sigma).sum()
        return z

In [32]:
# Both the model and the dummy batch are placed on `device`; otherwise the
# check fails with a device mismatch on a CUDA machine.
# show input shape
print(summary(VariationalEncoder(101, 42, 30, device).to(device), torch.zeros((32, 101, 42), device=device), show_input=True))
# show output shape
print(summary(VariationalEncoder(101, 42, 30, device).to(device), torch.zeros((32, 101, 42), device=device), show_input=False))

-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
            LSTM-1       [32, 101, 42]          49,920          49,920
            LSTM-2      [32, 101, 120]          36,480          36,480
          Linear-3            [32, 60]           1,830           1,830
          Linear-4            [32, 60]           1,830           1,830
Total params: 90,060
Trainable params: 90,060
Non-trainable params: 0
-----------------------------------------------------------------------
------------------------------------------------------------------------------------------------
      Layer (type)                                 Output Shape         Param #     Tr. Param #
            LSTM-1     [32, 101, 120], [2, 32, 60], [2, 32, 60]          49,920          49,920
            LSTM-2      [32, 101, 60], [2, 32, 30], [2, 32, 30]          36,480          36,480
          Linear-3                            

In [33]:
# https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/25
class TimeDistributed(nn.Module):
    """Apply ``module`` independently to every time step of a batched sequence.

    For an input with three or more dimensions, the first two axes are merged,
    ``module`` is applied to the resulting 2-D tensor, and the first two axes
    are restored. Inputs with at most two dimensions have no separate time
    axis and are passed to ``module`` unchanged.

    Args:
        module: The wrapped ``nn.Module`` applied per time step.
    """

    def __init__(self, module):
        super(TimeDistributed, self).__init__()
        self.module = module

    def forward(self, x):
        # No separate time axis: merging dims 0 and 1 would fold the feature
        # axis into the batch, so call the wrapped module directly.
        if x.dim() <= 2:
            return self.module(x)
        dim0, dim1 = x.size(0), x.size(1)
        # (dim0 * dim1, input_size); reshape copies only if x is non-contiguous.
        flat = x.reshape(dim0 * dim1, -1)
        y = self.module(flat)
        # Restore the two leading axes: (dim0, dim1, output_size).
        return y.reshape(dim0, dim1, -1)

In [34]:
class Decoder(nn.Module):
    """Decode a latent vector into a sequence with two bidirectional LSTMs.

    The latent vector is repeated ``seq_len`` times along a new time axis,
    passed through two stacked bidirectional LSTMs, and each time step is
    projected to ``n_features`` values by a time-distributed linear layer.

    Args:
        seq_len: Number of time steps to generate.
        input_dim: Size of the latent vector.
        n_features: Number of output features per time step.
    """

    def __init__(self, seq_len, input_dim, n_features):
        super().__init__()
        self.seq_len = seq_len
        self.input_dim = input_dim
        self.hidden_dim = 2 * input_dim
        self.n_features = n_features

        # Options shared by both recurrent layers.
        lstm_options = dict(num_layers=1, batch_first=True, bidirectional=True)
        self.rnn1 = nn.LSTM(input_size=input_dim, hidden_size=input_dim, **lstm_options)
        # rnn1 is bidirectional, so its output width is 2 * input_dim.
        self.rnn2 = nn.LSTM(input_size=2 * input_dim, hidden_size=self.hidden_dim, **lstm_options)
        # rnn2 is bidirectional as well: 2 * hidden_dim features per step.
        self.output_layer = torch.nn.Linear(2 * self.hidden_dim, self.n_features)
        self.timedist = TimeDistributed(self.output_layer)

    def forward(self, x):
        """Map latent input of size ``input_dim`` to a (batch, seq_len, n_features) tensor."""
        # (batch, 1, input_dim) -> (batch, seq_len, input_dim), like Keras RepeatVector.
        latent = x.reshape(-1, 1, self.input_dim)
        repeated = latent.repeat(1, self.seq_len, 1)
        features, _ = self.rnn1(repeated)
        features, _ = self.rnn2(features)
        return self.timedist(features)

In [35]:
# Print the decoder summary for one latent vector of size 30:
# first with per-layer input shapes, then with per-layer output shapes.
print(
    *[
        summary(Decoder(101, 30, 42), torch.zeros(30), show_input=with_input)
        for with_input in (True, False)
    ],
    sep="\n",
)

-------------------------------------------------------------------------
        Layer (type)         Input Shape         Param #     Tr. Param #
              LSTM-1        [1, 101, 30]          14,880          14,880
              LSTM-2        [1, 101, 60]          58,560          58,560
   TimeDistributed-3       [1, 101, 120]           5,082           5,082
Total params: 78,522
Trainable params: 78,522
Non-trainable params: 0
-------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------
        Layer (type)                              Output Shape         Param #     Tr. Param #
              LSTM-1      [1, 101, 60], [2, 1, 30], [2, 1, 30]          14,880          14,880
              LSTM-2     [1, 101, 120], [2, 1, 60], [2, 1, 60]          58,560          58,560
   TimeDistributed-3                              [1, 101, 42]           5,082           5,082
Total params: 

In [36]:
# main module
class RecurrentVariationalAutoencoder(nn.Module):
    """Recurrent VAE: a VariationalEncoder followed by a Decoder.

    Args:
        seq_len: Sequence length handed to both sub-modules.
        n_features: Number of features per time step.
        embedding_dim: Size of the latent vector (default 30).
        device: Device both sub-modules are moved to (default 'cuda').
    """

    def __init__(self, seq_len, n_features, embedding_dim=30, device='cuda'):
        super().__init__()
        encoder = VariationalEncoder(seq_len, n_features, embedding_dim, device)
        self.encoder = encoder.to(device)
        decoder = Decoder(seq_len, embedding_dim, n_features)
        self.decoder = decoder.to(device)

    def forward(self, x):
        """Encode ``x`` to a latent sample and decode it back to a sequence."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return reconstruction

In [37]:

# The model moves its sub-modules to `device`, so the dummy batch must be
# created on the same device or the forward pass fails on a CUDA machine.
# show input shape
print(summary(RecurrentVariationalAutoencoder(101, 42, 30, device), torch.zeros((16, 101, 42), device=device), show_input=True))

# show output shape
print(summary(RecurrentVariationalAutoencoder(101, 42, 30, device), torch.zeros((16, 101, 42), device=device), show_input=False))

----------------------------------------------------------------------------
           Layer (type)         Input Shape         Param #     Tr. Param #
   VariationalEncoder-1       [16, 101, 42]          90,060          90,060
              Decoder-2            [16, 30]          78,522          78,522
Total params: 168,582
Trainable params: 168,582
Non-trainable params: 0
----------------------------------------------------------------------------
----------------------------------------------------------------------------
           Layer (type)        Output Shape         Param #     Tr. Param #
   VariationalEncoder-1            [16, 30]          90,060          90,060
              Decoder-2       [16, 101, 42]          78,522          78,522
Total params: 168,582
Trainable params: 168,582
Non-trainable params: 0
----------------------------------------------------------------------------


# 학습 루프 함수화 (구현)

In [39]:
# 학습루프 구현하기
# def train(dataloader, model, loss_fn, optimizer, device): 

def train_vae(model, dataloader, summarywriter, epoch, device, optimizer):
    """Train ``model`` for one epoch and log the mean per-sample loss.

    The loss of a batch is the summed squared reconstruction error plus the
    encoder's KL term (``model.encoder.kl``).

    Args:
        model: Autoencoder exposing ``encoder.kl`` after a forward pass.
        dataloader: Yields ``(data, target)`` pairs; only ``data`` is used.
        summarywriter: TensorBoard ``SummaryWriter`` receiving the logs.
        epoch: Epoch index used as the global step of the scalar.
        device: Device the input batches are moved to.
        optimizer: Optimizer updating ``model``'s parameters.

    NOTE(review): the hyper-parameter log reads the notebook globals
    ``dataType``, ``learningRate``, ``batch_size``, ``nameDataset`` and
    ``lossFunction``; some of them are not defined in the visible cells and
    must exist before this function is called.
    """
    model.train()
    train_loss = 0.0  # must be initialised before it is accumulated
    for data, _ in tqdm(dataloader):
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = ((data - output)**2).sum() + model.encoder.kl
        loss.backward()
        optimizer.step()
        # `loss` is already a sum over the samples of the batch, so it is
        # accumulated as-is; multiplying by the batch size would count it twice.
        train_loss += loss.item()
    # Mean loss per sample over the whole epoch.
    train_loss /= len(dataloader.sampler)
    # NOTE(review): the tag says MAE but the logged value is squared error + KL.
    summarywriter.add_scalar('loss(MAE)', train_loss, epoch)
    summarywriter.add_hparams(
                    {"sess": "train", "Type": dataType, "lr": learningRate, "bsize": batch_size, "DS":nameDataset , 'lossFunc':lossFunction},
                    {
                        "loss": train_loss,
                    },
                )

def test_vae(model, dataloader, summarywriter, epoch, device):
    """Evaluate ``model`` on ``dataloader`` and log the mean per-sample loss.

    Uses the same loss as training: summed squared reconstruction error plus
    the encoder's KL term (``model.encoder.kl``).

    Args:
        model: Autoencoder exposing ``encoder.kl`` after a forward pass.
        dataloader: Yields ``(data, target)`` pairs; only ``data`` is used.
        summarywriter: TensorBoard ``SummaryWriter`` receiving the logs.
        epoch: Epoch index used as the global step of the scalar.
        device: Device the input batches are moved to.

    NOTE(review): the hyper-parameter log reads the notebook globals
    ``dataType``, ``learningRate``, ``batch_size``, ``nameDataset`` and
    ``lossFunction``; some of them are not defined in the visible cells and
    must exist before this function is called.
    """
    model.eval()  # switch layers such as batch norm / dropout to evaluation mode
    test_loss = 0.0  # must be initialised before it is accumulated
    with torch.no_grad():  # no autograd bookkeeping: less memory, faster
        for data, _ in dataloader:
            data = data.to(device)
            output = model(data)
            loss = ((data - output)**2).sum() + model.encoder.kl
            # `loss` is already summed over the batch; do not rescale by batch size.
            test_loss += loss.item()
    # Mean loss per sample over the whole evaluation set.
    test_loss /= len(dataloader.sampler)
    # NOTE(review): the tag says MAE but the logged value is squared error + KL.
    summarywriter.add_scalar('loss(MAE)', test_loss, epoch)
    summarywriter.add_hparams(
                    # "sess" was "train" here, which mislabelled evaluation runs.
                    {"sess": "test", "Type": dataType, "lr": learningRate, "bsize": batch_size, "DS":nameDataset , 'lossFunc':lossFunction},
                    {
                        "loss": test_loss,
                    },
                )

# 학습 루프 함수화 (vae 예제)

In [None]:
def train(autoencoder, data, epochs=20):
    """Reference VAE training loop (Adam with default settings).

    Each batch minimises the summed squared reconstruction error plus the
    encoder's KL term.

    Args:
        autoencoder: Model exposing ``encoder.kl`` after a forward pass.
        data: Iterable of ``(x, y)`` pairs; ``y`` is ignored.
        epochs: Number of passes over ``data`` (default 20).

    Returns:
        The same ``autoencoder`` object after training.
    """
    optimizer = torch.optim.Adam(autoencoder.parameters())
    for _ in range(epochs):
        for batch, _labels in data:
            batch = batch.to(device)  # notebook-level `device` (GPU when available)
            optimizer.zero_grad()
            reconstruction = autoencoder(batch)
            squared_error = ((batch - reconstruction) ** 2).sum()
            loss = squared_error + autoencoder.encoder.kl
            loss.backward()
            optimizer.step()
    return autoencoder

# 학습 루프 함수화 (예제)
- 참고 https://koreapy.tistory.com/739

In [None]:
# Classification example: cross-entropy criterion and plain SGD (lr = 1e-3).
# NOTE(review): `model` is not defined in any visible cell; it must exist
# before this cell is run.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [None]:
def train(dataloader, model, loss_fn, optimizer, device):
    """Run one training epoch and print the loss every 100 batches.

    Args:
        dataloader: ``DataLoader`` yielding ``(X, y)`` batches; its
            ``dataset`` length is used for the progress print-out.
        model: Model to optimise.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.
    """
    size = len(dataloader.dataset)
    # test() leaves the model in eval mode; switch back so dropout/batch-norm
    # behave correctly in every epoch after the first.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            # Separate name so the loss tensor is not rebound to a float.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn, device): 
    size = len(dataloader.dataset) 
    num_batches = len(dataloader) 
    model.eval() 
    test_loss, correct = 0, 0 
    with torch.no_grad(): 
        for X, y in dataloader: X, y = X.to(device), y.to(device) 
        pred = model(X) 
        test_loss += loss_fn(pred, y).item() 
        correct += (pred.argmax(1) == y).type(torch.float).sum().item() 
        test_loss /= num_batches 
        correct /= size 
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Example driver: alternate one training epoch and one evaluation pass.
# NOTE(review): this rebinds `epochs`, overriding the value set in the
# configuration cell for everything executed afterwards.
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # `device` is a required positional argument of both train() and test().
    train(train_dataloader, model, loss_fn, optimizer, device)
    test(test_dataloader, model, loss_fn, device)
print("Done!")

# 배운것


In [None]:
# Demo: transposing changes the strides, so the tensor stops being contiguous;
# .contiguous() makes a compact copy that can then be flattened with .view().
a = torch.randn(2, 3, 4)
print(a.shape)
print(a.stride())
print(a.is_contiguous())
a = torch.transpose(a, 0, 1)
print(a.is_contiguous())
a = a.contiguous().view(-1)
print(a.shape)

In [2]:
# Scratch check of the summed squared reconstruction error.
# NOTE(review): `data` and `output` are not defined at notebook level in the
# visible cells (they only exist as locals inside the training functions),
# which matches the NameError recorded below — define them first or remove this cell.
a = ((data - output)**2).sum()

NameError: name 'data' is not defined