In [1]:
import random
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from sklearn import preprocessing
from sklearn.neighbors import LocalOutlierFactor

In [2]:
# Helper that pins every source of randomness used in this notebook
def seed_everything(seed):
  """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels.

  Args:
    seed: Integer seed applied to every random number generator.
  """
  random.seed(seed)
  # Setting PYTHONHASHSEED at runtime only affects interpreters started after this point.
  os.environ['PYTHONHASHSEED'] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  # benchmark must be off: with it on, cuDNN auto-tunes and may select a different
  # convolution algorithm from run to run, which breaks reproducibility.
  torch.backends.cudnn.benchmark = False

In [3]:
# Custom PyTorch dataset definition
class CustomDataset(Dataset):
  """Map-style dataset over an indexable collection of feature rows."""

  def __init__(self, x):
    """Store a reference to the samples `x` (no copy is made)."""
    self.x = x

  def __len__(self):
    """Return the number of samples, i.e. `len(x)`."""
    return len(self.x)

  def __getitem__(self, index):
    """Return the sample at `index` converted with the `torch.Tensor` constructor."""
    return torch.Tensor(self.x[index])

In [4]:
# AutoEncoder model used for pre-training on normal data
class AE(nn.Module):
  """Fully connected autoencoder: 7 -> 32 -> 16 -> 8 -> 3 -> 8 -> 16 -> 32 -> 7.

  Every Linear layer is bias-free and every hidden layer is followed by a
  non-affine BatchNorm1d and a ReLU; the last layer of each half is a plain
  Linear projection.
  """

  def __init__(self):
    super().__init__()
    # Author's note: in Deep SVDD, keeping bias=True leads to a trivial solution,
    # so the bias terms are removed. For AE + LOF, bias=True would also be fine.
    self.encoder = self._stack((7, 32, 16, 8, 3))  # compress to 3 dimensions
    self.decoder = self._stack((3, 8, 16, 32, 7))

  @staticmethod
  def _stack(widths):
    """Build Linear/BatchNorm1d/ReLU groups for consecutive widths, ending on a bare Linear."""
    modules = []
    final = len(widths) - 2
    for pos, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
      modules.append(nn.Linear(fan_in, fan_out, bias=False))
      if pos != final:
        modules.append(nn.BatchNorm1d(fan_out, affine=False))
        modules.append(nn.ReLU())
    return nn.Sequential(*modules)

  def encode(self, x):
    """Run `x` through the encoder and return the 3-feature code."""
    return self.encoder(x)

  def decode(self, x):
    """Run the code `x` through the decoder and return the 7-feature reconstruction."""
    return self.decoder(x)

  def forward(self, x):
    """Encode then decode `x`."""
    return self.decode(self.encode(x))

In [5]:
# Trainer class that fits the autoencoder
class Trainer:
  """Load and standardise a training CSV, train `AE` on it and save the weights.

  Call `setup(path)` first to build the data loader, then `train()`.

  Attributes:
    epochs: Number of passes over the training data.
    lr: Initial learning rate given to Adam.
    seed: Seed passed to `seed_everything` in `setup`.
    batch_size: Mini-batch size of the training DataLoader.
    save_path: File the trained state dict is written to.
    device: CUDA device when available, otherwise CPU.
    scaler: StandardScaler fitted on the training data (set by `setup`); reuse it
      to transform any data that is later fed to the trained model.
    train_dataloader: Shuffled DataLoader over the scaled training rows (set by `setup`).
    model: The `AE` instance being trained (set by `train`).
  """

  def __init__(self, epochs=70, lr=0.001, seed=911, batch_size=32, save_path="./AE_model.pth"):
    """Store the hyper-parameters; the defaults reproduce the original fixed values."""
    self.epochs = epochs
    self.lr = lr
    self.seed = seed
    self.batch_size = batch_size
    self.save_path = save_path
    self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

  def setup(self, train_data_path):
    """Read the training CSV, standardise it and build the training DataLoader.

    Args:
      train_data_path: Path of the CSV file read with `pd.read_csv`.
    """
    seed_everything(self.seed)  # fix the random seed
    train_df = pd.read_csv(train_data_path)
    # Drop the "type" column only if it exists, so the same code handles CSVs
    # with and without it instead of raising KeyError.
    features_df = train_df.drop(columns=["type"], errors="ignore")
    # Author's note: standardising numeric data helps model performance.
    # The fitted scaler is kept on the instance so it can be reused after training.
    self.scaler = preprocessing.StandardScaler()
    scaled_train = self.scaler.fit_transform(features_df)

    train_dataset = CustomDataset(x=scaled_train)
    # NOTE(review): AE uses BatchNorm1d, which raises in training mode on a batch
    # holding a single sample; confirm the row count never leaves a final batch of 1.
    self.train_dataloader = DataLoader(
      dataset=train_dataset,
      batch_size=self.batch_size,
      shuffle=True,
      num_workers=0
    )

  def _weights_init_normal(self, m):
    """Initialise Linear layers with N(0, 0.02) weights and zero bias; ignore other modules."""
    if isinstance(m, nn.Linear):
      nn.init.normal_(m.weight, mean=0.0, std=0.02)
      if m.bias is not None:
        nn.init.zeros_(m.bias)

  def train(self):
    """Train a fresh `AE` on `self.train_dataloader` and save its state dict.

    The loss is the squared reconstruction error summed over all non-batch
    dimensions and averaged over the batch. The learning rate is reduced by a
    factor of 10 when the mean epoch loss has not improved for 10 epochs.
    The state dict is saved under the key "param" to `self.save_path`.
    """
    self.model = AE().to(self.device)
    self.model.apply(self._weights_init_normal)
    # Use the configured learning rate rather than a second hard-coded copy of it.
    optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
      optimizer=optimizer,
      mode="min",
      factor=0.1,
      patience=10
    )

    self.model.train()
    for epoch in range(1, self.epochs+1):
      total_loss = 0
      for x in self.train_dataloader:
        x = x.to(self.device)

        optimizer.zero_grad()
        output = self.model(x)

        # Sum squared error over every dimension except the batch one, then average.
        recon_loss = torch.mean(torch.sum((output - x) ** 2, dim=tuple(range(1, output.dim()))))
        recon_loss.backward()
        optimizer.step()

        total_loss += recon_loss.item()

      # Mean batch loss of the epoch drives the plateau scheduler.
      total_loss = total_loss/len(self.train_dataloader)
      scheduler.step(total_loss)
      print(f"EPOCH: {epoch} | Loss: {total_loss}")

    torch.save({"param": self.model.state_dict()}, self.save_path)

In [6]:
# Build the trainer, load and standardise the training CSV, then fit and save the autoencoder.
trainer = Trainer()
trainer.setup("./dataset/train_data.csv")
trainer.train()

EPOCH: 1 | Loss: 3.010600622598227
EPOCH: 2 | Loss: 1.336807677788394
EPOCH: 3 | Loss: 1.0731953678580073
EPOCH: 4 | Loss: 0.8932532751327985
EPOCH: 5 | Loss: 0.8856466234504402
EPOCH: 6 | Loss: 0.7840184410671135
EPOCH: 7 | Loss: 0.6592780877049867
EPOCH: 8 | Loss: 0.49815415107197575
EPOCH: 9 | Loss: 0.5415858570244405
EPOCH: 10 | Loss: 0.42986634663947215
EPOCH: 11 | Loss: 0.4481877069194595
EPOCH: 12 | Loss: 0.38203221336974724
EPOCH: 13 | Loss: 0.4674478147130508
EPOCH: 14 | Loss: 0.39256715150429056
EPOCH: 15 | Loss: 0.3471336748402614
EPOCH: 16 | Loss: 0.30872259782506273
EPOCH: 17 | Loss: 0.28469432803330486
EPOCH: 18 | Loss: 0.3036712133748965
EPOCH: 19 | Loss: 0.3233362057959879
EPOCH: 20 | Loss: 0.25578938898715103
EPOCH: 21 | Loss: 0.33158961818976834
EPOCH: 22 | Loss: 0.27197882840288923
EPOCH: 23 | Loss: 0.3147136362922656
EPOCH: 24 | Loss: 0.2725082644587987
EPOCH: 25 | Loss: 0.24885935655661992
EPOCH: 26 | Loss: 0.33231717115872866
EPOCH: 27 | Loss: 0.2983536461440774
E