In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

from sklearn import preprocessing

In [2]:
class CustomDataset(Dataset):
  def __init__(self, x):
    self.x = x
    
  def __getitem__(self, index):
    x = self.x[index]
    return torch.Tensor(x)
  
  def __len__(self):
    return len(self.x)

In [3]:
class DeepSVDD(nn.Module):
  def __init__(self):
    super().__init__()
    self.svdd = nn.Sequential(
      nn.Linear(7, 32, bias=False),
      nn.BatchNorm1d(32, affine=False),
      nn.ReLU(),
      nn.Linear(32, 16, bias=False),
      nn.BatchNorm1d(16, affine=False),
      nn.ReLU(),
      nn.Linear(16, 8, bias=False),
      nn.BatchNorm1d(8, affine=False),
      nn.ReLU(),
      nn.Linear(8, 3, bias=False)
    )
    
  def forward(self, x):
    x = self.svdd(x)
    return x
  
class pretrain_autoencoder(nn.Module):
  def __init__(self):
    super().__init__()
    self.encoder = nn.Sequential(
      nn.Linear(7, 32, bias=False),
      nn.BatchNorm1d(32, affine=False),
      nn.ReLU(),
      nn.Linear(32, 16, bias=False),
      nn.BatchNorm1d(16, affine=False),
      nn.ReLU(),
      nn.Linear(16, 8, bias=False),
      nn.BatchNorm1d(8, affine=False),
      nn.ReLU(),
      nn.Linear(8, 3, bias=False)
    )
    
    self.decoder = nn.Sequential(
      nn.Linear(3, 8, bias=False),
      nn.BatchNorm1d(8, affine=False),
      nn.ReLU(),
      nn.Linear(8, 16, bias=False),
      nn.BatchNorm1d(16, affine=False),
      nn.ReLU(),
      nn.Linear(16, 32, bias=False),
      nn.BatchNorm1d(32, affine=False),
      nn.ReLU(),
      nn.Linear(32, 7, bias=False)
    )
  def encode(self, x):
    return self.encoder(x)
  
  def decode(self, x):
    return self.decoder(x)
  
  def forward(self, x):
    x = self.encode(x)
    x = self.decode(x)
    return x

In [4]:
class TrainerDeepSVDD:
  def __init__(self):
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.lr = 0.001
    self.epochs = 50
    self.name = "type3"
    
  
  def setup(self, train_path, test_path):
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # train_df.drop(columns=["type"], inplace=True)
    scaler = preprocessing.StandardScaler()
    scaler.fit(train_df)
    scaled_train = scaler.transform(train_df)

    test_df.drop(columns=["type"], inplace=True)
    scaled_test = scaler.transform(test_df)

    train_dataset = CustomDataset(x=scaled_train)
    self.train_dataloader = DataLoader(
      dataset=train_dataset,
      batch_size=32,
      shuffle=True,
      num_workers=0
    )

    test_dataset = CustomDataset(x=scaled_test)
    self.test_dataloader = DataLoader(
      dataset=test_dataset,
      batch_size=32,
      shuffle=False,
      num_workers=0
    )

  def _weights_init_normal(self, m):
    if isinstance(m, nn.Linear):
      m.weight.data.normal_(mean=0.0, std=0.02)
      if m.bias is not None:
        m.bias.data.zero_()
           
  def set_c(self, model, dataloader, eps=0.1):
    model.eval()
    z_ = []
    with torch.no_grad():
      for x, _ in dataloader:
        x = x.to(self.device)
        z = model.encode(x)
        z_.append(z.detach())
        
    z_ = torch.cat(z_)
    c = torch.mean(z_, dim=0)
    
    # 0근처로 되는 것을 방지하는 것 같다.
    # 절댓값이 eps보다 작고 0 보다 작은 경우 -eps
    c[(abs(c) < eps) & (c < 0)] = -eps
    # 절댓값이 eps보다 작고 0보다 큰 경우 eps
    c[(abs(c) < eps) & (c > 0)] = eps
    return c
    
  def save_weights_for_DeepSVDD(self, model, dataloader):
    c = self.set_c(model, dataloader)
    net = DeepSVDD().to(self.device)
    param = model.state_dict()
    net.load_state_dict(param, strict=False)
    torch.save({"center": c.cpu().data.numpy().tolist(), "net_dict": net.state_dict()}, f".\\pretrain_model\\pretrained_param_{self.name}.pth")

  def pretrain(self):
    
    ae = pretrain_autoencoder().to(self.device)
    ae.apply(self._weights_init_normal)
    
    optimizer = optim.Adam(ae.parameters(), lr=self.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
      optimizer=optimizer,
      mode="min",
      factor=0.1,
      patience=10
    )
    
    ae.train()
    for epoch in range(self.epochs):
      total_loss = 0
      for x, _ in self.train_dataloader:
        x = x.to(self.device)
        
        optimizer.zero_grad()
        output = ae(x)
        
        recon_loss = torch.mean(torch.sum((output - x) ** 2, dim=tuple(range(1, output.dim()))))
        recon_loss.backward()
        optimizer.step()
        
        total_loss += recon_loss.item()
      
      total_loss = total_loss/len(self.train_dataloader)
      scheduler.step(total_loss)
      print(F"EPOCH: {epoch} | Loss: {total_loss}")
    
    self.save_weights_for_DeepSVDD(ae, self.train_dataloader)
  
  def train(self):
    net = DeepSVDD().to(self.device)
    
    state_dict = torch.load(f'.\\pretrain_model\\pretrained_param_{self.name}.pth')
    net.load_state_dict(state_dict["net_dict"])
    self.c = torch.Tensor(state_dict["center"]).to(self.device)
    optimizer = optim.Adam(net.parameters(), lr=self.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
      optimizer=optimizer,
      mode="min",
      factor=0.1,
      patience=10
    )
    
    net.train()
    for epoch in range(self.epochs):
      total_loss = 0
      for x, _ in self.train_dataloader:
        x = x.to(self.device)
        
        optimizer.zero_grad()
        output = net(x)
        
        loss = torch.mean(torch.sum((output - self.c) ** 2, dim=1))
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
      
      total_loss = total_loss/len(self.train_dataloader)
      scheduler.step(total_loss)
      print(F"EPOCH: {epoch} | Loss: {total_loss}")

    torch.save(net, f".\\model\\DeepSVDD_{self.name}.pth")
  
  def predict(self):
    net = torch.load(f".\\model\\DeepSVDD_{self.name}.pth")
    net.eval()
    preds = []
    z_ = []
    with torch.no_grad():
      for x, _ in self.test_dataloader:
        x = x.to(self.device)
        z = net(x)
        z_.append(z.detach().cpu())
        pred = torch.sum((z - self.c)**2, dim=1)
        preds.append(pred.detach().cpu())
    
    return z_, preds

In [5]:
# 타입별로 나눠서 학습할 수 있도록 코드 작성 하였음
deep_SVDD = TrainerDeepSVDD()
deep_SVDD.setup(
  train_path="./sampling_data/train_3.csv",
  test_path="./sampling_data/test_3.csv"
)

In [6]:
deep_SVDD.pretrain()

EPOCH: 0 | Loss: 4.477527540922165
EPOCH: 1 | Loss: 2.5480848759412766
EPOCH: 2 | Loss: 1.7675710052251816
EPOCH: 3 | Loss: 1.0960195258259773
EPOCH: 4 | Loss: 0.8386529073119163
EPOCH: 5 | Loss: 0.7454974070191384
EPOCH: 6 | Loss: 0.6826854258775711
EPOCH: 7 | Loss: 0.7118538819253445
EPOCH: 8 | Loss: 0.4886570431292057
EPOCH: 9 | Loss: 0.5109262004494667
EPOCH: 10 | Loss: 0.35462721101939676
EPOCH: 11 | Loss: 0.5246142223477364
EPOCH: 12 | Loss: 0.48459740318357947
EPOCH: 13 | Loss: 0.3309649132192135
EPOCH: 14 | Loss: 0.35874978452920914
EPOCH: 15 | Loss: 0.3420897154137492
EPOCH: 16 | Loss: 0.3632251977920532
EPOCH: 17 | Loss: 0.3219664519652724
EPOCH: 18 | Loss: 0.3204495269805193
EPOCH: 19 | Loss: 0.3836743785068393
EPOCH: 20 | Loss: 0.31766762509942054
EPOCH: 21 | Loss: 0.5438349548727274
EPOCH: 22 | Loss: 0.5128896564245224
EPOCH: 23 | Loss: 0.2606305042281747
EPOCH: 24 | Loss: 0.4252956833690405
EPOCH: 25 | Loss: 0.3720328968018293
EPOCH: 26 | Loss: 0.3449218852445483
EPOCH: 2

In [7]:
deep_SVDD.train()

EPOCH: 0 | Loss: 0.5034711375832558
EPOCH: 1 | Loss: 0.3066940262913704
EPOCH: 2 | Loss: 0.2097863256931305
EPOCH: 3 | Loss: 0.1447579436004162
EPOCH: 4 | Loss: 0.10501226373016834
EPOCH: 5 | Loss: 0.06623205877840518
EPOCH: 6 | Loss: 0.044918985664844514
EPOCH: 7 | Loss: 0.034597880113869904
EPOCH: 8 | Loss: 0.023053955752402544
EPOCH: 9 | Loss: 0.016987223550677298
EPOCH: 10 | Loss: 0.012653296045027674
EPOCH: 11 | Loss: 0.00971604692749679
EPOCH: 12 | Loss: 0.0078115747310221195
EPOCH: 13 | Loss: 0.008705945522524416
EPOCH: 14 | Loss: 0.0058362579206004735
EPOCH: 15 | Loss: 0.005296617129351944
EPOCH: 16 | Loss: 0.00558548109838739
EPOCH: 17 | Loss: 0.005820365017279982
EPOCH: 18 | Loss: 0.004697464278433472
EPOCH: 19 | Loss: 0.003789325314573944
EPOCH: 20 | Loss: 0.003112463798606768
EPOCH: 21 | Loss: 0.002565798896830529
EPOCH: 22 | Loss: 0.003867847751826048
EPOCH: 23 | Loss: 0.003838942927541211
EPOCH: 24 | Loss: 0.0023147810832597317
EPOCH: 25 | Loss: 0.0020612790423911063
EPOC

In [8]:
# 시각화 코드
def plot_anomaly_data(df):
  trace1 = go.Scatter3d(
    x=df[df.abnormal == 0]["x"],
    y=df[df.abnormal == 0]["y"],
    z=df[df.abnormal == 0]["z"],
    mode="markers",
    name="normal"
  )
  trace2 = go.Scatter3d(
    x=df[df.abnormal == 1]["x"],
    y=df[df.abnormal == 1]["y"],
    z=df[df.abnormal == 1]["z"],
    mode="markers",
    name="abnormal"
  )
  
  layout = go.Layout(
    title="abnomal & normal"
  )
  fig = go.Figure(data=[trace1, trace2], layout=layout)
  fig.show()

In [9]:
z, preds = deep_SVDD.predict()

In [10]:
preds = torch.cat(preds).numpy()

In [11]:
np.histogram(preds, bins=10)

(array([910,   1,   1,   0,   1,   0,   2,   0,   1,   2], dtype=int64),
 array([4.0672847e-05, 1.8735847e-02, 3.7431020e-02, 5.6126192e-02,
        7.4821368e-02, 9.3516544e-02, 1.1221171e-01, 1.3090688e-01,
        1.4960206e-01, 1.6829723e-01, 1.8699241e-01], dtype=float32))

In [12]:
pred_df = pd.DataFrame(torch.cat(z).numpy(), columns=['x', 'y', 'z'])

In [13]:
normal_mask = preds < 1.8735847e-02
answer = []
for mask in normal_mask:
  if mask:
    answer.append(0)
  else:
    answer.append(1)
    
pred_df["abnormal"] = [0 if mask else 1 for mask in normal_mask]
pred_df["color"] = pred_df["abnormal"].apply(lambda x: "blue" if x==0 else "red")
pred_df.head()

Unnamed: 0,x,y,z,abnormal,color
0,0.123242,0.135025,0.094979,0,blue
1,0.119375,0.113816,0.093016,0,blue
2,0.132871,0.103665,0.0927,0,blue
3,0.119259,0.109983,0.093485,0,blue
4,0.1311,0.102111,0.09337,0,blue


In [14]:
plot_anomaly_data(pred_df)

In [15]:
submit = pd.read_csv(".\\dataset\\answer_sample.csv")
# 04567
# submit["label"][:1296] = pred_df["abnormal"][:1296]
# submit["label"][1296+1107+1098+918:] = pred_df["abnormal"][1296:]

# 1
# submit["label"][1296:1296+1107] = pred_df["abnormal"]

# 2
# submit["label"][1296+1107:1296+1107+1098] = pred_df["abnormal"]

# 3
submit["label"][1296+1107+1098:1296+1107+1098+918] = pred_df["abnormal"]

In [16]:
submit.to_csv("submit_deepsvdd_type3.csv", index=False)