<a href="https://colab.research.google.com/github/jhChoi1997/EE488_AI_Convergence_Capstone_Design_Anomaly_Detection_2022spring/blob/main/EE488_DCASE2020_WaveNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!gdown https://drive.google.com/uc?id=1p0aANQlQRKqM9FGhkV3j2h55PJUOgEXg
!unzip valve.zip -d ./valve/

!gdown https://drive.google.com/uc?id=15pTQ2nleRArII1BPJujw1Y7Kj13xs25k
!unzip valve_test.zip -d ./valve_test/

Downloading...
From: https://drive.google.com/uc?id=1p0aANQlQRKqM9FGhkV3j2h55PJUOgEXg
To: /content/valve.zip
100% 812M/812M [00:04<00:00, 174MB/s]
Archive:  valve.zip
  inflating: ./valve/normal_id_00_00000000.wav  
  inflating: ./valve/normal_id_00_00000001.wav  
  inflating: ./valve/normal_id_00_00000002.wav  
  inflating: ./valve/normal_id_00_00000003.wav  
  inflating: ./valve/normal_id_00_00000004.wav  
  inflating: ./valve/normal_id_00_00000005.wav  
  inflating: ./valve/normal_id_00_00000006.wav  
  inflating: ./valve/normal_id_00_00000007.wav  
  inflating: ./valve/normal_id_00_00000008.wav  
  inflating: ./valve/normal_id_00_00000009.wav  
  inflating: ./valve/normal_id_00_00000010.wav  
  inflating: ./valve/normal_id_00_00000011.wav  
  inflating: ./valve/normal_id_00_00000012.wav  
  inflating: ./valve/normal_id_00_00000013.wav  
  inflating: ./valve/normal_id_00_00000014.wav  
  inflating: ./valve/normal_id_00_00000015.wav  
  inflating: ./valve/normal_id_00_00000016.wav  


In [2]:
import os
import sys
import librosa
import librosa.core
import librosa.feature
import yaml
import glob
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from sklearn import metrics

In [3]:
dataset_dir = './valve'
test_dir = './valve_test'
model_dir = './model'

n_fft = 2048
hop_length = 512
n_mels = 128
power = 2
n_mul = 6
kernel_size = 3

EPOCHS = 1
BATCH = 32

In [4]:
def file_load(wav_name):
  try:
    return librosa.load(wav_name, sr=None, mono=False)
  except:
    print('file_broken or not exists!! : {}'.format(wav_name))
    

def file_list_generator(target_dir):
  training_list_path = os.path.abspath('{dir}/*.wav'.format(dir=target_dir))
  files = sorted(glob.glob(training_list_path))
  if len(files) == 0:
    print('no_wav_file!!')
  return files


def file_to_log_mel(file_name, n_mels, n_fft, hop_length, power):
  y, sr = file_load(file_name)
  mel_spectrogram = librosa.feature.melspectrogram(y=y,
                                                   sr=sr,
                                                   n_fft=n_fft,
                                                   hop_length=hop_length,
                                                   n_mels=n_mels,
                                                   power=power)
  
  log_mel_spectrogram = 20.0 / power * np.log10(mel_spectrogram + sys.float_info.epsilon)

  return log_mel_spectrogram


def list_to_dataset(file_list, n_mels, n_fft, hop_length, power):
  for idx in tqdm(range(len(file_list))):
    log_mel = file_to_log_mel(file_list[idx],
                              n_mels=n_mels,
                              n_fft=n_fft,
                              hop_length=hop_length,
                              power=power)
    if idx == 0:
      dataset = np.zeros((len(file_list), len(log_mel[:,0]), len(log_mel[0,:])), float)
    dataset[idx, :, :] = log_mel
  
  return dataset

In [5]:
os.makedirs(model_dir, exist_ok=True)

dataset_dir = os.path.abspath(dataset_dir)
machine_type = os.path.split(dataset_dir)[1]
model_file_path = '{model}/model_{machine_type}'.format(model=model_dir, machine_type=machine_type)

files = file_list_generator(dataset_dir)
train_data = list_to_dataset(files,
                             n_mels=n_mels,
                             n_fft=n_fft,
                             hop_length=hop_length,
                             power=power)

100%|██████████| 3291/3291 [01:19<00:00, 41.27it/s]


In [6]:
train_dataset = torch.Tensor(train_data)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [7]:
print(train_dataset.shape)

torch.Size([3291, 128, 313])


In [8]:
class CausalConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation=1):
        super(CausalConv1d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.dilation = dilation

        self.conv1 = self.causal_conv(self.in_channels, self.out_channels, self.kernel_size, self.dilation)
        self.padding = self.conv1.padding[0]

    def causal_conv(self, in_channels, out_channels, kernel_size, dilation):
        pad = (kernel_size - 1) * dilation
        return nn.Conv1d(in_channels, out_channels, kernel_size, padding=pad, dilation=dilation)

    def forward(self, x):
        x = self.conv1(x)
        x = x[:, :, :-self.padding]
        return x


class ResidualBlock(nn.Module):
    def __init__(self, n_channel, n_mul, kernel_size, dilation_rate):
        super(ResidualBlock, self).__init__()
        self.n_channel = n_channel
        self.n_mul = n_mul
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.n_filter = self.n_channel * self.n_mul

        self.sigmoid_group_norm = nn.GroupNorm(1, self.n_filter)
        self.sigmoid_conv = CausalConv1d(self.n_filter, self.n_filter, self.kernel_size, self.dilation_rate)
        self.tanh_group_norm = nn.GroupNorm(1, self.n_filter)
        self.tanh_conv = CausalConv1d(self.n_filter, self.n_filter, self.kernel_size, self.dilation_rate)

        self.skip_group_norm = nn.GroupNorm(1, self.n_filter).to(device)
        self.skip_conv = nn.Conv1d(self.n_filter, self.n_channel, 1)
        self.residual_group_norm = nn.GroupNorm(1, self.n_filter)
        self.residual_conv = nn.Conv1d(self.n_filter, self.n_filter, 1)

    def forward(self, x):
        x1 = self.sigmoid_group_norm(x)
        x1 = self.sigmoid_conv(x1)
        x2 = self.tanh_group_norm(x)
        x2 = self.tanh_conv(x2)
        x1 = nn.Sigmoid()(x1)
        x2 = nn.Tanh()(x2)
        x = x1 * x2

        x1 = self.skip_group_norm(x)
        skip = self.skip_conv(x1)
        x2 = self.residual_group_norm(x)
        residual = self.residual_conv(x2)

        return skip, residual


class WaveNet(nn.Module):
    def __init__(self, n_channel, n_mul, kernel_size):
        super(WaveNet, self).__init__()

        self.n_channel = n_channel
        self.n_mul = n_mul
        self.kernel_size = kernel_size
        self.n_filter = self.n_channel * self.n_mul

        self.group_norm1 = nn.GroupNorm(1, self.n_channel)
        self.conv1 = nn.Conv1d(self.n_channel, self.n_filter, 1)

        self.block1 = ResidualBlock(self.n_channel, self.n_mul, self.kernel_size, 1)
        self.block2 = ResidualBlock(self.n_channel, self.n_mul, self.kernel_size, 2)
        self.block3 = ResidualBlock(self.n_channel, self.n_mul, self.kernel_size, 4)
        self.block4 = ResidualBlock(self.n_channel, self.n_mul, self.kernel_size, 8)
        self.block5 = ResidualBlock(self.n_channel, self.n_mul, self.kernel_size, 16)

        self.relu1 = nn.ReLU()

        self.group_norm2 = nn.GroupNorm(1, self.n_channel)
        self.conv2 = nn.Conv1d(self.n_channel, self.n_channel, 1)
        self.relu2 = nn.ReLU()
        self.group_norm3 = nn.GroupNorm(1, self.n_channel)
        self.conv3 = nn.Conv1d(self.n_channel, self.n_channel, 1)

    def forward(self, x):
        x = self.group_norm1(x)
        x = self.conv1(x)

        skip1, x = self.block1(x)
        skip2, x = self.block2(x)
        skip3, x = self.block3(x)
        skip4, x = self.block4(x)
        skip5, x = self.block5(x)
        
        skip = skip1 + skip2 + skip3 + skip4 + skip5     
      
        x = self.relu1(skip)
        x = self.group_norm2(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.group_norm3(x)
        x = self.conv3(x)
        output = x[:, :, self.get_receptive_field() - 1:-1]

        return output

    def get_receptive_field(self):
        receptive_field = 1
        for _ in range(5):
            receptive_field = receptive_field * 2 + self.kernel_size - 2
        return receptive_field


In [9]:
model = WaveNet(n_mels, n_mul, kernel_size).to(device)
print(model)

WaveNet(
  (group_norm1): GroupNorm(1, 128, eps=1e-05, affine=True)
  (conv1): Conv1d(128, 768, kernel_size=(1,), stride=(1,))
  (block1): ResidualBlock(
    (sigmoid_group_norm): GroupNorm(1, 768, eps=1e-05, affine=True)
    (sigmoid_conv): CausalConv1d(
      (conv1): Conv1d(768, 768, kernel_size=(3,), stride=(1,), padding=(2,))
    )
    (tanh_group_norm): GroupNorm(1, 768, eps=1e-05, affine=True)
    (tanh_conv): CausalConv1d(
      (conv1): Conv1d(768, 768, kernel_size=(3,), stride=(1,), padding=(2,))
    )
    (skip_group_norm): GroupNorm(1, 768, eps=1e-05, affine=True)
    (skip_conv): Conv1d(768, 128, kernel_size=(1,), stride=(1,))
    (residual_group_norm): GroupNorm(1, 768, eps=1e-05, affine=True)
    (residual_conv): Conv1d(768, 768, kernel_size=(1,), stride=(1,))
  )
  (block2): ResidualBlock(
    (sigmoid_group_norm): GroupNorm(1, 768, eps=1e-05, affine=True)
    (sigmoid_conv): CausalConv1d(
      (conv1): Conv1d(768, 768, kernel_size=(3,), stride=(1,), padding=(4,), dila

In [10]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [11]:
def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, X in enumerate(dataloader):
    X = X.to(device)

    pred = model(X)
    receptive_field = model.get_receptive_field()

    loss = loss_fn(pred, X[:, :, receptive_field:])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 30 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [12]:
for t in range(EPOCHS):
  print(f"Epoch {t + 1}\n-------------------------------")
  train(train_dataloader, model, loss_fn, optimizer)

Epoch 1
-------------------------------
loss: 676.721436  [    0/ 3291]
loss: 525.730957  [  960/ 3291]
loss: 327.972717  [ 1920/ 3291]
loss: 185.729507  [ 2880/ 3291]


In [13]:
def get_anomaly_score(true, pred):
  anomaly_score = nn.MSELoss()(true, pred)
  return anomaly_score


In [14]:
normal_files = sorted(glob.glob('{dir}/normal_*'.format(dir=test_dir)))
anomaly_files = sorted(glob.glob('{dir}/anomaly_*'.format(dir=test_dir)))

normal_labels = np.zeros(len(normal_files))
anomaly_labels = np.ones(len(anomaly_files))

test_files = np.concatenate((normal_files, anomaly_files), axis=0)
y_true = np.concatenate((normal_labels, anomaly_labels), axis=0)
y_pred = [0. for k in test_files]


In [15]:
print(normal_files)

['./valve_test/normal_id_00_00000000.wav', './valve_test/normal_id_00_00000001.wav', './valve_test/normal_id_00_00000002.wav', './valve_test/normal_id_00_00000003.wav', './valve_test/normal_id_00_00000004.wav', './valve_test/normal_id_00_00000005.wav', './valve_test/normal_id_00_00000006.wav', './valve_test/normal_id_00_00000007.wav', './valve_test/normal_id_00_00000008.wav', './valve_test/normal_id_00_00000009.wav', './valve_test/normal_id_00_00000010.wav', './valve_test/normal_id_00_00000011.wav', './valve_test/normal_id_00_00000012.wav', './valve_test/normal_id_00_00000013.wav', './valve_test/normal_id_00_00000014.wav', './valve_test/normal_id_00_00000015.wav', './valve_test/normal_id_00_00000016.wav', './valve_test/normal_id_00_00000017.wav', './valve_test/normal_id_00_00000018.wav', './valve_test/normal_id_00_00000019.wav', './valve_test/normal_id_02_00000000.wav', './valve_test/normal_id_02_00000001.wav', './valve_test/normal_id_02_00000002.wav', './valve_test/normal_id_02_000000

In [16]:
test_dataset = list_to_dataset(test_files, n_mels, n_fft, hop_length, power)
receptive_field = model.get_receptive_field()
print(receptive_field)

100%|██████████| 160/160 [00:04<00:00, 39.05it/s]

63





In [17]:
# model = model.to('cpu')

for file_idx in tqdm(range(len(test_files)), desc='test data '):
  data = torch.tensor(test_dataset[file_idx], dtype=torch.float32).unsqueeze(0).to(device)

  output = model(data)

  score = get_anomaly_score(data[:, :, receptive_field:], output)
  y_pred[file_idx] = score

auc = metrics.roc_auc_score(y_true, y_pred)

test data : 100%|██████████| 160/160 [00:04<00:00, 39.73it/s]

0.525



  y_score = check_array(y_score, ensure_2d=False)


In [18]:
print(auc)

0.525
