<a href="https://colab.research.google.com/github/jhChoi1997/EE488_AI_Convergence_Capstone_Design_Anomaly_Detection_2022spring/blob/main/EE488_DCASE2020_Baseline_fixed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
import os
drive.mount('/content/gdrive')
root_path = 'gdrive/MyDrive/EE488/'
os.chdir(root_path)

!unzip -qq valve.zip -d ./valve/
!unzip -qq valve_test.zip -d ./valve_test/

Mounted at /content/gdrive


In [2]:
import sys
import librosa
import librosa.core
import librosa.feature
import glob
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from sklearn import metrics

In [12]:
dataset_dir = './valve'
test_dir = './valve_test'
model_dir = './model'

n_fft = 2048
hop_length = 512
n_mels = 128
power = 2
frames = 5

EPOCHS = 10
BATCH = 1024

In [4]:
def file_load(wav_name):
  try:
    return librosa.load(wav_name, sr=None, mono=False)
  except:
    print('file_broken or not exists!! : {}'.format(wav_name))


def file_list_generator(target_dir):
  training_list_path = os.path.abspath('{dir}/*.wav'.format(dir=target_dir))
  files = sorted(glob.glob(training_list_path))
  if len(files) == 0:
    print('no_wav_file!!')
  return files


def file_to_vector_array(file_name, n_mels, frames, n_fft, hop_length, power):
  dims = n_mels * frames

  y, sr = file_load(file_name)
  mel_spectrogram = librosa.feature.melspectrogram(y=y,
                                                   sr=sr,
                                                   n_fft=n_fft,
                                                   hop_length=hop_length,
                                                   n_mels=n_mels,
                                                   power=power)
  
  log_mel_spectrogram = 20.0 / power * np.log10(mel_spectrogram + sys.float_info.epsilon)

  vector_array_size = len(log_mel_spectrogram[0, :]) - frames + 1

  if vector_array_size < 1:
    return np.empty((0, dims))

  vector_array = np.zeros((vector_array_size, dims))
  for t in range(frames):
    vector_array[:, n_mels * t: n_mels * (t + 1)] = log_mel_spectrogram[:, t: t + vector_array_size].T
  
  return vector_array


def list_to_vector_array(file_list, n_mels, frames, n_fft, hop_length, power):
  dims = n_mels * frames

  for idx in tqdm(range(len(file_list))):
    vector_array = file_to_vector_array(file_list[idx],
                                        n_mels=n_mels,
                                        frames=frames,
                                        n_fft=n_fft,
                                        hop_length=hop_length,
                                        power=power)
    if idx == 0:
      dataset = np.zeros((vector_array.shape[0] * len(file_list), dims), float)
    dataset[vector_array.shape[0] * idx: vector_array.shape[0] * (idx + 1), :] = vector_array

  return dataset

In [5]:
os.makedirs(model_dir, exist_ok=True)

dataset_dir = os.path.abspath(dataset_dir)
machine_type = os.path.split(dataset_dir)[1]
model_file_path = '{model}/model_{machine_type}'.format(model=model_dir, machine_type=machine_type)

files = file_list_generator(dataset_dir)
train_data = list_to_vector_array(files,
                                  n_mels=n_mels,
                                  frames=frames,
                                  n_fft=n_fft,
                                  hop_length=hop_length,
                                  power=power)

100%|██████████| 3291/3291 [01:32<00:00, 35.68it/s]


In [6]:
train_dataset = torch.Tensor(train_data)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cuda device


In [7]:
class AutoEncoder(nn.Module):
  def __init__(self, input_dim):
    super(AutoEncoder, self).__init__()
    self.input_dim = input_dim
    self.encoder = nn.Sequential(
        nn.Linear(self.input_dim, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 8),
        nn.BatchNorm1d(8),
        nn.ReLU()
    )
    self.decoder = nn.Sequential(
        nn.Linear(8, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Linear(128, self.input_dim)
    )

  def forward(self, x):
    latent = self.encoder(x)
    output = self.decoder(latent)
    return output

In [8]:
model = AutoEncoder(n_mels * frames).to(device)
print(model)

AutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=640, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=128, bias=True)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Linear(in_features=128, out_features=128, bias=True)
    (10): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Linear(in_features=128, out_features=8, bias=True)
    (13): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e

In [9]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [10]:
def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, X in enumerate(dataloader):
    X = X.to(device)

    pred = model(X)
    loss = loss_fn(pred, X)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 300 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [13]:
for t in range(EPOCHS):
  print(f"Epoch {t + 1}\n-------------------------------")
  train(train_dataloader, model, loss_fn, optimizer)


Epoch 1
-------------------------------
loss: 18.581091  [    0/1016919]
loss: 15.865771  [307200/1016919]
loss: 22.997976  [614400/1016919]
loss: 12.144976  [921600/1016919]
Epoch 2
-------------------------------
loss: 15.608582  [    0/1016919]
loss: 13.671338  [307200/1016919]
loss: 21.324085  [614400/1016919]
loss: 11.277270  [921600/1016919]
Epoch 3
-------------------------------
loss: 14.868581  [    0/1016919]
loss: 13.244428  [307200/1016919]
loss: 20.041546  [614400/1016919]
loss: 10.913928  [921600/1016919]
Epoch 4
-------------------------------
loss: 13.489589  [    0/1016919]
loss: 12.253464  [307200/1016919]
loss: 19.447729  [614400/1016919]
loss: 10.671268  [921600/1016919]
Epoch 5
-------------------------------
loss: 12.081840  [    0/1016919]
loss: 11.513174  [307200/1016919]
loss: 18.954729  [614400/1016919]
loss: 10.464519  [921600/1016919]
Epoch 6
-------------------------------
loss: 11.447602  [    0/1016919]
loss: 11.154375  [307200/1016919]
loss: 18.283361  [

In [14]:
normal_files = sorted(glob.glob('{dir}/normal_*'.format(dir=test_dir)))
anomaly_files = sorted(glob.glob('{dir}/anomaly_*'.format(dir=test_dir)))

normal_labels = np.zeros(len(normal_files))
anomaly_labels = np.ones(len(anomaly_files))

test_files = np.concatenate((normal_files, anomaly_files), axis=0)
y_true = np.concatenate((normal_labels, anomaly_labels), axis=0)
y_pred = [0. for k in test_files]

In [15]:
def generate_test_dataset(file_list, n_mels, frames, n_fft, hop_length, power):
  dims = n_mels * frames
  for idx in tqdm(range(len(file_list))):
    vector_array = file_to_vector_array(file_list[idx],
                                        n_mels=n_mels,
                                        frames=frames,
                                        n_fft=n_fft,
                                        hop_length=hop_length,
                                        power=power)
    if idx == 0:
      dataset = np.zeros((len(file_list), vector_array.shape[0], vector_array.shape[1]), float)
    dataset[idx] = vector_array

  return dataset

In [None]:
test_dataset = generate_test_dataset(test_files, n_mels, frames, n_fft, hop_length, power)

  0%|          | 0/879 [00:00<?, ?it/s]

In [None]:
print(test_dataset.shape)

(160, 309, 640)


In [None]:
def get_anomaly_score(true, pred):
  anomaly_score = nn.MSELoss()(true, pred)
  return anomaly_score

In [None]:
for file_idx in tqdm(range(len(test_files)), desc='test data '):
  data = torch.tensor(test_dataset[file_idx], dtype=torch.float32).to(device)
  output = model(data)

  score = get_anomaly_score(data, output)
  y_pred[file_idx] = score

auc = metrics.roc_auc_score(y_true, y_pred)

test data : 100%|██████████| 160/160 [00:00<00:00, 322.63it/s]
  y_score = check_array(y_score, ensure_2d=False)


In [None]:
print(auc)

0.4753125
