# 1. 필요한 패키지 

In [None]:
import pandas as pd
import numpy as np
import json
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import splitfolders

from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# 2. Sound

- librosa를 이용한 소리데이터 이상탐지 데이터전처리 단계

In [None]:
def export_Mel_Spectrogram(Sound_path, Image_path):
    """
    Render every audio file in a directory as a mel-spectrogram PNG image.

    Each entry of ``Sound_path`` is loaded at its native sampling rate,
    converted to a 40-band mel spectrogram using a 25 ms analysis window and
    a 10 ms hop, scaled to decibels relative to its own peak, and saved as an
    image with the axes hidden and no padding.

    Args:
        Sound_path: Directory whose entries are each passed to
            ``librosa.load``.
        Image_path: Output prefix. It is concatenated directly with the
            source file name (``Image_path + filename + '.png'``), so it must
            end with a path separator when it names a directory.
    """
    frame_length = 0.025  # analysis window length, in seconds
    frame_stride = 0.010  # hop between consecutive windows, in seconds

    for filename in os.listdir(Sound_path):
        # sr=None keeps the file's own sampling rate instead of resampling.
        y, sr = librosa.load(os.path.join(Sound_path, filename), sr=None)

        # Convert the window/hop durations to sample counts for this file.
        input_nfft = int(round(sr * frame_length))
        input_stride = int(round(sr * frame_stride))

        # The real sampling rate must be passed explicitly; otherwise the mel
        # filter bank is built for librosa's default rate, not this file's.
        S = librosa.feature.melspectrogram(
            y=y, sr=sr, n_mels=40, n_fft=input_nfft, hop_length=input_stride)

        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(
                librosa.power_to_db(S, ref=np.max), sr=sr, hop_length=input_stride)
            plt.axis('off')
            plt.savefig(Image_path+filename+'.png', bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure so a failing file does not leak it.
            plt.close(fig)

In [None]:
def Image_Data_split(image_path, split_path):
    """
    Split the image data into three parts with an 8:1:1 ratio.

    Delegates to ``splitfolders.ratio``, reading from ``image_path`` and
    writing the result to ``split_path``.
    """
    split_ratio = (0.8, 0.1, 0.1)
    splitfolders.ratio(image_path, split_path, ratio=split_ratio)

In [None]:
def _load_directory_as_arrays(data_generator, directory, target_size):
    """Read one pass of ``directory`` through ``data_generator`` into (images, labels) arrays."""
    iterator = data_generator.flow_from_directory(
        directory,
        target_size=target_size,
        color_mode='rgb',
        class_mode='binary',
        batch_size=1)

    imgs, labels = [], []

    # With batch_size=1, drawing ``iterator.n`` batches is exactly one pass.
    for _ in range(iterator.n):
        # The builtin next() uses the iterator protocol, so it does not rely
        # on the iterator exposing a ``.next()`` method.
        batch_imgs, batch_labels = next(iterator)
        imgs.extend(batch_imgs)
        labels.extend(batch_labels)

    return np.asarray(imgs), np.asarray(labels)


def Sound_Data_Generator(train_path, validation_path, test_path, hyperparameter):
    """
    Load the train, validation and test image directories into NumPy arrays.

    Images are read with ``ImageDataGenerator.flow_from_directory`` in RGB
    mode with binary class labels, and pixel values are multiplied by 1/255.

    Args:
        train_path: Directory holding the training images.
        validation_path: Directory holding the validation images.
        test_path: Directory holding the test images.
        hyperparameter: Mapping that provides ``"size_width"`` and
            ``"size_height"``, used to build the resize target.

    Returns:
        The tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)`` of
        image arrays followed by their label arrays.
    """
    data_generator = ImageDataGenerator(rescale=1/255)

    # NOTE(review): Keras documents target_size as (height, width), while this
    # passes (width, height). Kept as-is because swapping would change the
    # returned array shapes -- confirm against the model's input shape.
    target_size = (hyperparameter["size_width"], hyperparameter["size_height"])

    X_train, y_train = _load_directory_as_arrays(data_generator, train_path, target_size)
    X_val, y_val = _load_directory_as_arrays(data_generator, validation_path, target_size)
    X_test, y_test = _load_directory_as_arrays(data_generator, test_path, target_size)

    return X_train, X_val, X_test, y_train, y_val, y_test