# 모델 추론

In [6]:
import os
import json
import math
import numpy as np
import librosa
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from skimage.transform import resize


class Dataloader(Sequence):
    """Keras ``Sequence`` that serves audio files as VGG16-sized spectrogram batches.

    Each item is a ``(batch_x, batch_y)`` tuple: ``batch_x`` holds the log-mel
    spectrograms of one batch of audio files, resized to 224x224 and repeated
    over three channels; ``batch_y`` holds the matching one-hot labels.

    Args:
        Audios: Indexable collection of audio file paths.
        labels: One label per entry of ``Audios``. They are passed to
            ``to_categorical``, so they are expected to be integer class ids.
        batch_size: Number of files per batch.
        num_classes: Width of the one-hot label vectors. When ``None`` (the
            default) it is inferred as the number of distinct values in
            ``labels``; pass it explicitly when the data may not contain
            every class the model was trained on.
    """

    def __init__(self, Audios, labels, batch_size, num_classes=None):
        # Newer Keras releases expect Sequence/PyDataset subclasses to run the
        # base initializer; on older releases this should be a no-op.
        super().__init__()
        self.Audios = Audios
        self.labels = labels
        self.batch_size = batch_size
        if num_classes is None:
            num_classes = len(set(self.labels))
        self.num_classes = num_classes
        self.indices = np.arange(len(self.labels))

    def __len__(self):
        """Return the number of batches; the last batch may be partial."""
        return math.ceil(len(self.labels) / self.batch_size)

    def __getitem__(self, idx):
        """Build batch ``idx`` as ``(spectrograms, one_hot_labels)``."""
        indices = self.indices[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_x = [self.Audios[i] for i in indices]
        batch_audios = self.get_Audios(batch_x)
        batch_y = [self.labels[i] for i in indices]
        # One-hot encode the labels with a fixed width so every batch agrees.
        batch_y = to_categorical(batch_y, num_classes=self.num_classes)
        return batch_audios, batch_y

    def get_Audios(self, path_list):
        """Load audio files and convert them to 3-channel log-mel spectrograms.

        Args:
            path_list: Iterable of audio file paths.

        Returns:
            Array of shape ``(len(path_list), 224, 224, 3)``.
        """
        spectrograms = []
        for file_path in path_list:
            # sr=None asks librosa to keep the file's own sampling rate.
            y, sr = librosa.load(file_path, sr=None)
            # Mel power spectrogram, converted to dB relative to its peak.
            S = librosa.feature.melspectrogram(y=y, sr=sr)
            log_S = librosa.power_to_db(S, ref=np.max)

            # Resize to the 224x224 spatial size used for the VGG16 input.
            log_S_resized = resize(log_S, (224, 224))

            # Repeat the single channel three times to mimic an RGB image.
            log_S_resized = np.stack([log_S_resized] * 3, axis=-1)

            spectrograms.append(log_S_resized)

        return np.array(spectrograms)



# Load the {file path: label} mapping that describes the test set.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('../test_dataset.json', 'r', encoding='utf-8') as f:
    folder_label_mapping = json.load(f)

file_paths = []
labels = []

# Keep only the entries whose key ends in .wav (case-insensitive).
for file_path, label in folder_label_mapping.items():
    if file_path.lower().endswith('.wav'):
        file_paths.append(file_path)
        labels.append(label)

# Fail fast, before any label processing, when nothing was collected.
if len(labels) == 0:
    raise ValueError("Labels array is empty. Check your data loading logic.")

# Map every distinct label to an integer id, ordered by sorted label value
# (this also covers string labels).
unique_labels = sorted(set(labels))
label_to_int = {label: i for i, label in enumerate(unique_labels)}
labels = [label_to_int[label] for label in labels]

# Number of distinct classes found in the data.
num_classes = len(unique_labels)



# Validation

In [1]:
import os
import json
import math
import numpy as np
import librosa
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from skimage.transform import resize


class Dataloader(Sequence):
    """Keras ``Sequence`` that serves audio files as VGG16-sized spectrogram batches.

    Each item is a ``(batch_x, batch_y)`` tuple: ``batch_x`` holds the log-mel
    spectrograms of one batch of audio files, resized to 224x224 and repeated
    over three channels; ``batch_y`` holds the matching one-hot labels.

    Args:
        Audios: Indexable collection of audio file paths.
        labels: One label per entry of ``Audios``. They are passed to
            ``to_categorical``, so they are expected to be integer class ids.
        batch_size: Number of files per batch.
        num_classes: Width of the one-hot label vectors. When ``None`` (the
            default) it is inferred as the number of distinct values in
            ``labels``; pass it explicitly when the data may not contain
            every class the model was trained on.
    """

    def __init__(self, Audios, labels, batch_size, num_classes=None):
        # Newer Keras releases expect Sequence/PyDataset subclasses to run the
        # base initializer; on older releases this should be a no-op.
        super().__init__()
        self.Audios = Audios
        self.labels = labels
        self.batch_size = batch_size
        if num_classes is None:
            num_classes = len(set(self.labels))
        self.num_classes = num_classes
        self.indices = np.arange(len(self.labels))

    def __len__(self):
        """Return the number of batches; the last batch may be partial."""
        return math.ceil(len(self.labels) / self.batch_size)

    def __getitem__(self, idx):
        """Build batch ``idx`` as ``(spectrograms, one_hot_labels)``."""
        indices = self.indices[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_x = [self.Audios[i] for i in indices]
        batch_audios = self.get_Audios(batch_x)
        batch_y = [self.labels[i] for i in indices]
        # One-hot encode the labels with a fixed width so every batch agrees.
        batch_y = to_categorical(batch_y, num_classes=self.num_classes)
        return batch_audios, batch_y

    def get_Audios(self, path_list):
        """Load audio files and convert them to 3-channel log-mel spectrograms.

        Args:
            path_list: Iterable of audio file paths.

        Returns:
            Array of shape ``(len(path_list), 224, 224, 3)``.
        """
        spectrograms = []
        for file_path in path_list:
            # sr=None asks librosa to keep the file's own sampling rate.
            y, sr = librosa.load(file_path, sr=None)
            # Mel power spectrogram, converted to dB relative to its peak.
            S = librosa.feature.melspectrogram(y=y, sr=sr)
            log_S = librosa.power_to_db(S, ref=np.max)

            # Resize to the 224x224 spatial size used for the VGG16 input.
            log_S_resized = resize(log_S, (224, 224))

            # Repeat the single channel three times to mimic an RGB image.
            log_S_resized = np.stack([log_S_resized] * 3, axis=-1)

            spectrograms.append(log_S_resized)

        return np.array(spectrograms)



# Load the {file path: label} mapping that describes the validation data.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('../test_dataset.json', 'r', encoding='utf-8') as f:
    folder_label_mapping = json.load(f)

file_paths = []
labels = []

# Keep only the entries whose key ends in .wav (case-insensitive).
for file_path, label in folder_label_mapping.items():
    if file_path.lower().endswith('.wav'):
        file_paths.append(file_path)
        labels.append(label)

# Stop here with a clear message instead of building an empty loader.
if len(labels) == 0:
    raise ValueError("Labels array is empty. Check your data loading logic.")

# NOTE(review): labels are forwarded exactly as stored in the JSON; the loader
# one-hot encodes them, so they are presumably integer class ids — confirm.
Valid_dataloader = Dataloader(file_paths, labels, 16)

2023-12-14 06:44:36.611304: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 06:44:36.611347: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 06:44:36.611364: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 06:44:36.617386: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Restore the saved model from disk (file name suggests a VGG16 checkpoint).
model_path = './model/vgg16_50epochs_model_sum.h5'
model = load_model(model_path)

# Score the model on every batch of the validation loader and report
# the two values returned by evaluate() as loss and accuracy.
evaluation = model.evaluate(Valid_dataloader)
loss, acc = evaluation
print('loss={}, acc={}'.format(loss, acc))

2023-12-14 06:44:38.315261: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-14 06:44:38.320512: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-14 06:44:38.320817: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-14 06:44:38.323012: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-14 06:44:38.323192: I tensorflow/compile

  2/601 [..............................] - ETA: 35s - loss: 0.6933 - accuracy: 0.4062  

2023-12-14 06:44:48.454961: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


loss=0.6931594610214233, acc=0.4932861328125


In [3]:
# Run the model over the validation loader and keep the per-sample outputs.
pred = model.predict(x=Valid_dataloader)

 33/601 [>.............................] - ETA: 8:21



In [4]:
# threshold=None makes the metric score the argmax of each prediction row,
# which is the right setting when every sample belongs to exactly one class
# (a fixed 0.5 cut-off is meant for multi-label outputs and can leave rows
# with no predicted class at all).
metric = tf.keras.metrics.F1Score(threshold=None)
# Size the one-hot truth from the prediction width so both arrays always have
# the same number of columns, even if a class is missing from the labels.
y_true = to_categorical(Valid_dataloader.labels, num_classes=pred.shape[-1])
y_pred = pred
print(y_true)


[[1. 0.]
 [1. 0.]
 [0. 1.]
 ...
 [0. 1.]
 [1. 0.]
 [1. 0.]]


In [5]:
# Clear counts left over from an earlier run of this cell so that re-running
# it never mixes old and new predictions in the metric state.
metric.reset_state()
metric.update_state(y_true, pred)
result = metric.result()
# Per-class F1 values as a NumPy array (shown as the cell output).
result.numpy()

array([0.66067195, 0.        ], dtype=float32)

In [6]:
from sklearn.metrics import f1_score

# Macro-averaged F1 between the true and predicted class indices, where the
# index of the largest entry in each row is taken as the class.
f1 = f1_score(
    y_true=y_true.argmax(axis=1),
    y_pred=pred.argmax(axis=1),
    average='macro',
)
print("f1_score: %.2f%%" % (f1 * 100.0))


f1_score: 33.03%


In [7]:
# Predicted class index per sample: position of the largest value in each row.
test = pred.argmax(axis=1)
print(test)

[0 0 0 ... 0 0 0]


In [8]:
# Show the raw prediction array as the cell output.
# NOTE(review): the recorded output lists the same values in every visible
# row, so the model may be returning a constant prediction — confirm.
pred

array([[0.500465, 0.499535],
       [0.500465, 0.499535],
       [0.500465, 0.499535],
       ...,
       [0.500465, 0.499535],
       [0.500465, 0.499535],
       [0.500465, 0.499535]], dtype=float32)