# Validation

## Dataset 불러오기

In [1]:
from tensorflow.keras.utils import to_categorical, Sequence
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from tqdm import tqdm
import numpy as np
import librosa
import joblib
import math
import os
import json

class Dataloader(Sequence):
    def __init__(self, Audios, labels, batch_size):
        self.Audios = Audios
        self.labels = labels
        self.batch_size = batch_size
        self.num_classes = len(set(self.labels))
        self.indices = np.arange(len(self.labels))
        self.scaler = StandardScaler()


    def __len__(self):
        return math.ceil(len(self.labels)/self.batch_size)
    
    def __getitem__(self, idx):
        indices = self.indices[idx*self.batch_size : (idx+1)*self.batch_size]
        batch_x = [self.Audios[i] for i in indices]
        batch_audios = self.get_Audios(batch_x)
        batch_y = [self.labels[i] for i in indices]
        # 라벨을 원-핫 인코딩
        #batch_y = to_categorical(batch_y, num_classes=self.num_classes)
        return np.array(batch_audios), np.array(batch_y)
    
    # 스펙트로그램 패딩 함수
    def extract_features(self, audio_file):
        y, sr = librosa.load(audio_file)

        mfccs = librosa.feature.mfcc(y=y, sr=sr)

        zero_crossing = librosa.feature.zero_crossing_rate(y)

        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)

        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)

        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

        features = np.concatenate((np.mean(mfccs, axis=1), [np.mean(zero_crossing), np.mean(spectral_rolloff), np.mean(chroma_stft), np.mean(spectral_contrast)]))
        
        return features
    

    def get_Audios(self, path_list):
        # 오디오 데이터 로딩 및 전처리
        features_list = []
        for file_path in path_list:
            features = self.extract_features(file_path)  # 패딩
            features_list.append(features)
        features_list = np.array(features_list)
        return self.scaler.fit_transform(features_list)


epochs = 50

# JSON 파일 로딩 및 데이터 및 라벨 생성
with open('../test_dataset.json', 'r') as f:
    folder_label_mapping = json.load(f)

file_paths = []
labels = []

# 각 폴더 및 라벨에 대해
for file_path, label in folder_label_mapping.items():
    # 파일 확장자 확인하여 wav 파일만 처리
    if file_path.lower().endswith('.wav'):
        # 파일 경로 및 라벨 저장
        file_paths.append(file_path)
        labels.append(label)


Valid_dataloader = Dataloader(file_paths, labels, 16)

2023-12-14 06:44:42.691804: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 06:44:42.691864: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 06:44:42.691882: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 06:44:42.699497: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 모델 예측하기

In [2]:

svm_model = joblib.load('./model/svm_50epochs_model_sum.json')

In [3]:
pred_list = []
label_list = []
#4. 예측 및 정확도 계산
for i in tqdm(range(Valid_dataloader.__len__())):
    x, y = Valid_dataloader.__getitem__(i)
    pred = svm_model.predict(x)
    pred_list.append(pred)
    label_list.append(y)



100%|██████████| 601/601 [22:22<00:00,  2.23s/it]


## Score 보기

In [4]:
print(pred_list[0])
print(label_list[0])

[0 0 0 0 1 1 0 0 0 0 0 0 1 0 1 1]
[0 0 1 1 1 0 1 0 0 0 0 1 1 1 1 1]


In [5]:
def list_concat(x):
    temp = []
    for i in x:
        if len(temp) > 0:
            np.concatenate((temp, i))
        else:
            temp = i
    return temp

pred_list_concat = list_concat(pred_list)
label_list_concat = list_concat(label_list)
#pred_list_concat = to_categorical(np.argmax(pred_list_concat, axis=1))
#label_list_concat = to_categorical(np.argmax(label_list_concat, axis=1))

In [6]:
accuracy = accuracy_score(label_list_concat, pred_list_concat)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


Accuracy: 62.50%


In [7]:
from sklearn.metrics import f1_score

f1 = f1_score(label_list_concat, pred_list_concat, average='macro')
print("f1_score: %.2f%%" % (f1 * 100.0))



f1_score: 61.90%


In [8]:
from sklearn.metrics import log_loss
# 로그 손실 계산
loss = log_loss(label_list_concat, pred_list_concat)
print("loss_score: %.2f%%" % (loss))


loss_score: 13.52%
