In [None]:
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.layers import Input, Conv2D

import numpy as np
import random
from audiomentations import AddBackgroundNoise
import pandas as pd
from tqdm import tqdm
from keras_tqdm import TQDMNotebookCallback
import keras

2024-11-15 16:30:43.653576: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731655843.666193  670019 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731655843.670126  670019 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-15 16:30:43.682427: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
import os
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}


In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
class CONFIG:
    """Notebook-wide constants: training hyper-parameters and data locations."""

    # --- reproducibility ---
    SEED = 42

    # --- optimisation ---
    CLASSIFIER_LR = 0.0003   # learning rate handed to the Adam optimizer
    EPOCH = 200              # upper bound on epochs; early stopping may end sooner
    BATCH_SIZE = 8           # samples per batch for every dataset

    # --- data locations (relative to the notebook's working directory) ---
    VOICE_DIR = './cv-corpus-19.0-2024-09-13/ko/clips/'
    NOISE_DIR = './ESC-50-master/audio/'

In [None]:
def periodic_hann_window(window_length, dtype):
    """Return a periodic Hann window, usable as ``window_fn`` for ``tf.signal.stft``.

    The window is ``0.5 - 0.5 * cos(2*pi*n / window_length)`` for
    ``n = 0 .. window_length - 1``.

    Args:
        window_length: Number of samples in the window (Python int or scalar tensor).
        dtype: Floating-point dtype of the returned window.

    Returns:
        A 1-D tensor of length ``window_length`` with dtype ``dtype``.
    """
    # Build the index and the period in the requested dtype so the window
    # matches the dtype of the frames it will be multiplied with. Previously
    # `dtype` was ignored and the result was always float32.
    period = tf.cast(window_length, dtype)
    sample_index = tf.cast(tf.range(window_length), dtype)
    return 0.5 - 0.5 * tf.math.cos(2.0 *
                                   np.pi *
                                   sample_index /
                                   period)

In [None]:
def wave2log_mel_spectrogram(wave,
                             sample_rate=16000,
                             frame_length=640,
                             frame_step=320,
                             fft_length=1024,
                             num_mel_bins=80,
                             lower_edge_hertz=300.0,
                             upper_edge_hertz=4000.0):
    """Convert a waveform into a log-mel spectrogram.

    The defaults reproduce the values that used to be hard-coded here, so
    existing single-argument calls behave as before (minus the debug print).

    Args:
        wave: Audio samples; cast to float32 before the STFT.
        sample_rate: Sample rate in Hz assumed when building the mel filter bank.
        frame_length: STFT window length in samples.
        frame_step: STFT hop size in samples.
        fft_length: FFT size; the STFT has ``fft_length // 2 + 1`` frequency bins.
        num_mel_bins: Number of mel bands in the output.
        lower_edge_hertz: Lowest frequency covered by the mel filter bank.
        upper_edge_hertz: Highest frequency covered by the mel filter bank.

    Returns:
        A float32 tensor of shape ``(..., frames, num_mel_bins)`` holding
        ``log(mel_magnitude + 1e-12)``.
    """
    signal_stft = tf.signal.stft(tf.cast(wave, tf.float32),
                                 frame_length=frame_length,
                                 frame_step=frame_step,
                                 fft_length=fft_length,
                                 window_fn=periodic_hann_window)
    # Author's note: a 16000-sample input with the defaults gives shape (49, 513).
    # The per-call debug print of that shape was removed; it ran once per sample.

    # Magnitude spectrogram (phase is discarded).
    magnitude = tf.abs(signal_stft)

    # Projection from linear-frequency bins onto the mel bands.
    linear_to_mel = tf.signal.linear_to_mel_weight_matrix(num_mel_bins,
                                signal_stft.shape[-1],
                                sample_rate,
                                lower_edge_hertz,
                                upper_edge_hertz)
    mel_spectrogram = tf.tensordot(magnitude, linear_to_mel, 1)
    # The small offset keeps log() finite for silent (all-zero) mel bands.
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-12)
    return log_mel_spectrogram

In [None]:
def data_generator(df):
    """Yield ``(features, label)`` pairs, one per row of ``df``.

    Each row must provide a ``'path'`` (WAV file location) and a ``'label'``.
    The file is decoded as mono audio, converted with
    ``wave2log_mel_spectrogram`` and given a trailing channel axis.

    Args:
        df: DataFrame with ``'path'`` and ``'label'`` columns.

    Yields:
        Tuple of (NumPy array of log-mel features with a trailing axis of
        size 1, label value taken from the row).
    """
    for _, record in df.iterrows():
        wav_path, target = record['path'], record['label']

        # decode_wav also returns the file's sample rate; it is not used here.
        samples, _sample_rate = tf.audio.decode_wav(tf.io.read_file(wav_path),
                                                    desired_channels=1)
        # Drop the channel axis (author's note: (16000,) for the clips used).
        mono = tf.squeeze(samples, axis=-1)

        # Append a channel axis for Conv2D (author's note: (49, 80, 1)).
        features = np.expand_dims(wave2log_mel_spectrogram(mono), axis=-1)

        yield features, target


In [None]:
# Training split; data_generator reads its 'path' and 'label' columns.
train_df = pd.read_csv(filepath_or_buffer='train_dataset.csv')


In [None]:
# Validation split; data_generator reads its 'path' and 'label' columns.
valid_df = pd.read_csv(filepath_or_buffer='valid_dataset.csv')


In [None]:
# Held-out test split; data_generator reads its 'path' and 'label' columns.
test_df = pd.read_csv(filepath_or_buffer='test_dataset.csv')


In [None]:
def _build_dataset(df, shuffle=False):
    """Wrap ``data_generator(df)`` in a cached, batched, prefetched tf.data pipeline.

    Args:
        df: DataFrame consumed by ``data_generator``.
        shuffle: When True, reshuffle the individual samples on every pass
            (intended for the training split only).

    Returns:
        A finite ``tf.data.Dataset`` of ``(features, label)`` batches with
        element shapes ``(batch, 49, 80, 1)`` float32 and ``(batch,)`` int32.
    """
    signature = (
        tf.TensorSpec(shape=(49, 80, 1), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    dataset = tf.data.Dataset.from_generator(lambda: data_generator(df),
                                             output_signature=signature)
    # Cache individual samples (not batches) so the audio is decoded once and
    # later passes can still regroup the samples into different batches.
    dataset = dataset.cache()
    if shuffle:
        # A buffer covering the whole split gives a uniform shuffle; max() keeps
        # buffer_size valid for an empty DataFrame.
        dataset = dataset.shuffle(buffer_size=max(len(df), 1),
                                  seed=CONFIG.SEED,
                                  reshuffle_each_iteration=True)
    return dataset.batch(CONFIG.BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


# Only the training split is shuffled; validation and test keep file order.
train_dataset = _build_dataset(train_df, shuffle=True)
valid_dataset = _build_dataset(valid_df)
test_dataset = _build_dataset(test_df)

I0000 00:00:1731655845.505635  670019 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1290 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:01:00.0, compute capability: 7.5


In [None]:
# Small fully-convolutional binary classifier over (49, 80, 1) log-mel inputs.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first Conv2D, which Keras 3 deprecates for
# Sequential models.
model = models.Sequential([
            layers.Input(shape=(49, 80, 1)),
            layers.Conv2D(8, kernel_size=3, padding='same', activation='relu'),
            layers.Conv2D(16, kernel_size=3, padding='same', activation='relu'),
            layers.Conv2D(8, kernel_size=3, padding='same', activation='relu'),
            layers.Conv2D(2, kernel_size=3, padding='same', activation='relu'),
            layers.Flatten(),
            # Two-way softmax; labels are integer class ids (sparse cross-entropy).
            layers.Dense(2, activation='softmax')
            ])

In [None]:

model.compile(optimizer=Adam(learning_rate=CONFIG.CLASSIFIER_LR),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Stop once val_loss has not improved for 3 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=2, restore_best_weights=True)


# train_dataset and valid_dataset are finite (no .repeat()), so steps_per_epoch
# and validation_steps are deliberately NOT passed: Keras then runs exactly one
# full pass over each dataset per epoch. Passing step counts with a
# non-repeating dataset exhausts the iterator ("OUT_OF_RANGE: End of sequence"),
# so later epochs see only leftover batches, and their metrics would mislead
# EarlyStopping's choice of best epoch.
history = model.fit(train_dataset,
                    verbose=2,
                    epochs=CONFIG.EPOCH,
                    validation_data=valid_dataset,
                    callbacks=[early_stopping])

Epoch 1/200


I0000 00:00:1731655847.883326  670108 service.cc:148] XLA service 0x702e24004c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731655847.883344  670108 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 2070 SUPER, Compute Capability 7.5
2024-11-15 16:30:47.908409: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1731655848.020604  670108 cuda_dnn.cc:529] Loaded cuDNN version 90300
2024-11-15 16:30:48.148603: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:557] Omitted potentially buggy algorithm eng14{k25=0} for conv (f32[8,8,49,80]{3,2,1,0}, u8[0]{0}) custom-call(f32[8,1,49,80]{3,2,1,0}, f32[8,1,3,3]{3,2,1,0}, f32[8]{0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"cudnn_conv_backend_conf

53/53 - 12s - 220ms/step - accuracy: 0.6297 - loss: 0.7064 - val_accuracy: 0.5962 - val_loss: 0.6803
Epoch 2/200


2024-11-15 16:30:58.575158: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-11-15 16:30:58.575189: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-11-15 16:30:58.575198: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 768038531317425151
2024-11-15 16:30:58.575206: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 10582860279751940944
2024-11-15 16:30:58.621012: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:557] Omitted potentially buggy algorithm eng14{k25=0} for conv (f32[3,8,49,80]{3,2,1,0}, u8[0]{0}) custom-call(f32[3,1,49,80]{3,2,1,0}, f32[8,1,3,3]{3,2,1,0}, f32[8]{0}), window={size=3x3 pad=1_1x1_1}, dim_la

53/53 - 1s - 20ms/step - accuracy: 1.0000 - loss: 0.2981 - val_accuracy: 1.0000 - val_loss: 0.1584
Epoch 3/200
53/53 - 0s - 1ms/step - accuracy: 0.6887 - loss: 0.5723 - val_accuracy: 0.5962 - val_loss: 0.6739
Epoch 4/200
53/53 - 0s - 160us/step - accuracy: 1.0000 - loss: 0.3032 - val_accuracy: 1.0000 - val_loss: 0.1382
Epoch 5/200
53/53 - 0s - 1ms/step - accuracy: 0.6981 - loss: 0.5506 - val_accuracy: 0.6058 - val_loss: 0.6690
Epoch 6/200
53/53 - 0s - 153us/step - accuracy: 1.0000 - loss: 0.2961 - val_accuracy: 1.0000 - val_loss: 0.1357
Epoch 7/200


2024-11-15 16:30:58.838348: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-11-15 16:30:58.838381: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 768038531317425151
2024-11-15 16:30:58.838391: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 10582860279751940944
2024-11-15 16:30:58.912821: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 768038531317425151
2024-11-15 16:30:58.915644: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-11-15 16:30:58.915656: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 7680385

53/53 - 0s - 1ms/step - accuracy: 0.7123 - loss: 0.5310 - val_accuracy: 0.6250 - val_loss: 0.6605
Epoch 8/200
53/53 - 0s - 145us/step - accuracy: 1.0000 - loss: 0.2860 - val_accuracy: 1.0000 - val_loss: 0.1386
Epoch 9/200
53/53 - 0s - 1ms/step - accuracy: 0.7358 - loss: 0.5023 - val_accuracy: 0.6538 - val_loss: 0.6284
Epoch 9: early stopping
Restoring model weights from the end of the best epoch: 6.


2024-11-15 16:30:59.066812: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2024-11-15 16:30:59.066837: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 768038531317425151
2024-11-15 16:30:59.066846: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 10582860279751940944


In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_accuracy = model.evaluate(x=test_dataset)

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 134ms/step - accuracy: 0.6637 - loss: 0.6198


2024-11-15 16:31:05.095457: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 768038531317425151
2024-11-15 16:31:05.095485: I tensorflow/core/framework/local_rendezvous.cc:424] Local rendezvous recv item cancelled. Key hash: 10582860279751940944


In [None]:
# model.trainable = False
# tf.saved_model.save(model, "./vad_v1")
# model.summary()

INFO:tensorflow:Assets written to: ./vad_v1/assets


INFO:tensorflow:Assets written to: ./vad_v1/assets


In [None]:
# Persist the trained model as a single HDF5 file next to the notebook.
model.save(filepath='./vad_v1.h5')




In [None]:
# Reload the saved HDF5 model (replacing the in-memory one) and print its layers.
model = tf.keras.models.load_model(filepath='./vad_v1.h5')
model.summary()

