In [23]:
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.layers import Input, Conv2D, Lambda

from tensorflow.keras.utils import register_keras_serializable

import numpy as np
import random
import pandas as pd
import keras

In [5]:
import os
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"



In [6]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
class CONFIG:
    SEED = 42
    CLASSIFIER_LR = 3e-4
    EPOCH = 200
    BATCH_SIZE = 8
    VOICE_DIR = './cv-corpus-19.0-2024-09-13/ko/clips/'
    NOISE_DIR = './ESC-50-master/audio/'

In [8]:
def periodic_hann_window(window_length, dtype):
    return 0.5 - 0.5 * tf.math.cos(2.0 *
                                   np.pi *
                                   tf.range(tf.cast(window_length, tf.float32)) /
                                   tf.cast(window_length, tf.float32))

In [9]:
def wave2log_mel_spectrogram(wave):
    signal_stft = tf.signal.stft(tf.cast(wave, tf.float32),
                                 frame_length=640,
                                 frame_step=320,
                                 fft_length=1024,
                                 window_fn=periodic_hann_window)
    # print(signal_stft.shape) # (49, 513)

    spectogram = tf.abs(signal_stft)

    linear_to_mel = tf.signal.linear_to_mel_weight_matrix(80,
                                signal_stft.shape[-1],
                                16000,
                                300.0,
                                4000.0)
    mel_spectrogram = tf.tensordot(spectogram, linear_to_mel, 1)
    log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-12)
    return log_mel_spectrogram

In [10]:
def data_generator(df):
    for _, row in df.iterrows():
        path = row['path']
        label = row['label']

        audio_raw = tf.io.read_file(path)
        wave, sr = tf.audio.decode_wav(audio_raw, desired_channels=1)
        wave = tf.squeeze(wave, axis=-1)
        # print(wave.shape) (16000, )
        log_mel_spectrogram = wave2log_mel_spectrogram(wave)
        log_mel_spectrogram = np.expand_dims(log_mel_spectrogram, axis=-1)
        print(log_mel_spectrogram.shape) #(49, 80)

        yield log_mel_spectrogram, label


In [11]:
train_df = pd.read_csv('train_dataset.csv')


In [12]:
valid_df = pd.read_csv('valid_dataset.csv')


In [13]:
test_df = pd.read_csv('test_dataset.csv')


In [14]:
train_dataset = tf.data.Dataset.from_generator(lambda: data_generator(train_df), output_signature=(tf.TensorSpec(shape=(49, 80, 1), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.int32))).batch(CONFIG.BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)
valid_dataset = tf.data.Dataset.from_generator(lambda: data_generator(valid_df), output_signature=(tf.TensorSpec(shape=(49, 80, 1), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.int32))).batch(CONFIG.BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_generator(lambda: data_generator(test_df), output_signature=(tf.TensorSpec(shape=(49, 80, 1), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.int32))).batch(CONFIG.BATCH_SIZE).cache().prefetch(tf.data.AUTOTUNE)

2024-11-26 10:23:15.557455: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-11-26 10:23:15.557604: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12/lib64:
2024-11-26 10:23:15.557673: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12/lib64:
2024-11-26 10:23:15.557728: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory; LD_LI

In [15]:
# model = models.Sequential([
#             layers.Input(shape=(49, 80, 1)),
#             layers.Reshape((49, 80)),
#             layers.RNN(layers.LSTMCell(80)),
#             layers.Dense(2, activation='softmax')
#             ])

In [None]:
model = models.Sequential([
            layers.Input(shape=(49, 80, 1)),
            layers.Reshape((49, 80)),
            layers.LSTM(80),
            #layers.UniDirectionalLSTM(80),
            layers.Dense(2, activation='softmax')
            ])

AttributeError: module 'tensorflow.keras.layers' has no attribute 'UniDirectionalLSTM'

In [17]:

model.compile(optimizer=Adam(learning_rate=CONFIG.CLASSIFIER_LR),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=2, restore_best_weights=True)


history = model.fit(train_dataset, verbose=2, epochs=CONFIG.EPOCH, validation_data=valid_dataset, callbacks=[early_stopping], steps_per_epoch=len(train_df)//CONFIG.BATCH_SIZE, validation_steps=len(valid_df)//CONFIG.BATCH_SIZE)

Epoch 1/200
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49,

2024-11-26 10:23:20.647935: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49,

2024-11-26 10:23:21.560688: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 49, 80)            0         
                                                                 
 lstm (LSTM)                 (None, 80)                51520     
                                                                 
 dense (Dense)               (None, 2)                 162       
                                                                 
Total params: 51,682
Trainable params: 51,682
Non-trainable params: 0
_________________________________________________________________


In [19]:
test_loss, test_accuracy = model.evaluate(test_dataset)

(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      1/Unknown - 0s 78ms/step - loss: 0.4222 - accuracy: 0.8750(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      2/Unknown - 0s 57ms/step - loss: 0.5100 - accuracy: 0.8125(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      3/Unknown - 0s 57ms/step - loss: 0.5205 - accuracy: 0.7917(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      4/Unknown - 0s 57ms/step - loss: 0.6402 - accuracy: 0.6875(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      5/Unknown - 0s 57ms/step - loss: 0.6084 - accuracy: 0.7000(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
      6/Unknown - 0s 58ms/step - loss: 0.6056 - accuracy: 0.7292(49, 80, 1)
(49, 80, 1)
(49, 80, 1)
(49,

In [20]:
sample_data = np.random.rand(32, 49, 80, 1)  # 배치 크기 32
result = model.predict(sample_data)
print(result.shape)  # 출력: (32, 2)

(32, 2)


In [21]:
inputs = tf.random.normal([32, 49, 80, 1])  # Example input
model(inputs)  # Build the model by passing inputs
model.save('./vad_lstm_v3_29.h5')


In [22]:
model.export('./vad_lstm_v3_29')

AttributeError: 'Sequential' object has no attribute 'export'