In [2]:
import numpy as np
import librosa
import keras


# Target sample rate in Hz; load_waveform resamples its 16 kHz audio down to
# this rate before mu-law compression.
SAMPLE_RATE = 2000


# Directory of the saved Keras model (a relative path, so it is resolved
# against the current working directory).
# NOTE(review): the directory name suggests an MLP with a 16-unit dense layer
# trained on mu-compressed 2 kHz audio -- confirm against the training code.
model_save_dir = '../saved_models/mlp_dense16_mu_compress_rate_2kHz'
# Load the model once at module level so later statements can call
# `model.predict`.
model = keras.models.load_model(model_save_dir)
# Print the layer / parameter table. The recorded output shows
# Dense(16) -> Dense(10) -> Softmax with 32016 parameters in the first layer,
# i.e. (2000 inputs + 1 bias) * 16 units.
model.summary()


def add_background_noise(signal, epsilon=0.25):
    """Return a copy of *signal* perturbed by uniform noise, then clipped.

    Each element receives an independent draw from the uniform distribution
    on ``[-epsilon, epsilon)`` (NumPy's global random state is used), and the
    sum is clipped to ``[-128, 127]``.

    Args:
        signal: NumPy array; the noise is drawn with ``signal.shape``.
        epsilon: Half-width of the uniform noise interval.

    Returns:
        A new array with the same shape as *signal*; the input is not
        modified.

    NOTE(review): the clip bounds look like the 8-bit range of a mu-law
    quantized signal (mu=255). For float audio in [-1, 1] they never
    trigger -- confirm which domain this function is meant for.
    """
    lower_bound, upper_bound = -128, 127
    perturbation = np.random.uniform(-epsilon, epsilon, signal.shape)
    return np.clip(signal + perturbation, lower_bound, upper_bound)


def load_waveform(file_path):
    """Load the first second of an audio file and derive clean/noisy inputs.

    The file is read as mono float32 at 16 kHz (librosa resamples on load if
    the file's native rate differs). A noisy copy is produced with
    ``add_background_noise``. Both versions are then resampled to
    ``SAMPLE_RATE`` and mu-law compressed with ``mu=255``.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A 4-tuple ``(signal, noisy_signal, signal_downsample,
        noisy_signal_downsample)``: the 16 kHz waveform, its noisy copy, and
        the downsampled, mu-law compressed version of each.
    """
    original_sr = 16000
    signal, _ = librosa.load(
        path=file_path,
        sr=original_sr,
        mono=True,
        offset=0.0,
        duration=1.0,
        dtype=np.float32,  # original: 16-bit audio
        res_type='soxr_hq',  # see docs for librosa.resample
    )
    noisy_signal = add_background_noise(signal)

    def _downsample_and_compress(waveform):
        # Resample to SAMPLE_RATE, then mu-law compress.
        # The sample rates are keyword-only in librosa >= 0.10; passing them
        # by name also works on older releases.
        downsampled = librosa.resample(
            waveform, orig_sr=original_sr, target_sr=SAMPLE_RATE
        )
        # librosa.mu_compress raises on input outside [-1, 1]. Added noise
        # and resampling-filter overshoot can both leave that range, so
        # clamp before compressing.
        downsampled = np.clip(downsampled, -1.0, 1.0)
        return librosa.mu_compress(downsampled, mu=255)

    signal_downsample = _downsample_and_compress(signal)
    noisy_signal_downsample = _downsample_and_compress(noisy_signal)

    return signal, noisy_signal, signal_downsample, noisy_signal_downsample


sample_file = '/home/cameron/voice_data/LibriSpeech/train-clean-100/211/122425/211-122425-0019.flac'
#sample_file = '/home/cameron/voice_data/LibriSpeech/train-clean-100/4014/186179/4014-186179-0015.flac'

# load_waveform returns a 4-tuple and already applies add_background_noise
# internally, so unpack it instead of calling add_background_noise again on
# the tuple (which has no `.shape`).
signal, noisy_signal, signal_downsample, noisy_signal_downsample = load_waveform(sample_file)

# Keras expects a leading batch axis, so each waveform becomes a batch of one.
# The recorded model summary implies 2000 input features, i.e. one second at
# SAMPLE_RATE.
# NOTE(review): assumes the model was trained on the raw mu-law codes cast to
# float with no further scaling, and that the clip is at least 1 s long --
# confirm against the training pipeline.
y_signal = model.predict(signal_downsample[np.newaxis, :].astype(np.float32))
y_noisy_signal = model.predict(noisy_signal_downsample[np.newaxis, :].astype(np.float32))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 16)                32016     
                                                                 
 dense_1 (Dense)             (None, 10)                170       
                                                                 
 softmax (Softmax)           (None, 10)                0         
                                                                 
Total params: 32186 (125.73 KB)
Trainable params: 32186 (125.73 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


2024-04-30 16:47:18.573123: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22422 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:17:00.0, compute capability: 8.6
