In [None]:
import os
import librosa
import numpy as np
from tensorflow import keras
import soundfile as sf

# Saved Keras model that main() hands to load_fine_tuned_model().
PATH_TO_FINE_TUNED_MODEL = "myModel/fine_tuned_model.h5"
# Input recording that main() feeds into preprocess_audio().
NOISY_AUDIO_PATH = "noisy/noisy.wav"  
# Where main() writes the reconstructed waveform (parent directory is created).
OUTPUT_AUDIO_PATH = "result/test.wav"  


# Sampling rate used both when loading audio and when writing the result.
SAMPLE_RATE = 16000
# 2-D shape the log-mel spectrogram is forced into before a channel axis is added.
TARGET_SHAPE = (1024, 44)

def preprocess_audio(file_path, sample_rate=SAMPLE_RATE, target_shape=TARGET_SHAPE):
    """Load an audio file and turn it into a fixed-shape log-mel model input.

    Args:
        file_path: Path of the audio file read with ``librosa.load``.
        sample_rate: Rate the audio is resampled to while loading.
        target_shape: 2-D shape the spectrogram is forced into.

    Returns:
        Array of shape ``target_shape + (1,)``: the reshaped log-mel
        spectrogram with a trailing channel axis.
    """
    y, sr = librosa.load(file_path, sr=sample_rate)

    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    # ref=np.max expresses every bin in dB relative to the loudest bin.
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # np.resize always returns exactly `target_shape`, so no separate
    # pad/truncate step is needed afterwards (it could never change anything).
    # NOTE(review): np.resize does not interpolate -- it truncates or repeats
    # the flattened values -- so the 128 mel bands are not kept as rows.
    # Left unchanged on the assumption that the model was trained on inputs
    # built the same way; confirm before swapping in a real pad/crop.
    fixed_spec = np.resize(log_mel_spec, target_shape)

    return np.expand_dims(fixed_spec, axis=-1)

def load_fine_tuned_model(model_path):
    """Load the saved Keras model stored at ``model_path`` and return it."""
    fine_tuned_model = keras.models.load_model(model_path)
    return fine_tuned_model

def denoise_audio(model, noisy_mel_spectrogram):
    """Run the model on a single spectrogram and return its un-batched output.

    Args:
        model: Object exposing ``predict(batch)``.
        noisy_mel_spectrogram: One input example without a batch axis.

    Returns:
        The prediction with its leading (size-1) batch axis removed.
    """
    # The model is called on a batch of one, so add the batch axis up front ...
    single_item_batch = np.expand_dims(noisy_mel_spectrogram, axis=0)
    prediction = model.predict(single_item_batch)

    # ... and strip it again from the result.
    return np.squeeze(prediction, axis=0)

def postprocess_audio(denoised_mel_spectrogram, sample_rate=SAMPLE_RATE):
    """Convert a dB-scaled mel spectrogram back into a waveform.

    Args:
        denoised_mel_spectrogram: dB-scaled mel spectrogram, optionally with a
            trailing singleton channel axis.
        sample_rate: Sampling rate passed to the mel inversion.

    Returns:
        The waveform returned by ``librosa.feature.inverse.mel_to_audio``.
    """
    # Drop the channel axis only when there is one: a plain 2-D spectrogram
    # that happens to contain a single frame must keep its 2-D shape.
    if denoised_mel_spectrogram.ndim > 2 and denoised_mel_spectrogram.shape[-1] == 1:
        denoised_mel_spectrogram = denoised_mel_spectrogram.squeeze(-1)

    # Value statistics of the model output, printed for a quick sanity check.
    print(f"Min: {denoised_mel_spectrogram.min()}, Max: {denoised_mel_spectrogram.max()}, Mean: {denoised_mel_spectrogram.mean()}")

    # Undo the dB scaling so the inversion receives a power spectrogram.
    denoised_mel_spectrogram = librosa.db_to_power(denoised_mel_spectrogram)

    y_denoised = librosa.feature.inverse.mel_to_audio(denoised_mel_spectrogram, sr=sample_rate, n_iter=32)

    print(f"Generated waveform shape after mel-to-audio conversion: {y_denoised.shape}")
    return y_denoised


def main():
    """Denoise NOISY_AUDIO_PATH with the fine-tuned model and save the result.

    Runs preprocessing, model inference and mel inversion in sequence, printing
    the intermediate shapes, then writes the waveform to OUTPUT_AUDIO_PATH.
    Prints an error and returns without writing when the waveform is empty.
    """
    noisy_mel_spectrogram = preprocess_audio(NOISY_AUDIO_PATH)
    print(f"Noisy mel spectrogram shape: {noisy_mel_spectrogram.shape}")

    model = load_fine_tuned_model(PATH_TO_FINE_TUNED_MODEL)

    denoised_mel_spectrogram = denoise_audio(model, noisy_mel_spectrogram)
    print(f"Denoised mel spectrogram shape: {denoised_mel_spectrogram.shape}")

    denoised_waveform = postprocess_audio(denoised_mel_spectrogram)
    print(f"Post-processed waveform shape: {denoised_waveform.shape}")

    if denoised_waveform.size == 0:
        print("Error: Generated waveform is empty.")
        return

    # Average over axis 1 unless the second dimension has size 2.
    if len(denoised_waveform.shape) > 1 and denoised_waveform.shape[1] != 2:
        denoised_waveform = np.mean(denoised_waveform, axis=1)

    denoised_waveform = denoised_waveform.astype(np.float32)

    # os.path.dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create the directory when the path actually contains one.
    output_dir = os.path.dirname(OUTPUT_AUDIO_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    sf.write(OUTPUT_AUDIO_PATH, denoised_waveform, SAMPLE_RATE)

    print(f"Denoised audio saved to {OUTPUT_AUDIO_PATH}")

# Run the full denoising pipeline when this cell is executed.
main()




Noisy mel spectrogram shape: (1024, 44, 1)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 240ms/step
Denoised mel spectrogram shape: (1024, 44, 1)
Min: 0.0, Max: 0.0, Mean: 0.0


  mel_basis = filters.mel(


Generated waveform shape after mel-to-audio conversion: (22016,)
Post-processed waveform shape: (22016,)
Denoised audio saved to result/test.wav


In [None]:
import numpy as np
import librosa

def match_audio_length(original, denoised):
    """Cut both signals down to the length of the shorter one.

    Returns the pair ``(original, denoised)``, each sliced from the start to
    the common length.
    """
    common_length = min(map(len, (original, denoised)))
    head = slice(common_length)
    return original[head], denoised[head]

# Load the clean reference and the denoised output at the same sampling rate
# so the two signals can be compared sample by sample.
original, sr = librosa.load("result/clean_audio.wav", sr=SAMPLE_RATE)
denoised, _ = librosa.load(OUTPUT_AUDIO_PATH, sr=SAMPLE_RATE)

# Trim both signals to the shorter length before computing the metrics.
original, denoised = match_audio_length(original, denoised)

# NOTE(review): SAMPLE_RATE and OUTPUT_AUDIO_PATH come from the previous cell.
# calculate_psnr / calculate_nrmse are not defined in this cell or in the
# visible cell above -- confirm they are defined before this point so the
# notebook survives Restart Kernel -> Run All.
psnr = calculate_psnr(original, denoised)
nrmse = calculate_nrmse(original, denoised)

print(f"PSNR: {psnr} dB")
print(f"NRMSE: {nrmse}")


  original, sr = librosa.load("result/clean_audio.wav", sr=SAMPLE_RATE)


PSNR: 16.240995401836482 dB
NRMSE: 0.22995634377002716
