In [18]:
import os

import ipywidgets as widgets
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio, display
from IPython.display import clear_output

In [19]:
# Root directory of the demo data. Every folder below lives under it, so the
# notebook can be pointed at another checkout by setting HIFIGAN_DEMO_DIR
# instead of editing four separate absolute paths.
base_dir = os.environ.get(
    "HIFIGAN_DEMO_DIR",
    "/home/ibrahim/english_tts/hifigan_test_demo_github",
)

mel_folder = os.path.join(base_dir, "mel_spectrograms_22k")   # folder with .npy mel spectrograms
original_folder = os.path.join(base_dir, "random_wavs_16k")   # folder with original WAVs
generated_folder_vctk_v1 = os.path.join(base_dir, "generated_wavs_vctk_v1")
generated_folder_lj_v1 = os.path.join(base_dir, "generated_wavs_lj_v1")

# Extension-less names of every .wav in the originals folder, sorted; these
# names are the keys used to locate the matching files in the other folders.
files = sorted(
    os.path.splitext(entry)[0]
    for entry in os.listdir(original_folder)
    if entry.endswith(".wav")
)

In [20]:
# Analysis settings. n_fft, hop_length, win_length and n_mels are read as
# module-level globals by wav_to_mel.
target_sr = 22050      # target sampling rate

n_fft = 1024           # FFT size
win_length = n_fft     # analysis window length, equal to the FFT size (1024)
hop_length = 256       # hop between successive frames, in samples
n_mels = 80            # number of mel bands

In [21]:
def show_mel(mel_path, sr=22050, hop_length=256):
    """Load a mel spectrogram saved as ``.npy`` and display it.

    Parameters
    ----------
    mel_path : str
        Path to the ``.npy`` file; its base name is used as the plot title.
    sr : int, optional
        Sampling rate passed to ``specshow`` to scale the axes (default 22050).
    hop_length : int, optional
        Hop size in samples passed to ``specshow`` to scale the time axis
        (default 256).

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    mel = np.load(mel_path)

    # A spectrogram saved with a leading batch axis of size 1 is reduced to
    # 2-D so that it can be drawn as an image.
    if mel.ndim == 3 and mel.shape[0] == 1:
        mel = mel[0]

    fig, ax = plt.subplots(figsize=(8, 3))
    img = librosa.display.specshow(
        mel,
        sr=sr,
        hop_length=hop_length,
        x_axis='time',
        y_axis='mel',
        cmap='magma',
        ax=ax,
    )
    # NOTE(review): the colorbar is labelled in dB, which presumes the file
    # stores decibel values — confirm against whatever produced the .npy files.
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set_title(os.path.basename(mel_path))
    plt.show()

In [22]:
def wav_to_mel(wav_path, target_sr=22050):
    """Compute a natural-log mel spectrogram for a WAV file.

    The file is loaded as mono at its native sampling rate and resampled to
    ``target_sr`` only when the two rates differ. The analysis settings
    (``n_fft``, ``hop_length``, ``win_length``, ``n_mels``) are taken from the
    module-level variables of the same names.

    Parameters
    ----------
    wav_path : str
        Path of the audio file to analyse.
    target_sr : int, optional
        Sampling rate the signal is brought to before analysis (default 22050).

    Returns
    -------
    numpy.ndarray
        ``np.log`` of the mel spectrogram after flooring its values at 1e-5.
    """
    samples, native_sr = librosa.load(wav_path, sr=None, mono=True)

    needs_resample = native_sr != target_sr
    if needs_resample:
        samples = librosa.resample(samples, orig_sr=native_sr, target_sr=target_sr)

    analysis_settings = dict(
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        n_mels=n_mels,
    )
    mel_values = librosa.feature.melspectrogram(y=samples, sr=target_sr, **analysis_settings)

    # Floor the values before the log so that zeros do not become -inf.
    floored = np.clip(mel_values, a_min=1e-5, a_max=None)
    return np.log(floored)

In [23]:


def plot_mel_from_audio(wav_path, sr=22050, n_fft=1024, hop_length=256, win_length=1024, n_mels=80):
    """Compute the log-mel spectrogram of a WAV file and display it.

    Parameters
    ----------
    wav_path : str
        Path of the audio file; its base name is used as the plot title.
    sr : int, optional
        Sampling rate the audio is resampled to on load and that scales the
        plot axes (default 22050).
    n_fft, hop_length, win_length, n_mels : int, optional
        Analysis settings forwarded to ``librosa.feature.melspectrogram``.

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    # librosa.load resamples to `sr` itself when the file's native rate
    # differs, so no separate resampling step is needed.
    y, _ = librosa.load(wav_path, sr=sr)

    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length,
        win_length=win_length, n_mels=n_mels
    )
    # Natural log with a 1e-5 floor so that zeros do not become -inf.
    log_mel = np.log(np.clip(mel_spec, 1e-5, None))

    fig, ax = plt.subplots(figsize=(8, 3))
    img = librosa.display.specshow(
        log_mel,
        sr=sr,
        hop_length=hop_length,
        x_axis='time',
        y_axis='mel',
        cmap='magma',
        ax=ax,
    )
    # The plotted values are natural-log amplitudes, not decibels, so the
    # colorbar must not carry a "dB" unit.
    fig.colorbar(img, ax=ax, format='%+2.0f', label='log mel')
    ax.set_title(os.path.basename(wav_path))
    plt.show()


In [24]:
file_dropdown = widgets.Dropdown(
    options=files,
    description='Audio:',
    layout=widgets.Layout(width='50%')
)

output = widgets.Output()


def _show_generated(label, folder, fname):
    """Play and plot the generated WAV for ``fname`` in ``folder``, or report it missing."""
    gen_file = os.path.join(folder, fname + "_generated_e2e.wav")
    if os.path.exists(gen_file):
        print(f"Generated audio ({label}):")
        display(Audio(gen_file))
        print(f"Mel spectrogram ({label}):")
        plot_mel_from_audio(gen_file)
    else:
        # Name the model so the two possible "not found" lines can be told apart.
        print(f"Generated audio ({label}) not found!")


def show_demo(change):
    """Render the original and generated audio, with spectrograms, for one file.

    Parameters
    ----------
    change : dict
        Change notification from the dropdown; only ``change['new']`` (the
        selected extension-less file name) is read.
    """
    with output:
        clear_output(wait=True)
        fname = change['new']

        mel_path = os.path.join(mel_folder, fname + ".npy")

        orig_file = os.path.join(original_folder, fname + ".wav")
        if os.path.exists(orig_file):
            print("Original audio:")
            display(Audio(orig_file))
            # The saved mel may be missing even when the WAV is present;
            # report that instead of letting np.load raise.
            if os.path.exists(mel_path):
                print("Mel spectrogram (original):")
                show_mel(mel_path)
            else:
                print("Mel spectrogram (original) not found!")
        else:
            print("Original audio not found!")

        _show_generated("VCTK_V1", generated_folder_vctk_v1, fname)
        _show_generated("LJ_V1", generated_folder_lj_v1, fname)


file_dropdown.observe(show_demo, names='value')

display(file_dropdown, output)

# The dropdown already starts on its first option, so assigning files[0] to
# `value` would not fire a change event and nothing would be drawn. Render the
# initial selection explicitly, and skip it when there is nothing to select.
if files:
    show_demo({'new': file_dropdown.value})
else:
    print("No .wav files found in", original_folder)


Dropdown(description='Audio:', layout=Layout(width='50%'), options=('jane_eyre_06_f000124', 'jane_eyre_10_f000…

Output()