In [2]:
import sys
sys.path.append("..")

from glob import glob
import matplotlib.pyplot as plt
import ipywidgets as ipw
from IPython.display import Audio
import numpy as np 

from synthesizer import Synthesizer

from external import *

In [3]:
# Collect every saved synthesizer directory, in lexicographic order so the
# dropdown built from this list is stable between runs.
synthesizers_path = sorted(glob("../out/synthesizer/*/"))

In [4]:
def show_synthesizer(synthesizer_path):
    """Display an interactive item browser for one saved synthesizer.

    Reloads the synthesizer stored at ``synthesizer_path``, fetches the
    articulatory, source and sound features of its dataset (requested with
    ``cut_silences=True``) and displays a dropdown of the dataset's item
    names. Selecting an item runs ``resynth_item`` on it.

    Parameters
    ----------
    synthesizer_path : str
        Location handed to ``Synthesizer.reload``.
    """
    synthesizer = Synthesizer.reload(synthesizer_path)
    dataset = synthesizer.dataset

    dataset_config = synthesizer.config["dataset"]
    art_type = dataset_config["art_type"]
    sound_type = dataset_config["sound_type"]

    # Each of these is indexed by item name below.
    items_art = dataset.get_items_data(art_type, cut_silences=True)
    items_source = dataset.get_items_data("source", cut_silences=True)
    items_cepstrum = dataset.get_items_data(sound_type, cut_silences=True)

    sampling_rate = dataset.features_config["wav_sampling_rate"]
    items_name = dataset.get_items_list()

    def resynth_item(item_name):
        """Play, score and plot the resynthesis of a single dataset item.

        Displays three audio players (original wave, vocoder output from the
        original sound features, vocoder output from the features estimated
        by the synthesizer), prints the reconstruction MSE in normalized and
        data space, and plots the original and estimated feature matrices.

        Parameters
        ----------
        item_name : str
            Key of the item in the dataset feature mappings.
        """
        item_art = items_art[item_name]
        item_cepstrum = items_cepstrum[item_name]
        item_source = items_source[item_name]
        item_wave = dataset.get_item_wave(item_name)

        # The vocoder input is the sound features followed by the source
        # features, column-wise.
        item_sound = np.concatenate((item_cepstrum, item_source), axis=1)

        resynth_cepstrum = synthesizer.synthesize(item_art)
        # The original source features are reused: only the sound features
        # are estimated from the articulatory data.
        resynth_sound = np.concatenate((resynth_cepstrum, item_source), axis=1)

        # NOTE(review): `lpcynet` is not imported explicitly in this notebook;
        # presumably it is provided by `from external import *` - confirm.
        resynth_wave_from_sound = lpcynet.synthesize_frames(item_sound)
        resynth_wave_from_art = lpcynet.synthesize_frames(resynth_sound)

        print("Original sound:")
        display(Audio(item_wave, rate=sampling_rate))
        print("Resynth (original %s → LPCNet):" % (sound_type))
        display(Audio(resynth_wave_from_sound, rate=sampling_rate))
        print("Resynth (%s → estimated %s → LPCNet):" % (art_type, sound_type))
        display(Audio(resynth_wave_from_art, rate=sampling_rate))

        sound_scaler = synthesizer.sound_scaler
        normalized_error = sound_scaler.transform(item_cepstrum) - sound_scaler.transform(resynth_cepstrum)
        print("MSE (normalized space):", np.mean(normalized_error ** 2))

        # Compare the estimated features only. The source columns are copied
        # unchanged into both `item_sound` and `resynth_sound`, so including
        # them would add zero error and dilute the mean by an amount that
        # depends on the number of source columns rather than on the model.
        data_error = np.asarray(item_cepstrum) - np.asarray(resynth_cepstrum)
        print("MSE (data space):", np.mean(data_error ** 2))

        # One inch of width per 20 frames (rows of the feature matrix), with
        # a floor so very short items still give a valid, readable figure.
        nb_frames = len(item_sound)
        fig_width = max(nb_frames / 20, 1)
        fig, (ax_original, ax_resynth) = plt.subplots(
            2, 1, figsize=(fig_width, 4), dpi=120
        )

        ax_original.set_title("original %s" % (sound_type))
        ax_original.imshow(item_sound.T, origin="lower")

        ax_resynth.set_title("%s resynthesized from %s" % (sound_type, art_type))
        ax_resynth.imshow(resynth_sound.T, origin="lower")

        fig.tight_layout()
        plt.show()

    display(ipw.interactive(resynth_item, item_name=items_name))

# Top-level selector: choosing a synthesizer directory rebuilds the item
# browser for that synthesizer.
display(
    ipw.interactive(
        show_synthesizer,
        synthesizer_path=synthesizers_path,
    )
)

interactive(children=(Dropdown(description='synthesizer_path', options=('../out/synthesizer\\ea587b76c95fecef0…