In [None]:
%load_ext autoreload
import ddsp
from pathlib import Path
import os
import torch
import yaml
import matplotlib.pyplot as plt
# Default figure size / resolution for every plot in this notebook.
plt.rcParams['figure.figsize'] = (18, 7.5 * 3 / 2)
plt.rcParams['figure.dpi'] = 300

# The relative paths used further down ('./runs/...', './configs/...') are
# resolved against the working directory, so move to the repository root.
# Set the DDSP_PYTORCH_ROOT environment variable to point at another checkout;
# without it the original location is used.
os.chdir(os.environ.get('DDSP_PYTORCH_ROOT', '/home/hugo/lab/ddsp_pytorch/'))

In [None]:
def decoder_interpolator(config1, config2, data_config, pm=1, ladd=0, calpha=None, batch=True):
    """Run one validation batch through the interpolator built from two configs.

    Args:
        config1: first config handed to ``ddsp.export.get_ddsp_interpolator``.
        config2: second config handed to ``ddsp.export.get_ddsp_interpolator``.
        data_config: config used to build the ``ddsp.data.Datamodule``. It is
            deep-copied before the batch size is overridden, so the caller's
            object is left untouched.
        pm: factor the f0 track is multiplied by before it reaches the model.
        ladd: forwarded unchanged as the fourth argument of the model call
            (presumably a loudness offset -- confirm against the model).
        calpha: value used as interpolation control (third argument of the
            model call) instead of the default ramp. A tensor must hold as
            many elements as the f0 batch because it is reshaped with
            ``view(pitch.shape)``; any other value is passed through as is.
        batch: when True the batch is fed as loaded and the model output is
            flattened along the batch axis afterwards; when False the batch
            items are concatenated into one long sequence before the call.

    Returns:
        ``(model, output)`` where ``output`` is what the model returned, with
        the alpha that was used stored under ``'alpha'``.
    """
    import copy

    model = ddsp.export.get_ddsp_interpolator(config1, config2)

    # Work on a private copy: overriding the batch size in place would leak
    # into every later use of the caller's config.
    data_config = copy.deepcopy(data_config)
    data_config['train']['batch'] = 6
    dm = ddsp.data.Datamodule(data_config)
    dm.setup()

    b = next(iter(dm.val_dataloader()))
    if not batch:
        # Chain the batch items one after another into a single sequence.
        pitch = torch.cat(list(b['f0']), dim=0).unsqueeze(0)
        loudness = torch.cat(list(b['loudness']), dim=0).unsqueeze(0)
    else:
        pitch = b['f0']
        loudness = b['loudness']

    print(loudness.mean(), loudness.std())
    print(pitch.mean(), pitch.std())

    if calpha is not None:
        alpha = calpha
    else:
        # Default control: a linear ramp that starts at 1 and decreases by
        # 1 / n_bins per frame (last value is 1 / n_bins).
        n_bins = pitch.shape[1] * pitch.shape[0]
        alpha = 1 - 1 / n_bins * torch.arange(n_bins)
        alpha = alpha.unsqueeze(0).unsqueeze(-1)

    if isinstance(alpha, torch.Tensor):
        # Give alpha the same layout as the f0 batch.
        alpha = alpha.view(pitch.shape)

    def batch_cat(d: dict):
        # Fold the batch axis away, in place: tensors with more than two dims
        # have their items concatenated along dim 0 and get a leading axis of
        # size 1, lists of tensors are concatenated, nested dicts are handled
        # recursively. Everything else is left as is.
        for k, v in d.items():
            if isinstance(v, torch.Tensor):
                if v.ndim > 2:
                    d[k] = torch.cat(list(v), dim=0).unsqueeze(0)
            elif isinstance(v, list):
                d[k] = torch.cat(v, dim=0)
            elif isinstance(v, dict):
                d[k] = batch_cat(v)
        return d

    model.eval()
    with torch.no_grad():
        output = model(pitch * pm, loudness, alpha, ladd)

        if batch:
            output = batch_cat(output)
        output['alpha'] = alpha
    return model, output

In [None]:
def plot_interpolator(model, output1, output2=None, outputinterp=None):
    """Plot synth controls and a mel spectrogram for up to three model outputs.

    Every output that is not None gets one row of three panels: harmonic
    controls, noise controls and the mel spectrogram of its ``'signal'``.
    With all three outputs given the figure is a 3x3 grid.

    Args:
        model: object exposing ``ddsp1.harmonic_synth`` and
            ``ddsp1.noise_synth`` (each with a ``plot(ax, ctrls)`` method) and
            ``config1['preprocess']['sample_rate']``. The ``ddsp1`` synths are
            used for every row.
        output1: mapping with ``'harmonic_ctrls'``, ``'noise_ctrls'`` and
            ``'signal'`` entries.
        output2: optional second output with the same entries.
        outputinterp: optional third output with the same entries.

    Returns:
        The current matplotlib figure.
    """
    rows = [out for out in (output1, output2, outputinterp) if out is not None]
    n_rows = len(rows)
    sample_rate = model.config1['preprocess']['sample_rate']

    plt.clf()
    for row, out in enumerate(rows):
        first_panel = 3 * row + 1

        ax = plt.subplot(n_rows, 3, first_panel)
        model.ddsp1.harmonic_synth.plot(ax, out['harmonic_ctrls'])

        ax = plt.subplot(n_rows, 3, first_panel + 1)
        model.ddsp1.noise_synth.plot(ax, out['noise_ctrls'])

        ax = plt.subplot(n_rows, 3, first_panel + 2)
        # Single-scale STFT of the flattened signal; the first (and only)
        # scale is kept. NOTE(review): 2048 / 0.75 / 512 look like FFT size,
        # overlap and hop -- confirm against ddsp.multiscale_fft and
        # ddsp.utils.stft_to_mel.
        stft = ddsp.multiscale_fft(out['signal'].reshape(-1), [2048], 0.75)[0]
        stft = ddsp.utils.tonp(stft)
        stft = ddsp.utils.stft_to_mel(stft, sample_rate, 2048, 512)
        ax.set_title('spectrogram')
        ddsp.utils.plot_spec(stft, ax)

    plt.tight_layout()
    return plt.gcf()

def play_output(output, rate=48000):
    """Wrap the audio of a model output in an IPython audio player.

    Args:
        output: mapping whose ``'signal'`` entry holds the audio; it is
            flattened to one dimension before being handed to the player.
        rate: sample rate passed to the player. Defaults to 48000, the value
            that used to be hard-coded.

    Returns:
        An ``IPython.display.Audio`` object.
    """
    import IPython.display as ipd

    return ipd.Audio(output['signal'].reshape(-1), rate=rate)

In [None]:
# Hand-picked config files, keyed by a short label. Only the 'rainbow' entry
# is reused when the main `configs` table is assembled below.
_legacy_config_paths = {
    'violin': './runs/violin-omg/config.yaml',
    'flute': './runs/flute-omg/config.yaml',
    'reed': './runs/reed_acoustic_011/config.yaml',
    'string': './runs/string_acoustic_080/config.yaml',
    'bass_synth': './runs/bass_synthetic_012/config.yaml',
    'flute_synth': './runs/flute_synthetic_002/config.yaml',
    'rainbow': './configs/rainbow.yaml',
}
old_configs = {
    label: ddsp.export.load_config(location)
    for label, location in _legacy_config_paths.items()
}

# One entry per config.yaml found anywhere under ./runs, keyed by the name of
# the directory that contains it.
configs = dict(
    (found.parent.name, ddsp.export.load_config(found))
    for found in Path('./runs').glob('**/config.yaml')
)

# Configs that live outside ./runs.
configs.update({'rainbow': old_configs['rainbow']})
configs.update({'me': ddsp.export.load_config('./configs/me.yaml')})
configs.update({'demo': ddsp.export.load_config('./configs/demo.yaml')})

# Show the available keys.
list(configs)


In [None]:
# Render three versions that differ only in the interpolation control:
#   calpha=1    -> constant alpha of 1
#   calpha=0    -> constant alpha of 0
#   calpha=None -> default alpha ramp built inside decoder_interpolator
# `output2` and `outputinterp` are played back by later cells, so all three
# renderings have to be computed here. pm/ladd are the values of the call that
# was active before (the disabled calls used pm=2, ladd=0.75).
shared_kwargs = dict(pm=1, ladd=1)

model, output1 = decoder_interpolator(configs['violin-dcdr'], configs['clarinet-dcdr'], configs['demo'],
                                      calpha=1, **shared_kwargs)
model, output2 = decoder_interpolator(configs['violin-dcdr'], configs['clarinet-dcdr'], configs['demo'],
                                      calpha=0, **shared_kwargs)
model, outputinterp = decoder_interpolator(configs['violin-dcdr'], configs['clarinet-dcdr'], configs['demo'],
                                           calpha=None, **shared_kwargs)

plot_interpolator(model, output1, output2, outputinterp)

In [None]:
# Play back the audio stored in output1['signal'].
play_output(output1)

In [None]:
# Play back the audio stored in output2['signal'].
# NOTE(review): requires `output2` to have been defined by an earlier cell.
play_output(output2)

In [None]:
# Play back the audio stored in outputinterp['signal'].
# NOTE(review): requires `outputinterp` to have been defined by an earlier cell.
play_output(outputinterp)

In [None]:
import IPython.display as ipd

# Interpolator built from the 'string_acoustic_080' and 'reed_acoustic_011'
# configs, fed with data from the 'me' config; calpha=None selects the default
# alpha ramp.
model, output = decoder_interpolator(configs['string_acoustic_080'], configs['reed_acoustic_011'], configs['me'],
                                     pm=1, ladd=-1, calpha=None)
audio = output['signal']

# plot_interpolator is declared with three outputs; the single rendering is
# passed for each of them (a two-argument call raises TypeError).
plot_interpolator(model, output, output, output)
ipd.Audio(audio.reshape(-1), rate=48000)

In [None]:
import IPython.display as ipd

# Interpolator built from the 'keyboard_acoustic_016' and 'reed_acoustic_011'
# configs, fed with data from the 'rainbow' config; calpha=None selects the
# default alpha ramp.
model, output = decoder_interpolator(configs['keyboard_acoustic_016'], configs['reed_acoustic_011'], configs['rainbow'],
                                     pm=2, ladd=1.3, calpha=None)
audio = output['signal']

# plot_interpolator is declared with three outputs; the single rendering is
# passed for each of them (a two-argument call raises TypeError).
plot_interpolator(model, output, output, output)
ipd.Audio(audio.reshape(-1), rate=48000)

In [None]:
import IPython.display as ipd

# Interpolator built from the 'string_acoustic_080' and 'guitar_acoustic_016'
# configs, fed with data from the 'rainbow' config; calpha=None selects the
# default alpha ramp.
model, output = decoder_interpolator(configs['string_acoustic_080'], configs['guitar_acoustic_016'], configs['rainbow'],
                                     pm=2, ladd=0, calpha=None)
audio = output['signal']

# plot_interpolator is declared with three outputs; the single rendering is
# passed for each of them (a two-argument call raises TypeError).
plot_interpolator(model, output, output, output)
ipd.Audio(audio.reshape(-1), rate=48000)

In [None]:
import IPython.display as ipd

# Interpolator built from the 'guitar_electronic_001' and
# 'guitar_acoustic_016' configs, fed with data from the 'rainbow' config;
# calpha=None selects the default alpha ramp.
model, output = decoder_interpolator(configs['guitar_electronic_001'], configs['guitar_acoustic_016'], configs['rainbow'],
                                     pm=2, ladd=0, calpha=None)
audio = output['signal']

# plot_interpolator is declared with three outputs; the single rendering is
# passed for each of them (a two-argument call raises TypeError).
plot_interpolator(model, output, output, output)
ipd.Audio(audio.reshape(-1), rate=48000)

In [None]:
import numpy as np
# Imported here so this cell does not depend on an earlier cell having bound
# the name `ipd`.
import IPython.display as ipd

SR = 16000          # sample rate used for both synthesis and playback
n_harmonics = 1     # number of partials summed below

f0 = 220
t = np.arange(SR) / SR      # SR sample instants spaced 1/SR apart
sig = np.zeros(SR)
# Sum of sines at integer multiples of f0, the n-th one weighted by 1/n.
for n in range(1, n_harmonics+1):
    sig += 1 / n * np.sin(2 * np.pi * n * f0 * t)

# Show the first 256 samples, then offer the signal for playback.
plt.plot(sig[0:256])
ipd.Audio(sig, rate=SR)

$$
\sum_{n=1}^{N}\frac{1}{n} \sin(2\pi \cdot n f_0 \cdot t)
$$

In [None]:
from types import SimpleNamespace

mconfig = configs['violin-dcdr']
data_config = configs['demo']
ladd = 1    # added to the normalized loudness
pm = 2      # multiplier applied to f0

# Build the model from its config, then pass it through
# load_model_state_dict (presumably restores the trained weights -- confirm).
model = ddsp.train.load_model(mconfig)
model = ddsp.export.load_model_state_dict(model, mconfig)
model.has_reverb = False

data_config['train']['batch'] = 10
dm = ddsp.data.Datamodule(data_config)
dm.setup()

# Controls are taken from the training data: f0 scaled by pm, loudness
# normalized with the model's config and then shifted by ladd.
f0 = torch.tensor(dm.train_data.pitchs).unsqueeze(-1) * pm
loudness = torch.tensor(dm.train_data.loudness).unsqueeze(-1)
loudness = ddsp.export.normalize_loudness(mconfig, loudness)
loudness = loudness + ladd
print(f0.shape)

model.eval()
with torch.no_grad():
    output = model({'f0': f0, 'loudness': loudness})

# plot_interpolator reads `.ddsp1` and `.config1`. Provide them through a
# throwaway namespace instead of assigning `model.ddsp1 = model`: if the model
# is a torch.nn.Module (TODO confirm) that assignment registers it as its own
# submodule, and any later recursive traversal (eval(), parameters(), ...)
# would never terminate.
plot_view = SimpleNamespace(ddsp1=model, config1=mconfig)

plot_interpolator(plot_view, output, output, output)
play_output(output)

In [None]:
from types import SimpleNamespace

mconfig = configs['clarinet-dcdr']
data_config = configs['demo']
ladd = 1    # added to the normalized loudness
pm = 2      # multiplier applied to f0

# Build the model from its config, then pass it through
# load_model_state_dict (presumably restores the trained weights -- confirm).
model = ddsp.train.load_model(mconfig)
model = ddsp.export.load_model_state_dict(model, mconfig)

data_config['train']['batch'] = 10
dm = ddsp.data.Datamodule(data_config)
dm.setup()

# Controls are taken from the training data: f0 scaled by pm, loudness
# normalized with the model's config and then shifted by ladd.
f0 = torch.tensor(dm.train_data.pitchs).unsqueeze(-1) * pm
loudness = torch.tensor(dm.train_data.loudness).unsqueeze(-1)
loudness = ddsp.export.normalize_loudness(mconfig, loudness)
loudness = loudness + ladd
print(f0.shape)

model.eval()
with torch.no_grad():
    output = model({'f0': f0, 'loudness': loudness})

# plot_interpolator reads `.ddsp1` and `.config1`. Provide them through a
# throwaway namespace instead of assigning `model.ddsp1 = model`: if the model
# is a torch.nn.Module (TODO confirm) that assignment registers it as its own
# submodule, and any later recursive traversal (eval(), parameters(), ...)
# would never terminate.
plot_view = SimpleNamespace(ddsp1=model, config1=mconfig)

plot_interpolator(plot_view, output, output, output)
play_output(output)

In [None]:
from types import SimpleNamespace

mconfig = configs['flute-dcdr']
data_config = configs['demo']
ladd = 2    # added to the normalized loudness
pm = 2      # multiplier applied to f0

# Build the model from its config, then pass it through
# load_model_state_dict (presumably restores the trained weights -- confirm).
model = ddsp.train.load_model(mconfig)
model = ddsp.export.load_model_state_dict(model, mconfig)

data_config['train']['batch'] = 10
dm = ddsp.data.Datamodule(data_config)
dm.setup()

# Controls are taken from the training data: f0 scaled by pm, loudness
# normalized with the model's config and then shifted by ladd.
f0 = torch.tensor(dm.train_data.pitchs).unsqueeze(-1) * pm
loudness = torch.tensor(dm.train_data.loudness).unsqueeze(-1)
loudness = ddsp.export.normalize_loudness(mconfig, loudness)
loudness = loudness + ladd
print(f0.shape)

model.eval()
with torch.no_grad():
    output = model({'f0': f0, 'loudness': loudness})

# plot_interpolator reads `.ddsp1` and `.config1`. Provide them through a
# throwaway namespace instead of assigning `model.ddsp1 = model`: if the model
# is a torch.nn.Module (TODO confirm) that assignment registers it as its own
# submodule, and any later recursive traversal (eval(), parameters(), ...)
# would never terminate.
plot_view = SimpleNamespace(ddsp1=model, config1=mconfig)

plot_interpolator(plot_view, output, output, output)
play_output(output)

In [None]:
from types import SimpleNamespace

mconfig = configs['tenor-dcdr']
data_config = configs['demo']
ladd = 1.7  # added to the normalized loudness
pm = 0.5    # multiplier applied to f0

# Build the model from its config, then pass it through
# load_model_state_dict (presumably restores the trained weights -- confirm).
model = ddsp.train.load_model(mconfig)
model = ddsp.export.load_model_state_dict(model, mconfig)

data_config['train']['batch'] = 10
dm = ddsp.data.Datamodule(data_config)
dm.setup()

# Controls are taken from the training data: f0 scaled by pm, loudness
# normalized with the model's config and then shifted by ladd.
f0 = torch.tensor(dm.train_data.pitchs).unsqueeze(-1) * pm
loudness = torch.tensor(dm.train_data.loudness).unsqueeze(-1)
loudness = ddsp.export.normalize_loudness(mconfig, loudness)
loudness = loudness + ladd
print(f0.shape)

model.eval()
with torch.no_grad():
    output = model({'f0': f0, 'loudness': loudness})

# plot_interpolator reads `.ddsp1` and `.config1`. Provide them through a
# throwaway namespace instead of assigning `model.ddsp1 = model`: if the model
# is a torch.nn.Module (TODO confirm) that assignment registers it as its own
# submodule, and any later recursive traversal (eval(), parameters(), ...)
# would never terminate.
plot_view = SimpleNamespace(ddsp1=model, config1=mconfig)

plot_interpolator(plot_view, output, output, output)
play_output(output)