# Moog VCF

In [4]:
import numpy
from scipy.fftpack import fft
import numpy as np
import librosa
import librosa.display
from IPython.display import Audio
import matplotlib.pyplot as plt
%matplotlib inline

ModuleNotFoundError: No module named 'librosa'

# Generate data from wav

In [None]:
# Load the reference (target) recording at 48 kHz.
target_output, sr = librosa.load('./sound_examples/train/train_input_o.wav',sr=48e3)
data = target_output
# print data info
print(data.shape)
print(sr)

# plot the waveform against time in seconds
time = np.arange(0,len(data)) / sr
fig, ax = plt.subplots()
ax.plot(time, data)
ax.set(xlabel='Time(s)', ylabel='amplitude')
plt.show()

# plot mel spectrogram (librosa default mel settings) on a dB scale;
# the raw power values span too many orders of magnitude to be readable.
spec = librosa.feature.melspectrogram(y = data, sr = sr)
db_spec = librosa.power_to_db(spec, ref = np.max)
librosa.display.specshow(db_spec, y_axis='mel', x_axis='s', sr = sr)
plt.colorbar(format = '%+2.0f dB')

# plot 128-band mel spectrogram limited to 8 kHz
S = librosa.feature.melspectrogram(y=data, sr=sr, n_mels=128, fmax = 8000)
plt.figure(figsize=(21,4))
# sr has to be passed explicitly: specshow otherwise assumes 22050 Hz and
# mislabels both the time and the frequency axis of this 48 kHz signal.
librosa.display.specshow(librosa.power_to_db(S,ref=np.max), sr=sr, y_axis = 'mel', fmax=8000, x_axis='time')
plt.colorbar(format = '%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

# audio file (must stay the last expression so the notebook renders the player)
print('target output')
Audio(data = data, rate = sr)

In [None]:
# Load the training input at 48 kHz and trim it so it is never longer than the target.
training_input, sr = librosa.load('./sound_examples/train/train_input.wav',sr=48e3)
training_input = training_input[0:target_output.size]
data = training_input
# print data info
print(data.shape)
print(sr)

# plot the waveform against time in seconds
time = np.arange(0,len(data)) / sr
fig, ax = plt.subplots()
ax.plot(time, data)
ax.set(xlabel='Time(s)', ylabel='amplitude')
plt.show()

# plot mel spectrogram (librosa default mel settings) on a dB scale;
# the raw power values span too many orders of magnitude to be readable.
spec = librosa.feature.melspectrogram(y = data, sr = sr)
db_spec = librosa.power_to_db(spec, ref = np.max)
librosa.display.specshow(db_spec, y_axis='mel', x_axis='s', sr = sr)
plt.colorbar(format = '%+2.0f dB')

# plot 128-band mel spectrogram limited to 8 kHz
S = librosa.feature.melspectrogram(y=data, sr=sr, n_mels=128, fmax = 8000)
plt.figure(figsize=(15,4))
# sr has to be passed explicitly: specshow otherwise assumes 22050 Hz and
# mislabels both the time and the frequency axis of this 48 kHz signal.
librosa.display.specshow(librosa.power_to_db(S,ref=np.max), sr=sr, y_axis = 'mel', fmax=8000, x_axis='time')
plt.colorbar(format = '%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

# audio file (must stay the last expression so the notebook renders the player)
print('train input')
Audio(data= data, rate = sr)


In [None]:
# Dump the training-input sample array as a quick sanity check.
print(training_input)

In [None]:
# Dump the target-output sample array as a quick sanity check.
print(target_output)

## Declare Model
The example is shown for this model only; the other models are used analogously.

In [None]:
import importlib

import torch

# Pick one of the keys of MODEL_REGISTRY below.
# model_type = "MoogVCFRF_parameterized"
model_type = "MoogVCF_circuit_parameterized"

# NOTE(review): the CPU is used even when CUDA is available. Confirm that the
# downstream cells handle CUDA tensors before switching the device.
device = torch.device("cpu")

# Positional constructor arguments shared by several model variants.
_RF_ARGS = (0.7, 1000.0, 0.1, 48e3)
_CIRCUIT_ARGS = (0.7, 5e-5, 4.7e-8, 2.5e-2, 1.0, 48e3)

# model_type -> (module exposing MoogVCFRFModel, constructor args, success message)
MODEL_REGISTRY = {
    "MoogVCFRF": (
        "MoogVCF_rf2", _RF_ARGS,
        "Moog VCF RF initialized successfully"),
    "MoogVCFRF_parameterized": (
        "MoogVCF_rf2_parameterized", _RF_ARGS,
        "Moog VCF RF parameterized model initialized successfully"),
    "MoogVCF_circuit": (
        "MoogVCF_rf3", _CIRCUIT_ARGS,
        "Moog VCF circuit data model initialized successfully"),
    "MoogVCF_circuit_parameterized": (
        "MoogVCF_rf3_parameterized", _CIRCUIT_ARGS,
        "Moog VCF circuit data parameterized model initialized successfully"),
    "MoogVCF_nonlinear": (
        "MoogVCF_nonlinear", _RF_ARGS,
        "Moog VCF nonlinear model initialized successfully"),
    "MoogVCF_nonlinear_parameterized": (
        "MoogVCF_nonlinear_parameterized", _RF_ARGS,
        "Moog VCF nonlinear parameterized model initialized successfully"),
}

# Fail immediately on a typo instead of leaving `model` undefined for later cells.
if model_type not in MODEL_REGISTRY:
    raise ValueError(
        f"Unknown model_type {model_type!r}; choose one of: {', '.join(MODEL_REGISTRY)}"
    )

module_name, ctor_args, success_message = MODEL_REGISTRY[model_type]
# Only the selected variant's module is imported, as with the per-branch imports.
MoogVCFRFModel = importlib.import_module(module_name).MoogVCFRFModel
model = MoogVCFRFModel(*ctor_args).to(device)
print(success_message)

## Initialize Dataloader


In [None]:
from torch.utils.data import DataLoader
from iir_dataset import DIIRDataSet

# Sequence length equals the number of samples in the training input.
sequence_length = training_input.size
batch_size = 1
print(training_input.size // 3)

train_dataset = DIIRDataSet(training_input, target_output, sequence_length)
loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
from torch.utils.data import DataLoader
from iir_dataset import DIIRDataSet

# Sequence length is a quarter of the training input (rounded down).
sequence_length = training_input.size // 4
batch_size = 1
print(sequence_length)

train_dataset = DIIRDataSet(training_input, target_output, sequence_length)
loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

## Define optimizer and criterion

In [None]:
import torch.nn as nn
from torch.optim import Adam
from Loss import ESRLoss, LossWrapper, DCLoss

# Training hyper-parameters.
n_epochs = 100
lr = 1e-2

# Adam settings, spelled out explicitly.
adam_settings = dict(
    lr=lr,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)
optimizer = Adam(model.parameters(), **adam_settings)

# Combined loss with weights 0.25 for 'ESR' and 0.75 for 'DC'.
loss_weights = {'ESR': 0.25, 'DC': 0.75}
criterion = LossWrapper(loss_weights, None)
# criterion = ESRLoss()
# criterion = DCLoss()

In [None]:
# Show the current value and shape of every model parameter.
for param in model.parameters():
    print(param.data, param.size())

In [None]:
# Show every model parameter together with its registered name.
for name, param in model.named_parameters():
    print(name, param)

## Define train loop

In [None]:
def train(criterion, model, loader, optimizer, epoch):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    :param criterion: callable invoked as ``criterion(target, prediction)`` that
        returns a scalar loss tensor
    :param model: module being trained; it is put into training mode
    :param loader: iterable of dicts with an ``'input'`` and a ``'target'`` tensor.
        Any iterable works, including ones without ``__len__``.
    :param optimizer: optimizer whose ``zero_grad``/``step`` are called once per batch
    :param epoch: epoch index, used only in the progress messages
    :return: ``(mean_loss, batch_count)``; ``mean_loss`` is ``nan`` when the loader
        yielded no batches
    """
    model.train()
    device = next(model.parameters()).device
    total_loss = 0.0
    count = 0
    for count, batch in enumerate(loader, start=1):
        input_seq_batch = batch['input'].to(device)
        target_seq_batch = batch['target'].to(device)
        optimizer.zero_grad()
        predicted_output = model(input_seq_batch)
        loss = criterion(target_seq_batch, predicted_output)
        # No-op when the loss already requires grad.
        # NOTE(review): if the loss ever arrives detached, this only silences the
        # error from backward() and no parameter gets a gradient. Confirm it is needed.
        loss.requires_grad_(True)
        # torch.autograd.set_detect_anomaly(True)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        total_loss += batch_loss
        print(f"Epoch {epoch} : batch {count}, loss = {batch_loss}")

    if count == 0:
        # Nothing was processed: report "no data" rather than dividing by zero.
        return float("nan"), 0

    # Average over the batches actually seen instead of relying on len(loader).
    return total_loss / count, count

## Train!

In [None]:
def plot_func_def(all_grads, layers):
    """
    Draw the per-layer average-gradient curve with pyplot.

    :param all_grads: sequence holding one average-gradient value per layer
    :param layers: tick labels, one per entry of ``all_grads``
    :return: the ``plt`` (pyplot) module the curve was drawn with
    """
    n_layers = len(all_grads)

    # Gradient curve plus a thin black baseline at zero.
    plt.plot(all_grads, alpha=0.3, color="b")
    plt.hlines(0, 0, n_layers + 1, linewidth=1, color="k")

    # One vertically written tick label per layer.
    plt.xticks(range(n_layers), layers, rotation="vertical")
    plt.xlim(xmin=0, xmax=n_layers)

    plt.xlabel("Layers")
    plt.ylabel("Average Gradient")
    plt.title("Gradient flow")
    plt.grid(True)
    plt.tight_layout()

    return plt


def plot_grad_flow(named_parameters):
    """
    Plot the mean absolute gradient of every trainable, non-bias parameter.

    Parameters that have no gradient yet (``param.grad is None``) are skipped.

    :param named_parameters: iterable of ``(name, parameter)`` pairs, e.g.
        ``model.named_parameters()``
    :return: the value returned by ``plot_func_def`` for the collected gradients
    """
    all_grads = []
    layers = []

    for name, param in named_parameters:
        if not param.requires_grad or "bias" in name:
            continue
        if param.grad is None:
            # No backward pass has populated this parameter; nothing to plot.
            continue

        # Build a short tick label from the dotted parameter name:
        # "<a>.<b>.<c>.<d>..." -> "c_d", shorter dotted names -> second component,
        # undotted names -> the name itself.
        parts = name.split(".")
        if len(parts) >= 4:
            label = parts[2] + "_" + parts[3]
        elif len(parts) >= 2:
            label = parts[1]
        else:
            label = name
        layers.append(label)

        # .cpu() is a no-op for CPU tensors and makes any accelerator device work.
        grad_values = param.grad.detach().cpu().numpy()
        all_grads.append(numpy.mean(numpy.fabs(grad_values)))

    return plot_func_def(all_grads, layers)

In [None]:
# plt = plot_grad_flow(model.named_parameters())

In [None]:
import os
# Mean training loss per epoch, filled in as training progresses.
training_loss = np.zeros(n_epochs)
# Target directory for the gradient-flow PDFs (only used by the commented-out
# savefig call below); note the Windows-style path separators.
experiment_saved_models = ".\\model_MoogVCF\\"
print("training using device:"+ next(model.parameters()).device.type)
for epoch in range(n_epochs):
    # One full pass over `loader`; `count` is the number of batches processed.
    loss, count = train(criterion, model, loader, optimizer, epoch)
    training_loss[epoch] = loss
    print("Epoch {} -- Loss {:3E}".format(epoch, loss))
    # NOTE(review): this message is printed although the plotting calls below are commented out.
    print("Generating Gradient Flow Plot at epoch {}".format(epoch))
    # plt = plot_grad_flow(model.named_parameters())
    # plt.savefig(os.path.join(experiment_saved_models, "epoch{}.pdf".format(str(epoch))))

In [None]:
# Display the model parameters (last expression of the cell, so it is rendered).
list(model.parameters())

In [None]:
# NOTE(review): the value of this expression is discarded because it is not
# the last expression of the cell.
list(model.parameters())
print(training_loss)
# Plot the recorded mean loss against the epoch index.
epoch_axis = np.arange(0,n_epochs)
fig, ax = plt.subplots()
ax.plot(epoch_axis, training_loss)
ax.set(xlabel='Epoch', ylabel='Loss')
plt.show()

In [None]:
def evaluation(criterion, model, loader):
    """Run ``model`` over ``loader`` without gradients and collect loss and output.

    The model is switched to eval mode for the duration of the call and its
    previous training/eval mode is restored afterwards.

    :param criterion: callable invoked as ``criterion(target, prediction)`` that
        returns a scalar loss tensor
    :param model: module to evaluate; called on 3-D batches whose last-axis
        index 0 holds the predicted signal
    :param loader: iterable of dicts with an ``'input'`` and a ``'target'`` tensor
    :return: ``(mean_loss, output, re_input)`` where ``mean_loss`` is the loss
        averaged over every batch, ``output`` is a 1-D numpy array with the
        predictions of all batches concatenated in order, and ``re_input`` is the
        input tensor of the last batch on the model's device
    :raises ValueError: if the loader yields no batches
    """
    device = next(model.parameters()).device
    was_training = model.training
    model.eval()

    total_loss = 0.0
    chunks = []
    batch = None
    try:
        with torch.no_grad():
            for batch in loader:
                input_seq_batch = batch['input'].to(device)
                target_seq_batch = batch['target'].to(device)
                predicted_output = model(input_seq_batch)
                # .cpu() keeps this working when the model lives on an accelerator.
                chunks.append(predicted_output[:, :, 0].detach().cpu().numpy().reshape(-1))
                # Every batch contributes to the loss, including the first.
                total_loss += criterion(target_seq_batch, predicted_output).item()
    finally:
        model.train(was_training)

    if batch is None:
        raise ValueError("evaluation() needs at least one batch, but the loader was empty")

    re_input = batch['input'].to(device)
    # Concatenate once at the end instead of re-copying the array per batch.
    output = np.concatenate(chunks, axis=0)
    return total_loss / len(chunks), output, re_input
    # return output, re_input


In [None]:
from torch.utils.data import DataLoader
from iir_dataset import DIIRDataSet

# Test recordings at 48 kHz: the input and its '_o' counterpart used as the reference.
testing_input, sr = librosa.load('./sound_examples/test_piano/test_input_piano.wav', sr = 48e3)
testing_output, sr = librosa.load('./sound_examples/test_piano/test_input_piano_o.wav', sr = 48e3)
# Trim the input so it is never longer than the reference.
testing_input = testing_input[:testing_output.size]
print(testing_input.size)
print(testing_output)

# Sequences of one hundredth of the recording (rounded down), served in order.
sequence_length = testing_input.size // 100
batch_size = 1
test_dataset = DIIRDataSet(testing_input, testing_output, sequence_length)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
from Loss import ESRLoss, DCLoss,LossWrapper
# The test score uses plain ESRLoss; the weighted wrapper is left here as an alternative.
# criterion = LossWrapper({'ESR': 0.25, 'DC': 0.75},None)
criterion = ESRLoss()
# Returns the averaged loss, the concatenated prediction and the last input batch.
testing_loss, predicted_output, test_input = evaluation(criterion, model, test_loader)

## Convert predicted result to audio

In [None]:
print(testing_loss)
final_output = predicted_output
# NOTE: a notebook renders only the last expression of a cell, so this player is not shown here.
Audio(data = final_output, rate = sr)

from scipy.io.wavfile import write
fs = int(48e3)
# Gain applied before the signals are stored as 32-bit integer PCM.
amplitude = 2e11
int32_limits = np.iinfo(np.int32)

for wav_path, signal in (
    ("./sound_examples/test_piano/finished_testing_input.wav", testing_input),
    ("./sound_examples/test_piano/finished_testing_output.wav", testing_output),
    ("./sound_examples/test_piano/finished_final_output.wav", final_output),
):
    # Scale in float64 so the int32 limits are exactly representable, then
    # saturate: an unclipped float -> int32 cast wraps around for any sample
    # whose scaled value leaves the int32 range.
    data = np.asarray(signal, dtype=np.float64) * amplitude
    data = np.clip(data, int32_limits.min, int32_limits.max)
    write(wav_path, fs, data.astype(np.int32))

# Read the exported files back so the following cells work on exactly what was written.
testing_input, sr = librosa.load('./sound_examples/test_piano/finished_testing_input.wav', sr = 48e3)
testing_output, sr = librosa.load('./sound_examples/test_piano/finished_testing_output.wav', sr = 48e3)
final_output, sr = librosa.load('./sound_examples/test_piano/finished_final_output.wav', sr = 48e3)


In [None]:
Fs = sr


def _one_sided_spectrum(signal, fs):
    """Return ``(frequencies in Hz, magnitude)`` of the one-sided spectrum of a real signal.

    The frequency axis and the ``2/N`` scaling are both derived from the actual
    number of samples, so bin ``k`` is plotted at ``k * fs / N`` Hz.
    """
    n_samples = np.size(signal)
    freqs = np.fft.rfftfreq(n_samples, d=1.0 / fs)
    magnitude = (2.0 / n_samples) * np.abs(np.fft.rfft(signal))
    return freqs, magnitude


# input
fr_x, X_m = _one_sided_spectrum(testing_input, Fs)

# target
fr_y, Y_m = _one_sided_spectrum(testing_output, Fs)

# predicted (may differ in length from the other two, hence its own frequency axis)
fr_z, Z_m = _one_sided_spectrum(final_output, Fs)

# Overlay the three spectra; the 0 Hz bin cannot be drawn on a log axis and is omitted by matplotlib.
plt.loglog(fr_x, X_m)
plt.loglog(fr_y, Y_m)
plt.loglog(fr_z, Z_m, linestyle='--')
plt.title('predicted output')
plt.xlabel('Frequency(Hz)')
plt.ylabel('Magnitude')
plt.tight_layout()

plt.legend(['input','target','predicted'])

In [None]:
# plot data
data = testing_input
time = np.arange(0,len(data)) / sr
# plt.subplot(3,1,1)
plt.plot(time, data)
plt.xlabel("Time(s)")
plt.ylabel("amplitude")
plt.title("test input signal")

data = testing_output
time = np.arange(0,len(data)) / sr
# plt.subplot(3,1,2)
plt.plot(time, data)
plt.xlabel("Time(s)")
plt.ylabel("amplitude")
plt.title("target output signal")

data = final_output
time = np.arange(0,len(data)) / sr
# plt.subplot(3,1,3)
plt.plot(time, data,linestyle='--')
plt.xlabel("Time(s)")
plt.ylabel("amplitude")
plt.title("predicted output signal")
plt.tight_layout()

# common
# plt.xlim(29.027,29.032)
# cutoff
# plt.xlim(3.110, 3.115)

# cutoff
plt.xlim(7.12,7.13)
plt.ylim(-0.12, 0.12)

# common
# plt.xlim(10.06,10.07)

plt.legend(['input','target','predicted'])

In [None]:
# Inspect the predicted signal. It is taken straight from `final_output`; no
# file is loaded here because nothing read from disk was used by this cell.
data = final_output
# print data info
print(data.shape)
print(sr)
# plot the waveform against time in seconds
time = np.arange(0,len(data)) / sr
fig, ax = plt.subplots()
ax.plot(time, data)
ax.set(xlabel='Time(s)', ylabel='amplitude')
plt.show()

# plot mel spectrogram (librosa default mel settings) on a dB scale;
# the raw power values span too many orders of magnitude to be readable.
spec = librosa.feature.melspectrogram(y = data, sr = sr)
db_spec = librosa.power_to_db(spec, ref = np.max)
librosa.display.specshow(db_spec, y_axis='mel', x_axis='s', sr = sr)
plt.colorbar(format = '%+2.0f dB')

# plot 128-band mel spectrogram limited to 8 kHz
S = librosa.feature.melspectrogram(y=data, sr=sr, n_mels=128, fmax = 8000)
plt.figure(figsize=(10,4))
# sr has to be passed explicitly: specshow otherwise assumes 22050 Hz and
# mislabels both the time and the frequency axis of this 48 kHz signal.
librosa.display.specshow(librosa.power_to_db(S,ref=np.max), sr=sr, y_axis = 'mel', fmax=8000, x_axis='time')
plt.colorbar(format = '%+2.0f dB')
plt.title('Mel spectrogram')
plt.tight_layout()
plt.show()

# audio file (must stay the last expression so the notebook renders the player)
print('predicted_output')
Audio(data = 0.1*data, rate = sr)

In [None]:
# Audio player for the test input signal.
Audio(data = testing_input, rate = sr)


In [None]:
# Audio player for the target (reference) output signal.
Audio(data = testing_output, rate = sr)

In [None]:
# Dump the raw sample arrays of the three signals for a numeric comparison.
print(testing_input)
print(testing_output)
print(final_output)

In [None]:
# Audio player for the target output, placed next to the prediction player for A/B listening.
Audio(data = testing_output, rate = sr)

In [None]:
# Audio player for the model's predicted output.
Audio(data = final_output, rate = sr)