In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
%cd drive/MyDrive/MVA_Audio/

/content/drive/MyDrive/MVA_Audio


In [4]:
%ls

 [0m[01;34mCode[0m/  [01;34m'Data Set'[0m/   [01;34mPaper[0m/   [01;34mResults[0m/


In [5]:
!pip install pyworld
!pip install pysptk

import matplotlib.pyplot as plt
import numpy as np
import os
import pyworld
import pysptk
import librosa
import time
import soundfile as sf

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from torch.autograd import Variable
from tqdm import tqdm
from tqdm import trange
from Code.preprocess import *
from Code.model import *

torch.manual_seed(0)
np.random.seed(0)

%matplotlib inline

Collecting pyworld
[?25l  Downloading https://files.pythonhosted.org/packages/af/88/003eef396c966cf00088900167831946b80b8e7650843905cb9590c2d9ca/pyworld-0.2.12.tar.gz (222kB)
[K     |█▌                              | 10kB 18.7MB/s eta 0:00:01[K     |███                             | 20kB 23.5MB/s eta 0:00:01[K     |████▍                           | 30kB 20.5MB/s eta 0:00:01[K     |█████▉                          | 40kB 23.8MB/s eta 0:00:01[K     |███████▍                        | 51kB 23.0MB/s eta 0:00:01[K     |████████▉                       | 61kB 17.3MB/s eta 0:00:01[K     |██████████▎                     | 71kB 18.6MB/s eta 0:00:01[K     |███████████▊                    | 81kB 18.8MB/s eta 0:00:01[K     |█████████████▎                  | 92kB 17.9MB/s eta 0:00:01[K     |██████████████▊                 | 102kB 18.2MB/s eta 0:00:01[K     |████████████████▏               | 112kB 18.2MB/s eta 0:00:01[K     |█████████████████▋              | 122kB 18.2MB/s eta 

In [6]:
model_name = "best_model"
model_dir  = "Results/" + model_name

data_dir   = "Data Set/vcc2018_training/"
test_dir   = "Data Set/vcc2018_evaluation/"
data_list  = ["VCC2SF1", "VCC2SF2", "VCC2SM1", "VCC2SM2"]

test_dir_s = "Results/Sounds/"
test_dir_f = "Results/Figures/"
test_dir_t = "Results/Tests/"

lambda_p   = 70
lambda_s   = 70
nb_label   = len(data_list)

In [7]:
num_epochs = 4000
batch_size = 1
learning_rate =1e-3
learning_rate_ = 1e-4
learning_rate__ = 1e-5
learning_rate___ = 1e-6
sampling_rate = 16000
num_envelope  = 36
num_mcep = 36
frame_period = 5.0
n_frames = 1024

In [8]:
for v in data_list:
    if "log_f0_"+v+".npz" in  os.listdir(os.path.join(data_dir, v)):
        continue
    print("Preprocess: " + v)
    preprocess_voice(os.path.join(data_dir, v), v)

In [9]:
def model_save(model, model_dir, model_name):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    torch.save(model.state_dict(), os.path.join(model_dir, model_name))
    
def model_load(model_dir, model_name):
    model = ACVAE(nb_label=nb_label,lambda_p=lambda_p,lambda_s=lambda_s)
    model.load_state_dict(torch.load(os.path.join(model_dir, model_name)))

    return model

In [10]:
def save_figure(losses, epoch):        
    if not os.path.exists(test_dir_f):
            os.makedirs(test_dir_f)
    losses = np.array(losses)
    losses = losses.reshape(-1, 4)
    x = np.linspace(0, len(losses), len(losses))
    losses_label = ("L1", "KLD", "AC_p", "AC_s")
    plt.figure()
    plt.plot(x, losses[:,0], label=losses_label[0])
    plt.plot(x, losses[:,1], label=losses_label[1])
    plt.plot(x, losses[:,2], label=losses_label[2])
    plt.plot(x, losses[:,3], label=losses_label[3])
    plt.legend(bbox_to_anchor=(1, 1), loc='upper right', borderaxespad=0)
    plt.savefig(test_dir_f + "/" + "epoch_{:05}".format(epoch) + ".png")
    plt.savefig(test_dir_f + "/" + "result.png")
    plt.close()
        
    plt.figure()
    plt.plot(x, losses[:,2], label=losses_label[2])
    plt.plot(x, losses[:,3], label=losses_label[3])
    plt.legend(bbox_to_anchor=(1, 1), loc='upper right', borderaxespad=0)
    plt.savefig(test_dir_f + "/" + "epoch_{:05}_AC".format(epoch) + ".png")
    plt.savefig(test_dir_f + "/" + "result_AC.png")
    plt.close()
    
    plt.figure()
    plt.plot(x, losses[:,0], label=losses_label[0])
    plt.legend(bbox_to_anchor=(1, 1), loc='upper right', borderaxespad=0)
    plt.savefig(test_dir_f + "/" + "epoch_{:05}_L1".format(epoch) + ".png")
    plt.savefig(test_dir_f + "/" + "result_L1.png")
    plt.close()
    
    plt.figure()
    plt.plot(x, losses[:,1], label=losses_label[1])
    plt.legend(bbox_to_anchor=(1, 1), loc='upper right', borderaxespad=0)
    plt.savefig(test_dir_f + "/" + "epoch_{:05}_KLD".format(epoch) + ".png")
    plt.savefig(test_dir_f + "/" + "result_KLD.png")
    plt.close()

In [11]:
def data_load(batchsize = 1, s = -1, t = -1):
    x = []
    label = []
    for i in range(batchsize):
        if (s == -1):
            label_num = np.random.randint(nb_label)
        else:
            label_num = s
        voice_path = os.path.join(data_dir, data_list[label_num])
        files = os.listdir(voice_path)
        
        frames = 0
        #while frames < n_frames:
        file = ""
        while file.count("wav") == 0:
            file = np.random.choice(files)
        wav, _ = librosa.load(os.path.join(voice_path, file), sr = sampling_rate, mono = True)
        wav = librosa.util.normalize(wav, norm=np.inf, axis=None)
        wav = wav_padding(wav = wav, sr = sampling_rate, frame_period = frame_period, multiple = 4)
        f0, timeaxis, sp, ap, mc = world_decompose(wav = wav, fs = sampling_rate, frame_period = frame_period, num_mcep = num_mcep)
        
        mc_transposed  = np.array(mc).T
        frames = np.shape(mc_transposed)[1]
            
        mcep_normalization_params = np.load(os.path.join(voice_path, "mcep_"+data_list[label_num]+".npz"))
        mcep_mean = mcep_normalization_params['mean']
        mcep_std = mcep_normalization_params['std']
        mc_norm = (mc_transposed  - mcep_mean) / mcep_std
            
        #start_ = np.random.randint(frames - n_frames + 1)
        #end_ = start_ + n_frames
            
        #x.append(mc_norm[:,start_:end_])
        n_frames = mc_norm.shape[1]
        x.append(mc_norm[:,:])
        label.append(label_num)

    return torch.Tensor(x).view(batchsize, 1, num_mcep, n_frames), torch.Tensor(label)

In [12]:
def test_conv(model, epoch):
    print("Test")
    
    if not os.path.exists(test_dir_s):
        os.makedirs(test_dir_s)
    
    output_epoch_dir = os.path.join(test_dir_s, "epoch_{:05}".format(epoch))
    if not os.path.exists(output_epoch_dir):
        os.makedirs(output_epoch_dir)
    
    for s_label in range(nb_label):
    
        voice_path_s = os.path.join(data_dir, data_list[s_label])

        files = os.listdir(voice_path_s)
        file = ""
        while file.count("wav") == 0:
            file = np.random.choice(files)
        
        print("Source File:" + file)

        for t_label in range(nb_label):

            if (t_label == s_label):
                continue

            voice_path_t = os.path.join(data_dir, data_list[t_label])

            wav, _ = librosa.load(os.path.join(voice_path_s, file), sr = sampling_rate, mono = True)
            wav = librosa.util.normalize(wav, norm=np.inf, axis=None)
            wav = wav_padding(wav = wav, sr = sampling_rate, frame_period = frame_period, multiple = 4)
            f0, timeaxis, sp, ap, mc = world_decompose(wav = wav, fs = sampling_rate, frame_period = frame_period)

            mc_transposed  = np.array(mc).T

            mcep_normalization_params_s = np.load(os.path.join(voice_path_s, "mcep_"+data_list[s_label]+".npz"))
            mcep_mean_s = mcep_normalization_params_s['mean']
            mcep_std_s = mcep_normalization_params_s['std']    
            mcep_normalization_params_t = np.load(os.path.join(voice_path_t, "mcep_"+data_list[t_label]+".npz"))
            mcep_mean_t = mcep_normalization_params_t['mean']
            mcep_std_t = mcep_normalization_params_t['std']

            mc_norm = (mc_transposed - mcep_mean_s) / mcep_std_s

            x = torch.Tensor(mc_norm).view(1, 1, mc_norm.shape[0], mc_norm.shape[1])

            label_s_tensor = torch.Tensor(np.array([s_label])).view(1, 1)
            label_t_tensor = torch.Tensor(np.array([t_label])).view(1, 1)

            x = x.to(device)
            label_s_tensor = label_s_tensor.to(device)
            label_t_tensor = label_t_tensor.to(device)

            mu_enc, logvar_enc = model.encode(x, label_s_tensor)
            z_enc = model.reparameterize(mu_enc, logvar_enc)
            # x^
            mu_dec_t, logvar_dec_t = model.decode(z_enc, label_t_tensor)
            z_dec_t = model.reparameterize(mu_dec_t, logvar_dec_t)
            if (torch.cuda.is_available()):
                z_dec_t = z_dec_t.data.cpu().numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
            else:
                z_dec_t = z_dec_t.data.numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
            # x_
            mu_dec_s, logvar_dec_s = model.decode(z_enc, label_s_tensor)
            z_dec_s = model.reparameterize(mu_dec_s, logvar_dec_s)
            if (torch.cuda.is_available()):
                z_dec_s = z_dec_s.data.cpu().numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
            else:
                z_dec_s = z_dec_s.data.numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))

            mc_converted_t = z_dec_t * mcep_std_t + mcep_mean_t
            mc_converted_t = mc_converted_t.T
            mc_converted_t = np.ascontiguousarray(mc_converted_t)
            sp_converted_t = world_decode_mc(mc = mc_converted_t, fs = sampling_rate)
            mc_converted_s = z_dec_s * mcep_std_s + mcep_mean_s
            mc_converted_s = mc_converted_s.T
            mc_converted_s = np.ascontiguousarray(mc_converted_s)
            sp_converted_s = world_decode_mc(mc = mc_converted_s, fs = sampling_rate)

            sp_gained = np.multiply(sp, np.divide(sp_converted_t, sp_converted_s))

            logf0s_normalization_params_s = np.load(os.path.join(voice_path_s, "log_f0_"+data_list[s_label]+".npz"))
            logf0s_mean_s = logf0s_normalization_params_s['mean']
            logf0s_std_s = logf0s_normalization_params_s['std']
            logf0s_normalization_params_t = np.load(os.path.join(voice_path_t, "log_f0_"+data_list[t_label]+".npz"))
            logf0s_mean_t = logf0s_normalization_params_t['mean']
            logf0s_std_t = logf0s_normalization_params_t['std']

            f0_converted = pitch_conversion(f0 = f0, mean_log_src = logf0s_mean_s, std_log_src = logf0s_std_s, mean_log_target = logf0s_mean_t, std_log_target = logf0s_std_t)
            
            wav_transformed = world_speech_synthesis(f0 = f0_converted, sp = sp_gained, ap = ap, fs = sampling_rate, frame_period = frame_period)
            sf.write(os.path.join(output_epoch_dir, data_list[s_label]+"_to_"+data_list[t_label]+"_["+file+"].wav"), wav_transformed, sampling_rate)
            wav_source = world_speech_synthesis(f0 = f0_converted, sp = sp, ap = ap, fs = sampling_rate, frame_period = frame_period)
            sf.write(os.path.join(output_epoch_dir, data_list[s_label]+"_to_"+data_list[t_label]+"_["+file+"]_nonconv.wav"), wav_source, sampling_rate)

            print("Converted: " + data_list[s_label] + " -> " + data_list[t_label])

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = ACVAE(nb_label=nb_label,lambda_p=lambda_p,lambda_s=lambda_s).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train()

losses = []

start_time = time.time()

for epoch in range(num_epochs):
    epoch += 1
    
    if (epoch == 1000):
        learning_rate = learning_rate_ 
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    if (epoch == 2000):
        learning_rate = learning_rate__
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    if (epoch == 3000):
        learning_rate = learning_rate___
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    print('Epoch: %d' % epoch)

    x_, label_ = data_load(batchsize = 1)

    optimizer.zero_grad()
    loss, loss_list = model.calc_loss(x_, label_)
    loss.backward()
    losses.append(loss_list)
    optimizer.step()

    print("- Loss: {}".format(loss))
    
    if epoch % 100 == 0:
        test_conv(model, epoch)
    if epoch % 100 == 0:
        model_save(model, model_dir, model_name)
    if epoch % 2000 == 0:
        model_save(model, model_dir, model_name + "_" + str(epoch))

    if epoch % 100 == 0:
        save_figure(losses, epoch)
  
elapsed_time = time.time() - start_time
print('\nTime Elapsed: %02d:%02d:%02d' % (elapsed_time // 3600, (elapsed_time % 3600 // 60), (elapsed_time % 60 // 1)))

model_save(model, model_dir, model_name)

save_figure(losses, epoch)

cuda
Epoch: 1
- Loss: 36635.0078125
Epoch: 2
- Loss: 21007.921875
Epoch: 3
- Loss: 32933.19140625
Epoch: 4
- Loss: 19241.826171875
Epoch: 5
- Loss: 29884.587890625
Epoch: 6
- Loss: 52933.44921875
Epoch: 7
- Loss: 45264.7578125
Epoch: 8
- Loss: 19526.759765625
Epoch: 9
- Loss: 26307.1484375
Epoch: 10
- Loss: 12585.642578125
Epoch: 11
- Loss: 45252.3515625
Epoch: 12
- Loss: 30860.72265625
Epoch: 13
- Loss: 31509.47265625
Epoch: 14
- Loss: 19061.478515625
Epoch: 15
- Loss: 38722.06640625
Epoch: 16
- Loss: 36470.5078125
Epoch: 17
- Loss: 13985.1396484375
Epoch: 18
- Loss: 12945.1767578125
Epoch: 19
- Loss: 25537.734375
Epoch: 20
- Loss: 19502.064453125
Epoch: 21
- Loss: 21128.07421875
Epoch: 22
- Loss: 36796.14453125
Epoch: 23
- Loss: 50854.19140625
Epoch: 24
- Loss: 59265.06640625
Epoch: 25
- Loss: 21683.16015625
Epoch: 26
- Loss: 15685.5048828125
Epoch: 27
- Loss: 25218.439453125
Epoch: 28
- Loss: 30935.0546875
Epoch: 29
- Loss: 20758.078125
Epoch: 30
- Loss: 34072.453125
Epoch: 31
- Los

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10003.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10020.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10076.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 101
- Loss: 21292.626953125
Epoch: 102
- Loss: 13904.8876953125
Epoch: 103
- Loss: 18896.048828125
Epoch: 104
- Loss: 27072.208984375
Epoch: 105
- Loss: 13123.9384765625
Epoch: 106
- Loss: 26667.515625
Epoch: 107
- Loss: 24965.1328125
Epoch: 108
- Loss: 8433.326171875
Epoch: 109
- Loss: 13964.1748046875
Epoch: 110
- Loss: 18527.9140625
Epoch: 111
- Loss: 16153.064453125
Epoch: 112
- Loss: 15421.37109375
Epoch: 113
- Loss: 17819.673828125
Epoch: 114
- Loss: 14039.990234375
Epoch: 115
- Loss: 20360.75390625
Epoch: 116
- Loss: 14338.0625
Epoch: 117
- Loss: 23404.9

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10017.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10037.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10055.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 201
- Loss: 13259.087890625
Epoch: 202
- Loss: 33804.3359375
Epoch: 203
- Loss: 9048.880859375
Epoch: 204
- Loss: 12447.9599609375
Epoch: 205
- Loss: 16049.1064453125
Epoch: 206
- Loss: 10168.642578125
Epoch: 207
- Loss: 15635.212890625
Epoch: 208
- Loss: 15877.8603515625
Epoch: 209
- Loss: 22840.248046875
Epoch: 210
- Loss: 35096.5625
Epoch: 211
- Loss: 24980.734375
Epoch: 212
- Loss: 16212.1630859375
Epoch: 213
- Loss: 20088.646484375
Epoch: 214
- Loss: 12799.5185546875
Epoch: 215
- Loss: 13959.771484375
Epoch: 216
- Loss: 37101.8203125
Epoch: 217
- Loss: 976

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10053.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10045.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10021.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 301
- Loss: 19154.75
Epoch: 302
- Loss: 9795.8974609375
Epoch: 303
- Loss: 38352.0390625
Epoch: 304
- Loss: 27518.03515625
Epoch: 305
- Loss: 18259.298828125
Epoch: 306
- Loss: 22390.05859375
Epoch: 307
- Loss: 7430.6279296875
Epoch: 308
- Loss: 13395.91015625
Epoch: 309
- Loss: 15324.5859375
Epoch: 310
- Loss: 21430.138671875
Epoch: 311
- Loss: 25591.60546875
Epoch: 312
- Loss: 15897.5068359375
Epoch: 313
- Loss: 15729.5185546875
Epoch: 314
- Loss: 11234.255859375
Epoch: 315
- Loss: 32517.5
Epoch: 316
- Loss: 6953.193359375
Epoch: 317
- Loss: 9685.99609375
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10066.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10045.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10059.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 401
- Loss: 13940.27734375
Epoch: 402
- Loss: 12375.4404296875
Epoch: 403
- Loss: 21037.681640625
Epoch: 404
- Loss: 20481.458984375
Epoch: 405
- Loss: 15719.10546875
Epoch: 406
- Loss: 13384.9404296875
Epoch: 407
- Loss: 13257.666015625
Epoch: 408
- Loss: 19965.685546875
Epoch: 409
- Loss: 14729.720703125
Epoch: 410
- Loss: 17856.419921875
Epoch: 411
- Loss: 32331.103515625
Epoch: 412
- Loss: 24577.21484375
Epoch: 413
- Loss: 12710.46875
Epoch: 414
- Loss: 22543.591796875
Epoch: 415
- Loss: 13318.2216796875
Epoch: 416
- Loss: 29098.435546875
Epoch: 417
- Loss:

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10001.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10012.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10080.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 501
- Loss: 12094.732421875
Epoch: 502
- Loss: 5350.3486328125
Epoch: 503
- Loss: 40153.6484375
Epoch: 504
- Loss: 14084.87890625
Epoch: 505
- Loss: 20237.759765625
Epoch: 506
- Loss: 13078.220703125
Epoch: 507
- Loss: 21072.5078125
Epoch: 508
- Loss: 7362.38818359375
Epoch: 509
- Loss: 13679.8232421875
Epoch: 510
- Loss: 27220.49609375
Epoch: 511
- Loss: 12116.6083984375
Epoch: 512
- Loss: 19564.1640625
Epoch: 513
- Loss: 17755.048828125
Epoch: 514
- Loss: 20872.681640625
Epoch: 515
- Loss: 12328.8076171875
Epoch: 516
- Loss: 20932.966796875
Epoch: 517
- Loss:

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10034.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10037.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10001.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 601
- Loss: 15276.36328125
Epoch: 602
- Loss: 14936.0712890625
Epoch: 603
- Loss: 16802.1171875
Epoch: 604
- Loss: 10508.9951171875
Epoch: 605
- Loss: 16864.814453125
Epoch: 606
- Loss: 20181.650390625
Epoch: 607
- Loss: 10500.3740234375
Epoch: 608
- Loss: 8057.935546875
Epoch: 609
- Loss: 15068.94921875
Epoch: 610
- Loss: 13343.2900390625
Epoch: 611
- Loss: 41605.45703125
Epoch: 612
- Loss: 11715.1796875
Epoch: 613
- Loss: 19870.6328125
Epoch: 614
- Loss: 13228.2880859375
Epoch: 615
- Loss: 25006.44140625
Epoch: 616
- Loss: 12530.962890625
Epoch: 617
- Loss: 1

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10020.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10051.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10004.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 701
- Loss: 9338.4248046875
Epoch: 702
- Loss: 39519.7265625
Epoch: 703
- Loss: 11469.0400390625
Epoch: 704
- Loss: 19663.78125
Epoch: 705
- Loss: 12242.0986328125
Epoch: 706
- Loss: 18960.041015625
Epoch: 707
- Loss: 20871.25
Epoch: 708
- Loss: 31085.73046875
Epoch: 709
- Loss: 8082.57275390625
Epoch: 710
- Loss: 21153.203125
Epoch: 711
- Loss: 18119.080078125
Epoch: 712
- Loss: 23071.400390625
Epoch: 713
- Loss: 26687.04296875
Epoch: 714
- Loss: 20427.048828125
Epoch: 715
- Loss: 11717.240234375
Epoch: 716
- Loss: 18062.7578125
Epoch: 717
- Loss: 21590.720703

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10070.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10055.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10029.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 801
- Loss: 12451.8232421875
Epoch: 802
- Loss: 27618.763671875
Epoch: 803
- Loss: 8670.951171875
Epoch: 804
- Loss: 6989.7021484375
Epoch: 805
- Loss: 14152.9111328125
Epoch: 806
- Loss: 11229.9736328125
Epoch: 807
- Loss: 13607.9375
Epoch: 808
- Loss: 19749.765625
Epoch: 809
- Loss: 11857.455078125
Epoch: 810
- Loss: 11310.8984375
Epoch: 811
- Loss: 20130.185546875
Epoch: 812
- Loss: 26907.583984375
Epoch: 813
- Loss: 29486.4375
Epoch: 814
- Loss: 8914.080078125
Epoch: 815
- Loss: 23086.896484375
Epoch: 816
- Loss: 19054.91015625
Epoch: 817
- Loss: 12514.4814

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10063.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10026.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10035.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 901
- Loss: 9106.515625
Epoch: 902
- Loss: 19065.919921875
Epoch: 903
- Loss: 10547.369140625
Epoch: 904
- Loss: 13580.18359375
Epoch: 905
- Loss: 14115.9912109375
Epoch: 906
- Loss: 20676.197265625
Epoch: 907
- Loss: 31127.173828125
Epoch: 908
- Loss: 16964.736328125
Epoch: 909
- Loss: 20017.30859375
Epoch: 910
- Loss: 18938.072265625
Epoch: 911
- Loss: 22343.734375
Epoch: 912
- Loss: 15018.2841796875
Epoch: 913
- Loss: 11756.8173828125
Epoch: 914
- Loss: 19278.853515625
Epoch: 915
- Loss: 7818.1513671875
Epoch: 916
- Loss: 10191.1240234375
Epoch: 917
- Loss: 

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10013.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10007.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10008.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1001
- Loss: 14700.046875
Epoch: 1002
- Loss: 14293.9345703125
Epoch: 1003
- Loss: 30955.107421875
Epoch: 1004
- Loss: 19599.03515625
Epoch: 1005
- Loss: 11484.3828125
Epoch: 1006
- Loss: 17029.630859375
Epoch: 1007
- Loss: 35053.92578125
Epoch: 1008
- Loss: 19309.056640625
Epoch: 1009
- Loss: 22160.259765625
Epoch: 1010
- Loss: 11926.4814453125
Epoch: 1011
- Loss: 17016.14453125
Epoch: 1012
- Loss: 34811.17578125
Epoch: 1013
- Loss: 19662.353515625
Epoch: 1014
- Loss: 14380.57421875
Epoch: 1015
- Loss: 35302.86328125
Epoch: 1016
- Loss: 28684.578125
Epoch: 101

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10062.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10055.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10072.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1101
- Loss: 12261.4931640625
Epoch: 1102
- Loss: 13134.96484375
Epoch: 1103
- Loss: 10634.86328125
Epoch: 1104
- Loss: 18980.2109375
Epoch: 1105
- Loss: 14613.923828125
Epoch: 1106
- Loss: 14334.548828125
Epoch: 1107
- Loss: 11108.966796875
Epoch: 1108
- Loss: 16085.2451171875
Epoch: 1109
- Loss: 17778.451171875
Epoch: 1110
- Loss: 20767.20703125
Epoch: 1111
- Loss: 11268.0576171875
Epoch: 1112
- Loss: 7105.08837890625
Epoch: 1113
- Loss: 12665.95703125
Epoch: 1114
- Loss: 8855.8359375
Epoch: 1115
- Loss: 21261.966796875
Epoch: 1116
- Loss: 11391.287109375
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10023.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10058.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10016.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1201
- Loss: 23788.869140625
Epoch: 1202
- Loss: 12583.3466796875
Epoch: 1203
- Loss: 17058.9375
Epoch: 1204
- Loss: 14673.513671875
Epoch: 1205
- Loss: 20680.767578125
Epoch: 1206
- Loss: 24123.34375
Epoch: 1207
- Loss: 10453.556640625
Epoch: 1208
- Loss: 17572.484375
Epoch: 1209
- Loss: 20490.396484375
Epoch: 1210
- Loss: 13614.310546875
Epoch: 1211
- Loss: 14210.384765625
Epoch: 1212
- Loss: 29833.4453125
Epoch: 1213
- Loss: 14557.859375
Epoch: 1214
- Loss: 32739.01171875
Epoch: 1215
- Loss: 25701.81640625
Epoch: 1216
- Loss: 33153.1328125
Epoch: 1217
- Loss

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10044.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10069.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10024.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1301
- Loss: 8699.373046875
Epoch: 1302
- Loss: 11921.28515625
Epoch: 1303
- Loss: 16301.765625
Epoch: 1304
- Loss: 20612.0234375
Epoch: 1305
- Loss: 17645.87109375
Epoch: 1306
- Loss: 19985.591796875
Epoch: 1307
- Loss: 8738.4423828125
Epoch: 1308
- Loss: 12215.6533203125
Epoch: 1309
- Loss: 11154.1552734375
Epoch: 1310
- Loss: 13963.6630859375
Epoch: 1311
- Loss: 28939.5
Epoch: 1312
- Loss: 13931.123046875
Epoch: 1313
- Loss: 11658.072265625
Epoch: 1314
- Loss: 12073.865234375
Epoch: 1315
- Loss: 18893.072265625
Epoch: 1316
- Loss: 39080.9765625
Epoch: 1317
-

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10043.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10046.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10012.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1401
- Loss: 34635.45703125
Epoch: 1402
- Loss: 21641.033203125
Epoch: 1403
- Loss: 21135.158203125
Epoch: 1404
- Loss: 11355.3017578125
Epoch: 1405
- Loss: 32218.205078125
Epoch: 1406
- Loss: 17323.29296875
Epoch: 1407
- Loss: 11959.65625
Epoch: 1408
- Loss: 9657.875
Epoch: 1409
- Loss: 42835.36328125
Epoch: 1410
- Loss: 21906.419921875
Epoch: 1411
- Loss: 18963.34765625
Epoch: 1412
- Loss: 11063.1279296875
Epoch: 1413
- Loss: 31359.9453125
Epoch: 1414
- Loss: 33406.015625
Epoch: 1415
- Loss: 17306.697265625
Epoch: 1416
- Loss: 11965.3056640625
Epoch: 1417
- L

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10060.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10036.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10045.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1501
- Loss: 16651.8203125
Epoch: 1502
- Loss: 20255.197265625
Epoch: 1503
- Loss: 20951.66796875
Epoch: 1504
- Loss: 7035.28271484375
Epoch: 1505
- Loss: 6650.03564453125
Epoch: 1506
- Loss: 12263.5693359375
Epoch: 1507
- Loss: 19888.365234375
Epoch: 1508
- Loss: 6141.744140625
Epoch: 1509
- Loss: 9865.5244140625
Epoch: 1510
- Loss: 6642.24755859375
Epoch: 1511
- Loss: 20341.51953125
Epoch: 1512
- Loss: 37975.55078125
Epoch: 1513
- Loss: 13250.763671875
Epoch: 1514
- Loss: 30538.158203125
Epoch: 1515
- Loss: 9994.9970703125
Epoch: 1516
- Loss: 12608.02734375
E

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10066.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10054.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10012.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1601
- Loss: 13296.986328125
Epoch: 1602
- Loss: 14640.4072265625
Epoch: 1603
- Loss: 22723.5234375
Epoch: 1604
- Loss: 10921.96875
Epoch: 1605
- Loss: 10498.5048828125
Epoch: 1606
- Loss: 12678.28515625
Epoch: 1607
- Loss: 14297.5146484375
Epoch: 1608
- Loss: 16606.564453125
Epoch: 1609
- Loss: 25609.875
Epoch: 1610
- Loss: 19129.35546875
Epoch: 1611
- Loss: 25532.56640625
Epoch: 1612
- Loss: 19770.13671875
Epoch: 1613
- Loss: 17148.208984375
Epoch: 1614
- Loss: 27192.330078125
Epoch: 1615
- Loss: 25787.8984375
Epoch: 1616
- Loss: 13311.517578125
Epoch: 1617
-

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10030.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10004.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10029.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1701
- Loss: 13812.453125
Epoch: 1702
- Loss: 17100.654296875
Epoch: 1703
- Loss: 8800.033203125
Epoch: 1704
- Loss: 12298.33984375
Epoch: 1705
- Loss: 10356.025390625
Epoch: 1706
- Loss: 16609.98828125
Epoch: 1707
- Loss: 25730.927734375
Epoch: 1708
- Loss: 11605.033203125
Epoch: 1709
- Loss: 27018.287109375
Epoch: 1710
- Loss: 30166.568359375
Epoch: 1711
- Loss: 33042.01171875
Epoch: 1712
- Loss: 34582.95703125
Epoch: 1713
- Loss: 16980.25
Epoch: 1714
- Loss: 30230.71875
Epoch: 1715
- Loss: 11119.2587890625
Epoch: 1716
- Loss: 21259.56640625
Epoch: 1717
- Los

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10037.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10003.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10075.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1801
- Loss: 15947.7314453125
Epoch: 1802
- Loss: 10556.1796875
Epoch: 1803
- Loss: 16126.390625
Epoch: 1804
- Loss: 26113.736328125
Epoch: 1805
- Loss: 28222.57421875
Epoch: 1806
- Loss: 18800.470703125
Epoch: 1807
- Loss: 18993.03515625
Epoch: 1808
- Loss: 14164.3427734375
Epoch: 1809
- Loss: 21856.275390625
Epoch: 1810
- Loss: 7506.36376953125
Epoch: 1811
- Loss: 9935.390625
Epoch: 1812
- Loss: 6399.97265625
Epoch: 1813
- Loss: 12641.6728515625
Epoch: 1814
- Loss: 12065.5234375
Epoch: 1815
- Loss: 28443.396484375
Epoch: 1816
- Loss: 9858.6396484375
Epoch: 18

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10036.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10001.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10043.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 1901
- Loss: 30332.3828125
Epoch: 1902
- Loss: 13328.1865234375
Epoch: 1903
- Loss: 7641.28125
Epoch: 1904
- Loss: 11876.244140625
Epoch: 1905
- Loss: 11214.5283203125
Epoch: 1906
- Loss: 22423.9921875
Epoch: 1907
- Loss: 19020.779296875
Epoch: 1908
- Loss: 8304.6591796875
Epoch: 1909
- Loss: 22896.06640625
Epoch: 1910
- Loss: 8697.4814453125
Epoch: 1911
- Loss: 6617.7548828125
Epoch: 1912
- Loss: 18708.94921875
Epoch: 1913
- Loss: 14202.3505859375
Epoch: 1914
- Loss: 14531.169921875
Epoch: 1915
- Loss: 20577.091796875
Epoch: 1916
- Loss: 18851.58203125
Epoch: 

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10042.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10004.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10009.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2001
- Loss: 20666.310546875
Epoch: 2002
- Loss: 17867.248046875
Epoch: 2003
- Loss: 21681.12109375
Epoch: 2004
- Loss: 31041.7109375
Epoch: 2005
- Loss: 7363.17431640625
Epoch: 2006
- Loss: 12931.6875
Epoch: 2007
- Loss: 20276.037109375
Epoch: 2008
- Loss: 26021.361328125
Epoch: 2009
- Loss: 37740.62890625
Epoch: 2010
- Loss: 18813.953125
Epoch: 2011
- Loss: 14435.1640625
Epoch: 2012
- Loss: 11003.763671875
Epoch: 2013
- Loss: 11957.96875
Epoch: 2014
- Loss: 17994.9765625
Epoch: 2015
- Loss: 20330.96875
Epoch: 2016
- Loss: 20522.75390625
Epoch: 2017
- Loss: 14

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10031.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10014.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10048.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2101
- Loss: 26046.62890625
Epoch: 2102
- Loss: 14966.37109375
Epoch: 2103
- Loss: 21845.451171875
Epoch: 2104
- Loss: 20530.419921875
Epoch: 2105
- Loss: 21134.9453125
Epoch: 2106
- Loss: 26168.716796875
Epoch: 2107
- Loss: 20243.06640625
Epoch: 2108
- Loss: 13893.2978515625
Epoch: 2109
- Loss: 19124.693359375
Epoch: 2110
- Loss: 19963.853515625
Epoch: 2111
- Loss: 18034.123046875
Epoch: 2112
- Loss: 20270.302734375
Epoch: 2113
- Loss: 11070.3857421875
Epoch: 2114
- Loss: 8945.1201171875
Epoch: 2115
- Loss: 10984.82421875
Epoch: 2116
- Loss: 36997.19921875
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10023.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10078.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10014.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2201
- Loss: 18606.4375
Epoch: 2202
- Loss: 21663.013671875
Epoch: 2203
- Loss: 17073.0546875
Epoch: 2204
- Loss: 13962.240234375
Epoch: 2205
- Loss: 11781.3896484375
Epoch: 2206
- Loss: 8268.6279296875
Epoch: 2207
- Loss: 11325.087890625
Epoch: 2208
- Loss: 19688.056640625
Epoch: 2209
- Loss: 11108.9814453125
Epoch: 2210
- Loss: 28399.396484375
Epoch: 2211
- Loss: 9909.3466796875
Epoch: 2212
- Loss: 12653.23828125
Epoch: 2213
- Loss: 11808.18359375
Epoch: 2214
- Loss: 18687.966796875
Epoch: 2215
- Loss: 21624.572265625
Epoch: 2216
- Loss: 20469.9765625
Epoch: 

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10073.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10043.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10010.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2301
- Loss: 21114.28515625
Epoch: 2302
- Loss: 21702.64453125
Epoch: 2303
- Loss: 21758.115234375
Epoch: 2304
- Loss: 11119.1337890625
Epoch: 2305
- Loss: 37728.33984375
Epoch: 2306
- Loss: 33052.60546875
Epoch: 2307
- Loss: 7008.22998046875
Epoch: 2308
- Loss: 17128.732421875
Epoch: 2309
- Loss: 42383.61328125
Epoch: 2310
- Loss: 14510.29296875
Epoch: 2311
- Loss: 12412.310546875
Epoch: 2312
- Loss: 17380.462890625
Epoch: 2313
- Loss: 24013.205078125
Epoch: 2314
- Loss: 20478.638671875
Epoch: 2315
- Loss: 11617.3896484375
Epoch: 2316
- Loss: 26147.962890625
E

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10060.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10062.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10042.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2401
- Loss: 34480.578125
Epoch: 2402
- Loss: 23476.22265625
Epoch: 2403
- Loss: 12860.22265625
Epoch: 2404
- Loss: 11311.96484375
Epoch: 2405
- Loss: 17005.525390625
Epoch: 2406
- Loss: 8333.658203125
Epoch: 2407
- Loss: 17056.080078125
Epoch: 2408
- Loss: 34894.14453125
Epoch: 2409
- Loss: 18317.1171875
Epoch: 2410
- Loss: 17383.080078125
Epoch: 2411
- Loss: 19380.001953125
Epoch: 2412
- Loss: 14112.3759765625
Epoch: 2413
- Loss: 11946.375
Epoch: 2414
- Loss: 14197.3447265625
Epoch: 2415
- Loss: 12379.7744140625
Epoch: 2416
- Loss: 5537.70654296875
Epoch: 241

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10009.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10058.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10064.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2501
- Loss: 26146.13671875
Epoch: 2502
- Loss: 18715.65234375
Epoch: 2503
- Loss: 6979.15087890625
Epoch: 2504
- Loss: 16833.447265625
Epoch: 2505
- Loss: 28502.849609375
Epoch: 2506
- Loss: 11637.6904296875
Epoch: 2507
- Loss: 23535.583984375
Epoch: 2508
- Loss: 9931.337890625
Epoch: 2509
- Loss: 19383.03125
Epoch: 2510
- Loss: 23728.455078125
Epoch: 2511
- Loss: 9953.7119140625
Epoch: 2512
- Loss: 14105.1162109375
Epoch: 2513
- Loss: 9995.3994140625
Epoch: 2514
- Loss: 7492.3642578125
Epoch: 2515
- Loss: 28432.1640625
Epoch: 2516
- Loss: 10242.6787109375
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10013.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10046.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10026.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2601
- Loss: 11949.4267578125
Epoch: 2602
- Loss: 21087.916015625
Epoch: 2603
- Loss: 12157.302734375
Epoch: 2604
- Loss: 27116.9609375
Epoch: 2605
- Loss: 42302.97265625
Epoch: 2606
- Loss: 19360.646484375
Epoch: 2607
- Loss: 19607.3046875
Epoch: 2608
- Loss: 13343.4736328125
Epoch: 2609
- Loss: 6461.97900390625
Epoch: 2610
- Loss: 13865.0625
Epoch: 2611
- Loss: 6385.6015625
Epoch: 2612
- Loss: 26030.595703125
Epoch: 2613
- Loss: 17261.462890625
Epoch: 2614
- Loss: 8762.5869140625
Epoch: 2615
- Loss: 11876.70703125
Epoch: 2616
- Loss: 7698.462890625
Epoch: 261

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10006.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10022.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10057.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2701
- Loss: 17121.779296875
Epoch: 2702
- Loss: 29690.56640625
Epoch: 2703
- Loss: 12580.3994140625
Epoch: 2704
- Loss: 17370.2578125
Epoch: 2705
- Loss: 26920.322265625
Epoch: 2706
- Loss: 10136.392578125
Epoch: 2707
- Loss: 20780.466796875
Epoch: 2708
- Loss: 17236.2265625
Epoch: 2709
- Loss: 9795.181640625
Epoch: 2710
- Loss: 13601.810546875
Epoch: 2711
- Loss: 6817.13671875
Epoch: 2712
- Loss: 13828.271484375
Epoch: 2713
- Loss: 14888.7802734375
Epoch: 2714
- Loss: 8883.2783203125
Epoch: 2715
- Loss: 9790.6220703125
Epoch: 2716
- Loss: 20298.44140625
Epoch

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10033.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10028.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10021.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2801
- Loss: 14522.123046875
Epoch: 2802
- Loss: 23382.099609375
Epoch: 2803
- Loss: 8314.4814453125
Epoch: 2804
- Loss: 26066.84375
Epoch: 2805
- Loss: 14602.931640625
Epoch: 2806
- Loss: 15025.56640625
Epoch: 2807
- Loss: 20009.15234375
Epoch: 2808
- Loss: 7628.17626953125
Epoch: 2809
- Loss: 12203.41796875
Epoch: 2810
- Loss: 11032.4775390625
Epoch: 2811
- Loss: 13807.9970703125
Epoch: 2812
- Loss: 13187.6923828125
Epoch: 2813
- Loss: 21467.62890625
Epoch: 2814
- Loss: 7635.00146484375
Epoch: 2815
- Loss: 17942.087890625
Epoch: 2816
- Loss: 10288.404296875
E

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10068.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10062.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10023.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 2901
- Loss: 9735.9990234375
Epoch: 2902
- Loss: 8254.2216796875
Epoch: 2903
- Loss: 25747.92578125
Epoch: 2904
- Loss: 10193.59765625
Epoch: 2905
- Loss: 18775.904296875
Epoch: 2906
- Loss: 14220.5087890625
Epoch: 2907
- Loss: 6501.9501953125
Epoch: 2908
- Loss: 22726.28125
Epoch: 2909
- Loss: 7847.80517578125
Epoch: 2910
- Loss: 11924.46875
Epoch: 2911
- Loss: 6376.95751953125
Epoch: 2912
- Loss: 12406.7509765625
Epoch: 2913
- Loss: 11233.251953125
Epoch: 2914
- Loss: 11199.0146484375
Epoch: 2915
- Loss: 28282.326171875
Epoch: 2916
- Loss: 10199.681640625
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10078.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10032.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10048.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3001
- Loss: 11927.3818359375
Epoch: 3002
- Loss: 18335.255859375
Epoch: 3003
- Loss: 12532.4736328125
Epoch: 3004
- Loss: 14541.5966796875
Epoch: 3005
- Loss: 8334.3017578125
Epoch: 3006
- Loss: 19602.26953125
Epoch: 3007
- Loss: 16297.171875
Epoch: 3008
- Loss: 13226.9736328125
Epoch: 3009
- Loss: 30175.205078125
Epoch: 3010
- Loss: 11980.5400390625
Epoch: 3011
- Loss: 17231.44921875
Epoch: 3012
- Loss: 11906.263671875
Epoch: 3013
- Loss: 7496.09814453125
Epoch: 3014
- Loss: 11792.408203125
Epoch: 3015
- Loss: 19264.791015625
Epoch: 3016
- Loss: 5498.83447265

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10040.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10055.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10008.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3101
- Loss: 18336.4453125
Epoch: 3102
- Loss: 32478.70703125
Epoch: 3103
- Loss: 28446.236328125
Epoch: 3104
- Loss: 16745.63671875
Epoch: 3105
- Loss: 32700.44140625
Epoch: 3106
- Loss: 13697.419921875
Epoch: 3107
- Loss: 14267.607421875
Epoch: 3108
- Loss: 9944.9189453125
Epoch: 3109
- Loss: 6890.69287109375
Epoch: 3110
- Loss: 32546.583984375
Epoch: 3111
- Loss: 12178.408203125
Epoch: 3112
- Loss: 14087.3076171875
Epoch: 3113
- Loss: 6766.37890625
Epoch: 3114
- Loss: 12070.8134765625
Epoch: 3115
- Loss: 20018.626953125
Epoch: 3116
- Loss: 9993.173828125
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10065.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10021.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10078.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3201
- Loss: 22336.796875
Epoch: 3202
- Loss: 12200.787109375
Epoch: 3203
- Loss: 17251.291015625
Epoch: 3204
- Loss: 18798.166015625
Epoch: 3205
- Loss: 8967.40625
Epoch: 3206
- Loss: 7058.33544921875
Epoch: 3207
- Loss: 28380.65234375
Epoch: 3208
- Loss: 18046.119140625
Epoch: 3209
- Loss: 8111.55712890625
Epoch: 3210
- Loss: 9814.7958984375
Epoch: 3211
- Loss: 13732.5078125
Epoch: 3212
- Loss: 13790.8935546875
Epoch: 3213
- Loss: 24096.6796875
Epoch: 3214
- Loss: 13230.73046875
Epoch: 3215
- Loss: 12860.1953125
Epoch: 3216
- Loss: 10887.1669921875
Epoch: 321

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10008.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10067.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10078.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3301
- Loss: 10203.8974609375
Epoch: 3302
- Loss: 13159.1826171875
Epoch: 3303
- Loss: 21069.35546875
Epoch: 3304
- Loss: 19847.462890625
Epoch: 3305
- Loss: 21742.755859375
Epoch: 3306
- Loss: 14585.08984375
Epoch: 3307
- Loss: 38900.2421875
Epoch: 3308
- Loss: 29551.392578125
Epoch: 3309
- Loss: 12445.087890625
Epoch: 3310
- Loss: 16779.9375
Epoch: 3311
- Loss: 34831.98828125
Epoch: 3312
- Loss: 10624.3544921875
Epoch: 3313
- Loss: 5006.90576171875
Epoch: 3314
- Loss: 32518.787109375
Epoch: 3315
- Loss: 19462.83203125
Epoch: 3316
- Loss: 18714.361328125
Epoch

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10075.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10056.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10052.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3401
- Loss: 11057.7822265625
Epoch: 3402
- Loss: 10275.943359375
Epoch: 3403
- Loss: 10546.17578125
Epoch: 3404
- Loss: 17296.423828125
Epoch: 3405
- Loss: 11229.7177734375
Epoch: 3406
- Loss: 17280.09765625
Epoch: 3407
- Loss: 14052.87109375
Epoch: 3408
- Loss: 21570.255859375
Epoch: 3409
- Loss: 21611.96875
Epoch: 3410
- Loss: 11921.4150390625
Epoch: 3411
- Loss: 15046.6640625
Epoch: 3412
- Loss: 11902.34765625
Epoch: 3413
- Loss: 14809.501953125
Epoch: 3414
- Loss: 13179.1396484375
Epoch: 3415
- Loss: 7460.44970703125
Epoch: 3416
- Loss: 30318.837890625
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10067.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10004.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10070.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3501
- Loss: 6494.07958984375
Epoch: 3502
- Loss: 37695.7421875
Epoch: 3503
- Loss: 13705.4169921875
Epoch: 3504
- Loss: 25355.189453125
Epoch: 3505
- Loss: 34082.8125
Epoch: 3506
- Loss: 12167.2060546875
Epoch: 3507
- Loss: 19419.37109375
Epoch: 3508
- Loss: 7963.46533203125
Epoch: 3509
- Loss: 12957.5068359375
Epoch: 3510
- Loss: 13544.7431640625
Epoch: 3511
- Loss: 8310.1728515625
Epoch: 3512
- Loss: 15701.4814453125
Epoch: 3513
- Loss: 20109.646484375
Epoch: 3514
- Loss: 13644.107421875
Epoch: 3515
- Loss: 12448.318359375
Epoch: 3516
- Loss: 21679.126953125

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10007.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10063.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10008.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3601
- Loss: 12391.2802734375
Epoch: 3602
- Loss: 35259.7109375
Epoch: 3603
- Loss: 4991.4013671875
Epoch: 3604
- Loss: 11808.30859375
Epoch: 3605
- Loss: 12625.1064453125
Epoch: 3606
- Loss: 18515.53125
Epoch: 3607
- Loss: 11177.58203125
Epoch: 3608
- Loss: 11119.96875
Epoch: 3609
- Loss: 8604.6787109375
Epoch: 3610
- Loss: 28969.34375
Epoch: 3611
- Loss: 18838.384765625
Epoch: 3612
- Loss: 20035.228515625
Epoch: 3613
- Loss: 18358.447265625
Epoch: 3614
- Loss: 8984.220703125
Epoch: 3615
- Loss: 20215.30859375
Epoch: 3616
- Loss: 19109.546875
Epoch: 3617
- Los

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10026.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10060.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10047.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3701
- Loss: 12209.4287109375
Epoch: 3702
- Loss: 7468.56689453125
Epoch: 3703
- Loss: 21814.302734375
Epoch: 3704
- Loss: 18352.951171875
Epoch: 3705
- Loss: 14752.98828125
Epoch: 3706
- Loss: 12603.5869140625
Epoch: 3707
- Loss: 29654.537109375
Epoch: 3708
- Loss: 30061.7421875
Epoch: 3709
- Loss: 17136.845703125
Epoch: 3710
- Loss: 19686.890625
Epoch: 3711
- Loss: 7438.90478515625
Epoch: 3712
- Loss: 33179.1171875
Epoch: 3713
- Loss: 29963.392578125
Epoch: 3714
- Loss: 14709.787109375
Epoch: 3715
- Loss: 6550.2197265625
Epoch: 3716
- Loss: 26041.56640625
Epo

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10010.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10076.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10026.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3801
- Loss: 16674.693359375
Epoch: 3802
- Loss: 14418.1083984375
Epoch: 3803
- Loss: 34104.21875
Epoch: 3804
- Loss: 21337.310546875
Epoch: 3805
- Loss: 16685.458984375
Epoch: 3806
- Loss: 7223.2783203125
Epoch: 3807
- Loss: 16093.123046875
Epoch: 3808
- Loss: 28469.029296875
Epoch: 3809
- Loss: 11134.9287109375
Epoch: 3810
- Loss: 15779.0888671875
Epoch: 3811
- Loss: 12190.08984375
Epoch: 3812
- Loss: 9591.2861328125
Epoch: 3813
- Loss: 15232.875
Epoch: 3814
- Loss: 14212.7587890625
Epoch: 3815
- Loss: 31935.44140625
Epoch: 3816
- Loss: 9968.70703125
Epoch: 3

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10058.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10069.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10054.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1
Epoch: 3901
- Loss: 11817.955078125
Epoch: 3902
- Loss: 11941.2177734375
Epoch: 3903
- Loss: 7634.77587890625
Epoch: 3904
- Loss: 31187.599609375
Epoch: 3905
- Loss: 13514.912109375
Epoch: 3906
- Loss: 20463.541015625
Epoch: 3907
- Loss: 26991.58984375
Epoch: 3908
- Loss: 13383.3427734375
Epoch: 3909
- Loss: 10126.6962890625
Epoch: 3910
- Loss: 22101.513671875
Epoch: 3911
- Loss: 23307.65625
Epoch: 3912
- Loss: 16631.287109375
Epoch: 3913
- Loss: 6980.19677734375
Epoch: 3914
- Loss: 26044.8125
Epoch: 3915
- Loss: 11249.23046875
Epoch: 3916
- Loss: 30275.609375
Epoch: 

  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


Converted: VCC2SF1 -> VCC2SF2
Converted: VCC2SF1 -> VCC2SM1
Converted: VCC2SF1 -> VCC2SM2
Source File:10006.wav
Converted: VCC2SF2 -> VCC2SF1
Converted: VCC2SF2 -> VCC2SM1
Converted: VCC2SF2 -> VCC2SM2
Source File:10010.wav
Converted: VCC2SM1 -> VCC2SF1
Converted: VCC2SM1 -> VCC2SF2
Converted: VCC2SM1 -> VCC2SM2
Source File:10050.wav
Converted: VCC2SM2 -> VCC2SF1
Converted: VCC2SM2 -> VCC2SF2
Converted: VCC2SM2 -> VCC2SM1

Time Elapsed: 02:09:20


In [16]:
for v in data_list:
    if "log_f0_"+v+".npz" in  os.listdir(os.path.join(test_dir, v)):
        continue
    print("Preprocess: " + v)
    preprocess_voice(os.path.join(test_dir, v), v)

Preprocess: VCC2SF1
Preprocessing Data...
Data Loading...
Extracting f0 and mcep...
Saving f0 Data...
Saving mcep Data...
Preprocessing Done.
Time Elapsed for Data Preprocessing: 00:00:55
Preprocess: VCC2SF2
Preprocessing Data...
Data Loading...
Extracting f0 and mcep...
Saving f0 Data...
Saving mcep Data...
Preprocessing Done.
Time Elapsed for Data Preprocessing: 00:00:56
Preprocess: VCC2SM1
Preprocessing Data...
Data Loading...
Extracting f0 and mcep...
Saving f0 Data...
Saving mcep Data...
Preprocessing Done.
Time Elapsed for Data Preprocessing: 00:01:08
Preprocess: VCC2SM2
Preprocessing Data...
Data Loading...
Extracting f0 and mcep...
Saving f0 Data...
Saving mcep Data...
Preprocessing Done.
Time Elapsed for Data Preprocessing: 00:00:57


In [17]:
def conv_all(model):
    print("Conversion Start.")
    
    if not os.path.exists(test_dir_t):
        os.makedirs(test_dir_t)
    
    for s_label in range(nb_label):
        
        output_label_dir = os.path.join(test_dir_t, data_list[s_label])
        if not os.path.exists(output_label_dir):
            os.makedirs(output_label_dir)
    
        voice_path_s = os.path.join(test_dir, data_list[s_label])

        count = -1
        files = os.listdir(voice_path_s)

        for file in files:
            if file.count("wav") == 0:
                continue

            for t_label in range(nb_label):
                if (t_label == s_label):
                    continue

                voice_path_t = os.path.join(data_dir, data_list[t_label])

                wav, _ = librosa.load(os.path.join(voice_path_s, file), sr = sampling_rate, mono = True)
                wav = librosa.util.normalize(wav, norm=np.inf, axis=None)
                wav = wav_padding(wav = wav, sr = sampling_rate, frame_period = frame_period, multiple = 4)
                f0, timeaxis, sp, ap, mc = world_decompose(wav = wav, fs = sampling_rate, frame_period = frame_period)

                mc_transposed  = np.array(mc).T

                mcep_normalization_params_s = np.load(os.path.join(voice_path_s, "mcep_"+data_list[s_label]+".npz"))
                mcep_mean_s = mcep_normalization_params_s['mean']
                mcep_std_s = mcep_normalization_params_s['std']    
                mcep_normalization_params_t = np.load(os.path.join(voice_path_t, "mcep_"+data_list[t_label]+".npz"))
                mcep_mean_t = mcep_normalization_params_t['mean']
                mcep_std_t = mcep_normalization_params_t['std']

                mc_norm = (mc_transposed - mcep_mean_s) / mcep_std_s

                x = torch.Tensor(mc_norm).view(1, 1, mc_norm.shape[0], mc_norm.shape[1])

                label_s_tensor = torch.Tensor(np.array([s_label])).view(1, 1)
                label_t_tensor = torch.Tensor(np.array([t_label])).view(1, 1)

                x = x.to(device)
                label_s_tensor = label_s_tensor.to(device)
                label_t_tensor = label_t_tensor.to(device)

                mu_enc, logvar_enc = model.encode(x, label_s_tensor)
                z_enc = model.reparameterize(mu_enc, logvar_enc)
                # x^
                mu_dec_t, logvar_dec_t = model.decode(z_enc, label_t_tensor)
                z_dec_t = model.reparameterize(mu_dec_t, logvar_dec_t)
                if (torch.cuda.is_available()):
                    z_dec_t = z_dec_t.data.cpu().numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
                else:
                    z_dec_t = z_dec_t.data.numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
                # x_
                mu_dec_s, logvar_dec_s = model.decode(z_enc, label_s_tensor)
                z_dec_s = model.reparameterize(mu_dec_s, logvar_dec_s)
                if (torch.cuda.is_available()):
                    z_dec_s = z_dec_s.data.cpu().numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))
                else:
                    z_dec_s = z_dec_s.data.numpy().reshape((mc_norm.shape[0], mc_norm.shape[1]))

                mc_converted_t = z_dec_t * mcep_std_t + mcep_mean_t
                mc_converted_t = mc_converted_t.T
                mc_converted_t = np.ascontiguousarray(mc_converted_t)
                sp_converted_t = world_decode_mc(mc = mc_converted_t, fs = sampling_rate)
                mc_converted_s = z_dec_s * mcep_std_s + mcep_mean_s
                mc_converted_s = mc_converted_s.T
                mc_converted_s = np.ascontiguousarray(mc_converted_s)
                sp_converted_s = world_decode_mc(mc = mc_converted_s, fs = sampling_rate)

                sp_gained = np.multiply(sp, np.divide(sp_converted_t, sp_converted_s))

                logf0s_normalization_params_s = np.load(os.path.join(voice_path_s, "log_f0_"+data_list[s_label]+".npz"))
                logf0s_mean_s = logf0s_normalization_params_s['mean']
                logf0s_std_s = logf0s_normalization_params_s['std']
                logf0s_normalization_params_t = np.load(os.path.join(voice_path_t, "log_f0_"+data_list[t_label]+".npz"))
                logf0s_mean_t = logf0s_normalization_params_t['mean']
                logf0s_std_t = logf0s_normalization_params_t['std']

                f0_converted = pitch_conversion(f0 = f0, mean_log_src = logf0s_mean_s, std_log_src = logf0s_std_s, mean_log_target = logf0s_mean_t, std_log_target = logf0s_std_t)

                wav_transformed = world_speech_synthesis(f0 = f0_converted, sp = sp_gained, ap = ap, fs = sampling_rate, frame_period = frame_period)
                sf.write(os.path.join(output_label_dir, data_list[s_label]+"_to_"+data_list[t_label]+"_["+file+"].wav"), wav_transformed, sampling_rate)
                wav_source = world_speech_synthesis(f0 = f0_converted, sp = sp, ap = ap, fs = sampling_rate, frame_period = frame_period)
                sf.write(os.path.join(output_label_dir, data_list[s_label]+"_to_"+data_list[t_label]+"_["+file+"]_nonconv.wav"), wav_source, sampling_rate)
                
            count += 1
            if (count % 10 == 0):
                print("{} ({}/{}) : {:.1f} % is done...".format(data_list[s_label], str(s_label+1), str(nb_label), count*100/len(files)))
    print("Finish.")

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = model_load(model_dir, model_name)
model.cuda()
model.eval()
conv_all(model)

cuda
Conversion Start.


  f0_converted = np.exp((np.log(f0) - mean_log_src) / std_log_src * std_log_target + mean_log_target)


VCC2SF1 (1/4) : 0.0 % is done...
VCC2SF1 (1/4) : 26.3 % is done...
VCC2SF1 (1/4) : 52.6 % is done...
VCC2SF1 (1/4) : 78.9 % is done...
VCC2SF2 (2/4) : 0.0 % is done...
VCC2SF2 (2/4) : 27.0 % is done...
VCC2SF2 (2/4) : 54.1 % is done...
VCC2SF2 (2/4) : 81.1 % is done...
VCC2SM1 (3/4) : 0.0 % is done...
VCC2SM1 (3/4) : 27.0 % is done...
VCC2SM1 (3/4) : 54.1 % is done...
VCC2SM1 (3/4) : 81.1 % is done...
VCC2SM2 (4/4) : 0.0 % is done...
VCC2SM2 (4/4) : 27.0 % is done...
VCC2SM2 (4/4) : 54.1 % is done...
VCC2SM2 (4/4) : 81.1 % is done...
Finish.
