In [1]:
import os
import numpy as np
import time
import librosa
import pyworld
import matplotlib.pyplot as plt
import tensorflow as tf 
from preprocess import *

from model import CycleGAN

  from ._conv import register_converters as _register_converters


In [2]:
# --- Reproducibility -------------------------------------------------------
# Seed NumPy's global RNG before anything else draws from it.
random_seed = 0
np.random.seed(random_seed)

# --- Input data --------------------------------------------------------------
# NOTE(review): both domains currently point at the same directory; the
# trailing numbers are the original author's notes (presumably utterance
# counts -- confirm).
train_A_dir = "./data/voice_data/Test"  # 300
train_B_dir = "./data/voice_data/Test"  # 100
validation_A_dir = None
validation_B_dir = None

# --- Checkpoints -------------------------------------------------------------
# Where this run writes its model and normalisation statistics.
model_dir = "./model/f4_m2_mc24_fr256_model_U1"
model_name = "f4_m2_mc24_fr256_model_U1.ckpt"

# Checkpoint that is restored when training is resumed.
pre_model_dir = "./model/f4_m2_mc24_fr256_model_U1"
pre_model_name = "f4_m2_mc24_fr256_model_U1.ckpt"

# --- Outputs -----------------------------------------------------------------
figure_dir = "./figures/f4_m2_mc24_fr256_model_U1"
tensorboard_log_dir = "./log"

In [3]:
# Resume switch: when True the training cell restores the checkpoint named by
# pre_model_dir / pre_model_name before training starts.
isContinue = False

# Schedule length. num_pre_epochs is added to the epoch counter in the
# training cell so numbering continues from an earlier run.
num_epochs = 10000
num_pre_epochs = 0
mini_batch_size = 1

# Optimiser step sizes and the amount subtracted from each per iteration once
# the training cell starts decaying them.
generator_learning_rate = 2e-4
discriminator_learning_rate = 1e-4
generator_learning_rate_decay = generator_learning_rate / 200000
discriminator_learning_rate_decay = discriminator_learning_rate / 200000

# Audio analysis settings passed to the preprocessing helpers.
sampling_rate = 16000
frame_period = 5.0
num_mcep = 24
n_frames = 256

# Loss weights handed to model.train().
lambda_cycle = 10
lambda_identity = 5

# Loss history sampled during training, plus the number of recorded points.
lossG, lossD = [], []
loss_num = 0

In [None]:
print('Preprocessing Data...')

start_time = time.time()

# ---- Load the raw waveforms for both domains --------------------------------
print("Data Loading...")

wavs_A = load_wavs(wav_dir=train_A_dir, sr=sampling_rate)
wavs_B = load_wavs(wav_dir=train_B_dir, sr=sampling_rate)

print("Extracting f0 and mcep...")

# The normalisation statistics below are written into model_dir, so it has to
# exist before the first np.savez call.
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

# ---- Step 1: vocoder analysis -----------------------------------------------
print("process: 1")
f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = world_encode_data(
    wavs=wavs_A,
    fs=sampling_rate,
    frame_period=frame_period,
    coded_dim=num_mcep,
)
f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = world_encode_data(
    wavs=wavs_B,
    fs=sampling_rate,
    frame_period=frame_period,
    coded_dim=num_mcep,
)

# Only f0 and the coded spectra are used from here on; drop the rest.
del wavs_A, timeaxes_A, sps_A, aps_A, wavs_B, timeaxes_B, sps_B, aps_B

# ---- Step 2: log-f0 statistics ----------------------------------------------
print("process: 2")
log_f0s_mean_A, log_f0s_std_A = logf0_statistics(f0s_A)
log_f0s_mean_B, log_f0s_std_B = logf0_statistics(f0s_B)

print("Saving f0 Data...")
np.savez(
    os.path.join(model_dir, 'logf0s_normalization.npz'),
    mean_A=log_f0s_mean_A,
    std_A=log_f0s_std_A,
    mean_B=log_f0s_mean_B,
    std_B=log_f0s_std_B,
)

# The statistics are on disk now and are not needed by the training loop.
del f0s_A, log_f0s_mean_A, log_f0s_std_A, f0s_B, log_f0s_mean_B, log_f0s_std_B

# ---- Step 3: transpose every coded spectrum ---------------------------------
print("process: 3")
coded_sps_A_transposed = transpose_in_list(lst=coded_sps_A)
coded_sps_B_transposed = transpose_in_list(lst=coded_sps_B)

del coded_sps_A, coded_sps_B

# ---- Step 4: fit and apply the normalisation --------------------------------
print("process: 4")
coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = coded_sps_normalization_fit_transoform(
    coded_sps=coded_sps_A_transposed
)
coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = coded_sps_normalization_fit_transoform(
    coded_sps=coded_sps_B_transposed
)

print("Saving mcep Data...")
np.savez(
    os.path.join(model_dir, 'mcep_normalization.npz'),
    mean_A=coded_sps_A_mean,
    std_A=coded_sps_A_std,
    mean_B=coded_sps_B_mean,
    std_B=coded_sps_B_std,
)

end_time = time.time()
time_elapsed = end_time - start_time

print('Preprocessing Done.')

# Report wall-clock time as HH:MM:SS.
print(
    'Time Elapsed for Data Preprocessing: %02d:%02d:%02d'
    % (time_elapsed // 3600, (time_elapsed % 3600 // 60), (time_elapsed % 60 // 1))
)


print("Model Loading...")

# Instantiate the network with num_features set to the MCEP dimensionality.
model = CycleGAN(num_features=num_mcep)

# When resuming, restore weights from the previously saved checkpoint.
if isContinue:
    checkpoint_path = os.path.join(pre_model_dir, pre_model_name)
    model.load(filepath=checkpoint_path)

print("Training start.")

# Epochs are numbered from 1 and continue after num_pre_epochs, so checkpoint
# and figure names keep counting up when a previous run is resumed.
first_epoch = num_pre_epochs + 1

for epoch in range(first_epoch, first_epoch + num_epochs):

    start_time = time.time()

    print('Epoch: %d' % epoch)

    # Draw this epoch's training segments of n_frames frames from each domain.
    dataset_A, dataset_B = sample_train_data(dataset_A = coded_sps_A_norm, dataset_B = coded_sps_B_norm, n_frames = n_frames)

    n_samples = dataset_A.shape[0]
    n_batches = n_samples // mini_batch_size

    for i in range(n_batches):

        # Number of mini-batches completed so far. `epoch` is 1-based, so one
        # is subtracted; otherwise the count starts a full epoch ahead and the
        # schedule thresholds below fire one epoch too early.
        num_iterations = n_batches * (epoch - 1) + i

        # Drop the identity loss after the first 10000 iterations.
        if num_iterations > 10000:
            lambda_identity = 0
        # After 200000 iterations shrink both learning rates by a fixed step
        # per iteration, never letting them go below zero.
        if num_iterations > 200000:
            generator_learning_rate = max(0, generator_learning_rate - generator_learning_rate_decay)
            discriminator_learning_rate = max(0, discriminator_learning_rate - discriminator_learning_rate_decay)

        start = i * mini_batch_size
        end = (i + 1) * mini_batch_size

        generator_loss, discriminator_loss = model.train(input_A = dataset_A[start:end], input_B = dataset_B[start:end], lambda_cycle = lambda_cycle, lambda_identity = lambda_identity, generator_learning_rate = generator_learning_rate, discriminator_learning_rate = discriminator_learning_rate)

        # Record the losses every 50th mini-batch for the training curves.
        if i % 50 == 0:
            lossG.append(generator_loss)
            lossD.append(discriminator_loss)
            loss_num += 1

    # Rolling checkpoint every 100 epochs, overwritten each time.
    if epoch % 100 == 0:
        model.save(directory = model_dir, filename = model_name)
    # Additional checkpoint with the epoch number in its name every 2000 epochs.
    if (epoch % 2000 == 0):
        model.save(directory = model_dir, filename = model_name + "_" + str(epoch))

    # Save the loss curves every 10 epochs.
    if epoch % 10 == 0:
        if not os.path.exists(figure_dir):
            os.makedirs(figure_dir)
        x = np.linspace(0, loss_num, loss_num)
        fig = plt.figure()
        plt.plot(x, lossG, label="Gen")
        plt.plot(x, lossD, label="Dis")
        # The curves are labelled, so draw the legend that shows the labels.
        plt.legend()
        plt.savefig(figure_dir + "/" + "epoch_{:05}".format(epoch) + ".png")
        # Close the figure once it is on disk; pyplot keeps every open figure
        # alive, which would otherwise pile up over thousands of epochs.
        plt.close(fig)

    elapsed_time = time.time() - start_time
    print('Time Elapsed for one epoch: %02d:%02d:%02d' % (elapsed_time // 3600, (elapsed_time % 3600 // 60), (elapsed_time % 60 // 1)))

# Always write a final checkpoint, whatever epoch the loop ended on.
model.save(directory = model_dir, filename = model_name)

# The training loop only creates figure_dir on epochs divisible by 10, so a
# short run can reach this point without the directory existing.
if not os.path.exists(figure_dir):
    os.makedirs(figure_dir)

x = np.linspace(0, loss_num, loss_num)

# Draw on a fresh figure so the final curves are not layered on top of
# whatever figure pyplot currently considers active.
fig = plt.figure()
plt.plot(x, lossG, label="Gen")
plt.plot(x, lossD, label="Dis")
# The curves are labelled, so draw the legend that shows the labels.
plt.legend()
plt.savefig(figure_dir + "/" + "epoch_{:05}".format(num_epochs) + ".png")
# Release the figure once it has been written to disk.
plt.close(fig)

Preprocessing Data...
Data Loading...
Extracting f0 and mcep...
process: 1
process: 2
Saving f0 Data...
process: 3
process: 4
Saving mcep Data...
Preprocessing Done.
Time Elapsed for Data Preprocessing: 00:00:05
Model Loading...
Training start.
Epoch: 1
