In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to the local
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys, errno
import glob
import numpy as np
import tensorflow as tf
from tensorflow import keras
from interleave_convolutional import *
# from features import dataset_utils # FIXME this is what this originally was
from dataset_utils import *
from gan_models import *
from misc import *
import pandas as pd

In [3]:
# Switch TensorFlow to graph (non-eager) mode before any model is built; every
# model created later in this notebook is constructed and trained in that mode.
# The public `tf.compat.v1` alias is used instead of importing from the private
# `tensorflow.python.framework.ops` module, whose layout is not a stable API.
tf.compat.v1.disable_eager_execution()

In [4]:
# Mount Google Drive at /content/drive; the dataset, data key and output
# directory configured below all live under /content/drive/MyDrive.
# force_remount=True asks Colab to mount again even if a mount already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Motivation

Train a GAN model to synthesize PCG (phonocardiogram) audio. This means training both the discriminator and the generator.

# Training Configuration

In [5]:
# Code to do WGAN-GP training
# NOTE: don't attempt to run this in 'dft' or 'dct' configuration without changing the data loading method,
#   unless you have a lot of RAM (something like >= 24GB); the method used to compute the transforms is
#   rather inefficient memory-wise for the sake of speed


# ---------------------------------------------------------------------------------------------------------------------
# Training Configuration
#   DATASET_PATTERN: glob pattern for all .wav files to train on
#   OUT_DIR: output directory, will contain the following files:
#       model_g_<MURMUR_TYPE>_<epoch>.h5 - the saved generator model weights (note that only the weights are saved)
#       model_d_<MURMUR_TYPE>_<epoch>.h5 - the saved discriminator model weights (note that only the weights are
#           saved)
#       normalization.npy - normalization factors that should be divided element-wise with generator output before
#           synthesizing the output audio
#       losses_epochs<a>_to_<b>.csv - per-epoch loss tallies, written at every checkpoint
#   DATA_REPR: data representation to use
#       'dft' for DFT images, 'dct' for DCT images, anything else for raw waveform
#   USE_IL: use ILConv in the generator; automatically is true for raw waveform representation
#       (when False, the '*_generator_tr' variants are selected for 'dft' / 'dct')
#   BIAS_OUT: use bias at the last layer of the generator
#   BATCH_SIZE: number of real samples (and of noise vectors) per training batch
#   MODEL_SIZE: size argument passed to both the generator and the discriminator constructors
#       (its exact meaning is defined in gan_models - presumably a channel-width multiplier; TODO confirm)
#   NUM_LATENT: length of the uniform(-1, 1) noise vector fed to the generator
#   NUM_EPOCH_PER_CHECKPOINT: weights and losses are saved whenever the epoch number is a multiple of this value
#   NUM_EPOCH: number of epochs to train in this run (training covers START_EPOCH .. START_EPOCH + NUM_EPOCH - 1)
#   G_OPTIM: generator optimizer, defaults to keras.optimizers.Adam(0.0001, beta_1=0.5, beta_2=0.9)
#   D_OPTIM: discriminator optimizer, defaults to keras.optimizers.Adam(0.0001, beta_1=0.5, beta_2=0.9)
#       NOTE(review): neither G_OPTIM nor D_OPTIM is referenced by the training cell in this notebook - confirm
#       whether they are meant to be passed to create_wgan
#   D_STEPS_PER_G: number of iterations to train the discriminator per iteration of generator training, must be >= 1
#   MURMUR_TYPE: murmur label handed to the dataset reader together with the data key; also embedded in the names
#       of the saved weight files
#   DATA_KEY_DIR: path of the data key CSV (a file, despite the name); it must have a 'split' column, of which only
#       the 'TRAIN' rows are used
# ---------------------------------------------------------------------------------------------------------------------
DATASET_PATTERN = '/content/drive/MyDrive/Stuff I Coded/PCG_synthesis/data/train/*.wav'
OUT_DIR = '/content/drive/MyDrive/Stuff I Coded/PCG_synthesis/out'
DATA_REPR = 'dft'
USE_IL = True
BIAS_OUT = True
BATCH_SIZE = 64
MODEL_SIZE = 64
NUM_LATENT = 100
NUM_EPOCH_PER_CHECKPOINT = 50
NUM_EPOCH = 150
G_OPTIM = None
D_OPTIM = None
D_STEPS_PER_G = 5
MURMUR_TYPE = "present"
DATA_KEY_DIR = '/content/drive/MyDrive/Stuff I Coded/PCG_synthesis/data/circor_digiscope_by_pcg.csv'

In [6]:
# Resume-from-checkpoint settings.
# change these to load in from a different save location!
#   FROM_CHECKPOINT: when True, the generator / discriminator weights are loaded from
#       G_CHECKPOINT / D_CHECKPOINT before training starts
#   CHECKPOINT_EPOCH: epoch number embedded in the checkpoint file names to resume from
#   START_EPOCH: number of the first epoch of this run; it follows CHECKPOINT_EPOCH when
#       resuming and is 1 for a fresh run, so epoch labels never continue a run that was
#       not actually loaded
#   G_CHECKPOINT / D_CHECKPOINT: weight files to load; derived from OUT_DIR, MURMUR_TYPE and
#       CHECKPOINT_EPOCH with the same naming scheme the training cell uses when saving, so
#       the three values cannot drift apart. Overwrite them with explicit paths to load
#       weights stored somewhere else.
FROM_CHECKPOINT = True
CHECKPOINT_EPOCH = 300
START_EPOCH = CHECKPOINT_EPOCH + 1 if FROM_CHECKPOINT else 1
G_CHECKPOINT = os.path.join(OUT_DIR, 'model_g_{}_{:d}.h5'.format(MURMUR_TYPE, CHECKPOINT_EPOCH))
D_CHECKPOINT = os.path.join(OUT_DIR, 'model_d_{}_{:d}.h5'.format(MURMUR_TYPE, CHECKPOINT_EPOCH))

In [7]:
# Load the data key so that murmur labels are available for the audio samples
# fed into the model, then keep only the rows that belong to the TRAIN split.
data_key_df = pd.read_csv(DATA_KEY_DIR)
in_train_split = data_key_df['split'].eq('TRAIN')
train_data_key_df = data_key_df.loc[in_train_split]

In [8]:
# Collect every .wav file matching DATASET_PATTERN and read the ones selected by
# the training data key and MURMUR_TYPE into a single array.
datalist = glob.glob(DATASET_PATTERN)
if not datalist:
    # An empty match almost always means Drive is not mounted or the pattern is
    # wrong; stop here with a clear message instead of failing inside the reader.
    raise FileNotFoundError('No .wav files match DATASET_PATTERN: {!r}'.format(DATASET_PATTERN))
data = read_wav_dataset_maximal(datalist, train_data_key_df, MURMUR_TYPE)
num_samples = data.shape[0]  # size of the first axis = number of training examples

In [9]:
# sanity check: number of rows in `data`, i.e. training examples available for MURMUR_TYPE
num_samples

1723

In [None]:
# ---------------------------------------------------------------------------------------------------------------------
# WGAN-GP training
#   1. build the generator / discriminator for DATA_REPR (optionally restoring checkpoint weights)
#   2. load the training audio, transform it to the chosen representation and compute normalization factors
#   3. alternate D_STEPS_PER_G discriminator updates with one generator update
#   4. save weights and loss tallies to OUT_DIR at every checkpoint epoch and once more at the very end
# ---------------------------------------------------------------------------------------------------------------------

# fail fast on a configuration that cannot train (the loops below divide by / iterate over this value)
if D_STEPS_PER_G < 1:
    raise ValueError('D_STEPS_PER_G must be >= 1, got {}'.format(D_STEPS_PER_G))

# 'labels' used in computing the Wasserstein loss
positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
negative_y = -positive_y
dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32) # need to feed in some labels for gradient penalty loss

# create and build model
if DATA_REPR == 'dft':
    gen_func = dft_generator if USE_IL else dft_generator_tr
    disc_func = dft_discriminator
elif DATA_REPR == 'dct':
    gen_func = dct_generator if USE_IL else dct_generator_tr
    disc_func = dct_discriminator
else:
    gen_func = wave_generator
    disc_func = wave_discriminator
generator = gen_func(MODEL_SIZE, NUM_LATENT, BIAS_OUT)
discriminator = disc_func(MODEL_SIZE)

if FROM_CHECKPOINT:
    generator.load_weights(G_CHECKPOINT)
    discriminator.load_weights(D_CHECKPOINT)

# NOTE(review): G_OPTIM / D_OPTIM from the configuration cell are not passed on here, so whatever
#   optimizers create_wgan picks by default are used - confirm against create_wgan's signature
g_m, d_m = create_wgan(generator, discriminator)

# read in dataset and transform it appropriately
# (the raw audio is deliberately re-read here: `data` is overwritten by its transformed version below,
#  so re-reading keeps this cell safe to re-run)
datalist = glob.glob(DATASET_PATTERN)
if not datalist:
    raise FileNotFoundError('No .wav files match DATASET_PATTERN: {!r}'.format(DATASET_PATTERN))
data = read_wav_dataset_maximal(datalist, train_data_key_df, MURMUR_TYPE)
num_samples = data.shape[0]

# one "superbatch" holds the D_STEPS_PER_G discriminator batches consumed per generator update
samples_per_superbatch = BATCH_SIZE * D_STEPS_PER_G
num_superbatches = num_samples // samples_per_superbatch
if num_superbatches < 1:
    # without this check no batch would ever be trained and the loss reporting below would hit a NameError
    raise ValueError(
        'Need at least BATCH_SIZE * D_STEPS_PER_G = {:d} samples to train, but only {:d} were loaded'.format(
            samples_per_superbatch, num_samples))

if DATA_REPR == 'dft':
    data = dft_transform_forward(data)
elif DATA_REPR == 'dct':
    data = dct_transform_forward(data)
else:
    data = np.expand_dims(data, axis=-1)

# normalize data
if DATA_REPR in ('dft', 'dct'):
    if DATA_REPR == 'dft':
        # the last axis holds two components that are combined as real + 1j * imag to get the magnitude
        data_mag = np.abs(data[:,:,:,0:1] + 1j * data[:,:,:,1:2])
    else:
        data_mag = np.abs(data)
    # peak magnitude per index of axis 1 (reduced over samples and the last two axes)
    amp_max = np.max(data_mag, axis=(0,-2,-1), keepdims=True)
    # a slice that is zero everywhere would give 0.9/0 = inf and then 0 * inf = NaN training inputs;
    # dividing by 1 instead leaves such a slice at zero
    amp_max[amp_max == 0] = 1
    norm_f = 0.9/amp_max
else:
    norm_f = 1.0
os.makedirs(OUT_DIR, exist_ok=True)  # make sure the output directory exists before the first write
np.save(os.path.join(OUT_DIR, 'normalization.npy'), norm_f)

# running tallies (one entry per reported epoch, flushed to CSV at every checkpoint)
epochs = []
d_losses = []
grad_penalties = []
g_losses = []


def _flush_losses(csv_name):
    """Write the loss tallies gathered since the last flush to OUT_DIR/csv_name, then empty them.

    Returns the DataFrame that was written.
    """
    flushed_df = pd.DataFrame({'epoch': epochs, 'd_loss': d_losses, 'grad_penalty': grad_penalties, 'g_loss': g_losses})
    flushed_df.to_csv(os.path.join(OUT_DIR, csv_name))
    for tally in (epochs, d_losses, grad_penalties, g_losses):
        tally.clear()
    return flushed_df


# begin training
model_path_g = os.path.join(OUT_DIR, 'model_g_{}_{:d}.h5')
model_path_d = os.path.join(OUT_DIR, 'model_d_{}_{:d}.h5')
last_epoch = START_EPOCH + NUM_EPOCH - 1
idx = np.arange(num_samples)
print(f"Epoch range is {START_EPOCH} to {last_epoch}")
# Each iteration of this loop is one shuffled pass over the data; D_STEPS_PER_G consecutive passes make up one
# reported epoch, which is why the loop counter runs over epoch numbers multiplied by D_STEPS_PER_G.
for epoch_i in range(START_EPOCH * D_STEPS_PER_G, (START_EPOCH + NUM_EPOCH) * D_STEPS_PER_G):
    np.random.shuffle(idx)
    for superbatch_i in range(num_superbatches):
        batch_idx = idx[(superbatch_i*samples_per_superbatch):((superbatch_i+1)*samples_per_superbatch)]
        # train discriminator
        for batch_i in range(D_STEPS_PER_G):
            real = data[batch_idx[(batch_i*BATCH_SIZE):((batch_i+1)*BATCH_SIZE)],...] * norm_f
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, NUM_LATENT)).astype(np.float32)
            d_loss = d_m.train_on_batch([real, noise], [positive_y, negative_y, dummy_y])
        # train generator
        noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, NUM_LATENT)).astype(np.float32)
        g_loss = g_m.train_on_batch(noise, positive_y)

    # report / record on the first pass of every epoch, using the losses of the last batch of that pass
    if epoch_i % D_STEPS_PER_G == 0:
        epoch_num = epoch_i // D_STEPS_PER_G
        # d_loss[1] and d_loss[2] are summed into the reported D loss and d_loss[3] is reported as the
        # gradient penalty (presumably d_loss[0] is the combined total - TODO confirm against create_wgan)
        critic_loss = d_loss[1]+d_loss[2]
        print('Epoch {:d}:'.format(epoch_num))
        print('\t D loss: {:f}, grad penalty: {:f}'.format(critic_loss, d_loss[3]))
        print('\t G loss: {:f}'.format(g_loss), flush=True)

        # record losses each epoch
        epochs.append(epoch_num)
        d_losses.append(critic_loss)
        grad_penalties.append(d_loss[3])
        g_losses.append(g_loss)

        # save model weights if this is a checkpoint epoch
        if epoch_num % NUM_EPOCH_PER_CHECKPOINT == 0:
            generator.save_weights(model_path_g.format(MURMUR_TYPE, epoch_num))
            discriminator.save_weights(model_path_d.format(MURMUR_TYPE, epoch_num))

            losses_df = _flush_losses(f"losses_epochs{epoch_num-NUM_EPOCH_PER_CHECKPOINT}_to_{epoch_num}.csv")
            print("Checkpoint Saved")

# write out any losses recorded after the last checkpoint; the name follows the checkpoint files' convention
# (lower bound = the epoch before the first one contained in the file)
if epochs:
    losses_df = _flush_losses('losses_epochs{:d}_to_{:d}.csv'.format(epochs[0] - 1, epochs[-1]))

# save final model weights
# They are labelled with the last epoch actually trained. Labelling them with NUM_EPOCH (as was done before)
# overwrote an unrelated earlier checkpoint whenever START_EPOCH > 1, and left the checkpoint of the last
# epoch holding weights from that epoch's first pass only.
generator.save_weights(model_path_g.format(MURMUR_TYPE, last_epoch))
discriminator.save_weights(model_path_d.format(MURMUR_TYPE, last_epoch))


(1723, 16384)
(1723, 129, 129)
Epoch range is 301 to 450
Epoch 301:
	 D loss: -1.564926, grad penalty: 0.147067
	 G loss: 0.113591
Epoch 302:
	 D loss: -1.805019, grad penalty: 0.093843
	 G loss: 0.097436
Epoch 303:
	 D loss: -1.271383, grad penalty: 0.105606
	 G loss: 0.151496
Epoch 304:
	 D loss: -1.270061, grad penalty: 0.097404
	 G loss: 0.163782
Epoch 305:
	 D loss: -1.796400, grad penalty: 0.188675
	 G loss: 0.087375
Epoch 306:
	 D loss: -1.177656, grad penalty: 0.059557
	 G loss: 0.228623
Epoch 307:
	 D loss: -1.984288, grad penalty: 0.193827
	 G loss: 0.044280
Epoch 308:
	 D loss: -1.325083, grad penalty: 0.113986
	 G loss: 0.024674
Epoch 309:
	 D loss: -1.108460, grad penalty: 0.109796
	 G loss: 0.170303
Epoch 310:
	 D loss: -1.221710, grad penalty: 0.128915
	 G loss: 0.196620
Epoch 311:
	 D loss: -1.395240, grad penalty: 0.138338
	 G loss: 0.220349
Epoch 312:
	 D loss: -1.318771, grad penalty: 0.108767
	 G loss: 0.076629
Epoch 313:
	 D loss: -1.555609, grad penalty: 0.217310


In [None]:
# automatically disconnect the Colab runtime once the training cell above has finished
# (avoids wasting compute units)
# NOTE: run this only after training; the weights, losses and normalization factors are
# written to OUT_DIR on Drive by the training cell before this point.
from google.colab import runtime
runtime.unassign()