In [1]:
import os
import sys
from pathlib import Path

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from matplotlib import colors
from tensorflow.keras import layers
from tqdm.notebook import tqdm

%matplotlib inline

---
# Draft

In [None]:
# Seed NumPy's global random generator.
RANDOM_SEED = 13131313
np.random.seed(RANDOM_SEED)

# Project root is the parent of the current working directory. It is kept as a
# plain string so it can be put on sys.path.
# (Script equivalent: ROOT_DIR = Path(__file__).parents[1].resolve())
ROOT_DIR = os.path.abspath('..')
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Derived locations, built as Path objects on top of the root.
RESEARCH_DIR = Path(ROOT_DIR, 'researches')
TRAIN_DATA_PATH = Path(ROOT_DIR, 'data/processed_train_data_rol_len256_rows8/binary')

The idea is to transform audio into image-like data and train a DCGAN on it.

To use a GAN or DCGAN, the audio first needs to be processed. Tests showed that converting NES melodies to a Mel-spectrogram and back works really badly, so the Separated Score Format was chosen instead.

In [None]:
# Dimensions of one sample.
# NOTE(review): neither constant is referenced by the cells visible below —
# confirm they are still needed.
ROWS_CNT = 4
SAMPLE_LEN = 64  # 128 = 4-5sec

In [None]:
# TODO: Data Generator!

# Materialise and sort the directory listing: the load order becomes
# reproducible across runs, and tqdm can display a real total (a bare
# iterdir() generator has no length).
train_files = sorted(TRAIN_DATA_PATH.iterdir())
train_data = [np.load(file) for file in tqdm(train_files)]

# Fail with a readable message instead of an IndexError on train_data[0].
assert train_data, f'No training files found in {TRAIN_DATA_PATH}'

len(train_data), train_data[0].shape

In [None]:
# Input pipeline settings: shuffle buffer size and mini-batch size.
BATCH_SIZE = 256
BUFFER_SIZE = 60000

# Length of the random vector fed to the generator.
NOISE_VECTOR_LEN = 100
# Data Shape: (32, 32, 1)
# NOTE(review): the shape above is the original author's note; it is not
# checked here — confirm the loaded arrays match the discriminator input.

# Slice the loaded arrays into samples, shuffle them, then group into batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(train_data)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

In [None]:
def make_generator_model():
    """Build the DCGAN generator: noise vector -> 32x32 single-channel image.

    A dense projection of the NOISE_VECTOR_LEN-sized input is reshaped into an
    8x8x256 feature map and then passed through three 5x5 transposed
    convolutions (8x8 -> 8x8 -> 16x16 -> 32x32). The last one uses tanh.
    Output shapes are asserted after every stage.

    Returns:
        The assembled ``tf.keras.Sequential`` model.
    """
    net = tf.keras.Sequential()

    # Project the noise vector and fold it into an 8x8 map with 256 channels.
    net.add(layers.Dense(8*8*256, use_bias=False, input_shape=(NOISE_VECTOR_LEN,)))
    net.add(layers.BatchNormalization())
    net.add(layers.LeakyReLU())
    net.add(layers.Reshape((8, 8, 256)))
    assert net.output_shape == (None, 8, 8, 256)  # None stands for the batch axis

    # Hidden transposed-conv stages: (filters, stride, expected output shape).
    hidden_stages = (
        (128, 1, (None, 8, 8, 128)),
        (64, 2, (None, 16, 16, 64)),
    )
    for filters, stride, expected_shape in hidden_stages:
        net.add(layers.Conv2DTranspose(filters, (5, 5), strides=(stride, stride),
                                       padding='same', use_bias=False))
        assert net.output_shape == expected_shape
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU())

    # Output stage: upsample to 32x32 with one channel and a tanh activation.
    net.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same',
                                   use_bias=False, activation='tanh'))
    assert net.output_shape == (None, 32, 32, 1)

    return net

In [None]:
# Instantiate the generator that the rest of the notebook uses.
generator = make_generator_model()

# Smoke test: push one random noise vector through the (still untrained)
# generator in inference mode and look at the first channel of the result.
noise = tf.random.normal(shape=[1, NOISE_VECTOR_LEN])
generated_image = generator(noise, training=False)

plt.imshow(generated_image[0, :, :, 0])

In [None]:
import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
from tensorflow.keras import layers
import time

from IPython import display

In [None]:
def make_discriminator_model():
    """Build the DCGAN discriminator for 32x32 single-channel inputs.

    Two stride-2 5x5 convolutions (64, then 128 filters), each followed by
    LeakyReLU and 30% dropout, are flattened into a single Dense unit with no
    activation.

    Returns:
        The assembled ``tf.keras.Sequential`` model.
    """
    net = tf.keras.Sequential()

    # (filters, extra Conv2D kwargs); only the first stage declares the input shape.
    stage_specs = (
        (64, {'input_shape': [32, 32, 1]}),
        (128, {}),
    )
    for filters, extra_kwargs in stage_specs:
        net.add(layers.Conv2D(filters, (5, 5), strides=(2, 2), padding='same', **extra_kwargs))
        net.add(layers.LeakyReLU())
        net.add(layers.Dropout(0.3))

    # Collapse the feature maps into one output value per sample.
    net.add(layers.Flatten())
    net.add(layers.Dense(1))

    return net

# Instantiate the discriminator and score the sample image produced by the
# generator smoke test, as a quick wiring check.
discriminator = make_discriminator_model()
decision = discriminator(generated_image)
print(decision)

In [None]:
# Binary cross-entropy loss object shared by discriminator_loss and
# generator_loss. from_logits=True because the discriminator ends in a
# Dense(1) layer without an activation.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def discriminator_loss(real_output, fake_output):
    """Discriminator loss: real samples are labelled 1, generated samples 0.

    Args:
        real_output: discriminator outputs for real samples.
        fake_output: discriminator outputs for generated samples.

    Returns:
        Sum of the cross-entropy on the real batch and on the fake batch.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake


def generator_loss(fake_output):
    """Generator loss: cross-entropy of the fake-sample outputs against all-ones targets.

    Args:
        fake_output: discriminator outputs for generated samples.

    Returns:
        The cross-entropy between a tensor of ones and ``fake_output``.
    """
    wanted_labels = tf.ones_like(fake_output)
    return cross_entropy(wanted_labels, fake_output)


# Each network gets its own Adam optimizer, both with learning rate 1e-4.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

# Checkpoint object tracking both models and both optimizers; files are
# written under checkpoint_dir with the "ckpt" prefix.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(
    generator_optimizer=generator_optimizer,
    discriminator_optimizer=discriminator_optimizer,
    generator=generator,
    discriminator=discriminator,
)

In [None]:
# Training length (the original note next to it reads "50").
EPOCHS = 1 # 50
noise_dim = NOISE_VECTOR_LEN
num_examples_to_generate = 16

# Fixed noise batch, drawn once and reused for every snapshot so that the
# saved images can be compared from epoch to epoch.
seed = tf.random.normal(shape=[num_examples_to_generate, noise_dim])


# `tf.function` causes the step below to be "compiled".
@tf.function
def train_step(images):
    """Run one optimisation step of both networks on a batch of real images.

    A fresh noise batch of BATCH_SIZE vectors is drawn on every call
    (independent of how many samples ``images`` holds). Both losses are
    computed under their own gradient tape, and each optimizer then updates
    its own network.

    Args:
        images: batch of real samples passed to the discriminator.
    """
    latent_batch = tf.random.normal([BATCH_SIZE, noise_dim])

    # One tape per network so each loss is differentiated separately.
    with tf.GradientTape() as gen_tape:
        with tf.GradientTape() as disc_tape:
            fakes = generator(latent_batch, training=True)

            real_scores = discriminator(images, training=True)
            fake_scores = discriminator(fakes, training=True)

            gen_loss = generator_loss(fake_scores)
            disc_loss = discriminator_loss(real_scores, fake_scores)

    gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables)
    disc_grads = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gen_grads, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(disc_grads, discriminator.trainable_variables))

In [None]:
def train(dataset, epochs):
    """Train the GAN for ``epochs`` passes over ``dataset``.

    In every epoch, each batch is fed to ``train_step``; afterwards a snapshot
    is rendered from the fixed ``seed`` noise, a checkpoint is saved on every
    15th epoch, and the epoch duration is printed. One more snapshot, labelled
    with ``epochs``, is rendered after the last epoch.

    Uses the notebook-level ``generator``, ``seed``, ``checkpoint`` and
    ``checkpoint_prefix``.

    Args:
        dataset: iterable of image batches.
        epochs: number of passes over ``dataset``.
    """
    for epoch in range(epochs):
        start = time.time()

        # The whole per-epoch body must live inside the epoch loop; otherwise
        # the dataset is traversed only once regardless of `epochs`.
        for image_batch in dataset:
            train_step(image_batch)

        # Produce images for the GIF as you go
        display.clear_output(wait=True)
        generate_and_save_images(generator, epoch + 1, seed)

        # Save the model every 15 epochs
        if (epoch + 1) % 15 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('Time for epoch {} is {} sec'.format(epoch + 1, time.time() - start))

    # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator, epochs, seed)

def generate_and_save_images(model, epoch, test_input):
    """Render the model's output for ``test_input`` as a grid, save it and show it.

    The model is called with ``training=False`` so that all layers run in
    inference mode (batchnorm). Channel 0 of each prediction is rescaled with
    ``* 127.5 + 127.5`` and drawn in grayscale on a 4x4 subplot grid (so at
    most 16 samples are expected). The figure is written to
    'image_at_epoch_XXXX.png' in the current working directory.

    Args:
        model: callable accepting ``(test_input, training=False)``.
        epoch: epoch number used in the output file name.
        test_input: input batch passed to ``model``.
    """
    predictions = model(test_input, training=False)
    sample_count = predictions.shape[0]

    plt.figure(figsize=(4, 4))

    for position in range(1, sample_count + 1):
        plt.subplot(4, 4, position)
        pixels = predictions[position - 1, :, :, 0] * 127.5 + 127.5
        plt.imshow(pixels, cmap='gray')
        plt.axis('off')

    out_name = 'image_at_epoch_{:04d}.png'.format(epoch)
    plt.savefig(out_name)
    plt.show()

In [None]:
# Start training: pass the batched dataset and the configured epoch count to train().
train(train_dataset, EPOCHS)

In [None]:
# Restore from the latest checkpoint found in checkpoint_dir.
# NOTE(review): train() only saves on every 15th epoch, so with EPOCHS = 1 this
# run writes no checkpoint — confirm one already exists from an earlier run.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

In [None]:
# Display a single image using the epoch number
def display_image(epoch_no):
    """Open the snapshot PNG saved for the given epoch.

    Args:
        epoch_no: epoch number used in the file name (zero-padded to 4 digits).

    Returns:
        The image opened from 'image_at_epoch_XXXX.png' in the current
        working directory.
    """
    # `import PIL` alone does not load the `PIL.Image` submodule; import it
    # explicitly so this does not rely on another library having done so.
    from PIL import Image

    return Image.open('image_at_epoch_{:04d}.png'.format(epoch_no))

# Show the snapshot whose file name carries the epoch number EPOCHS.
display_image(EPOCHS)

---

# Generator

In [None]:
# generator = Sequential()

# # Dense
# generator.add(Dense(32 * 1 * 256, input_dim=NOISE_INPUT_DIM))
# generator.add(BatchNormalization(momentum=BATCH_NORMALIZATION_MOMENTUM))
# generator.add(Activation('relu'))
# generator.add(Reshape((32, 1, 256)))
# generator.add(Dropout(DROPOUT))
# # 32 * 1 * 256

# # Upsampling (instead of fractionally-strided convolution) and transposed convolution 1
# generator.add(UpSampling2D())
# generator.add(Conv2DTranspose(128, KERNEL_SIZE, padding='same'))
# generator.add(BatchNormalization(momentum=BATCH_NORMALIZATION_MOMENTUM))
# generator.add(Activation('relu'))
# # 64 * 2 * 128

# # Upsampling (instead of fractionally-strided convolution) and transposed convolution 2
# generator.add(UpSampling2D())
# generator.add(Conv2DTranspose(64, KERNEL_SIZE, padding='same'))
# generator.add(BatchNormalization(momentum=BATCH_NORMALIZATION_MOMENTUM))
# generator.add(Activation('relu'))
# # 128 * 4 * 64

# # Upsampling (instead of fractionally-strided convolution) and transposed convolution 3
# generator.add(Conv2DTranspose(1, KERNEL_SIZE, padding='same'))
# generator.add(BatchNormalization(momentum=BATCH_NORMALIZATION_MOMENTUM))
# generator.add(Activation('relu'))
# # 128 * 4 * 32

# generator.add(Conv2DTranspose(1, KERNEL_SIZE, padding='same'))
# generator.add(Activation('sigmoid'))
# # 128 * 4 * 1

# generator.summary()

# Discriminator

In [None]:
# discriminator = Sequential()
# input_shape = (SAMPLE_LEN, SAMPLE_WIDTH, 1)

# discriminator.add(Conv2D(64, KERNEL_SIZE, strides=2, input_shape=input_shape,
#                          padding='same', activation=LeakyReLU(alpha=LEAKY_ALPHA)))
# discriminator.add(Dropout(DROPOUT))
# discriminator.add(Conv2D(128, KERNEL_SIZE, strides=2, padding='same', activation=LeakyReLU(alpha=LEAKY_ALPHA)))
# discriminator.add(Dropout(DROPOUT))
# discriminator.add(Conv2D(256, KERNEL_SIZE, strides=2, padding='same', activation=LeakyReLU(alpha=LEAKY_ALPHA)))
# discriminator.add(Dropout(DROPOUT))
# discriminator.add(Conv2D(512, KERNEL_SIZE, strides=1, padding='same', activation=LeakyReLU(alpha=LEAKY_ALPHA)))
# discriminator.add(Dropout(DROPOUT))

# discriminator.add(Flatten())
# discriminator.add(Dense(1))
# discriminator.add(Activation('sigmoid'))
# discriminator.summary()

# Discriminator Model

In [None]:
# discriminator_model= Sequential()
# discriminator_model.add(discriminator)
# discriminator_model.compile(loss='binary_crossentropy',
#                             optimizer=RMSprop(lr=D_LEARNING_RATE, clipvalue=1.0, decay=6e-8),metrics=['accuracy'])

# Adversarial Model

In [None]:
# adversarial_model = Sequential()
# adversarial_model.add(generator)
# adversarial_model.add(discriminator)
# adversarial_model.compile(loss='binary_crossentropy',
#                           optimizer=RMSprop(lr=A_LEARNING_RATE, clipvalue=1.0, decay=3e-8), metrics=['accuracy'])

---

# Training

https://www.tensorflow.org/tutorials/generative/dcgan

https://tfs.rubius.com/RubiusProjects/Polyphemus/_git/cyclop-server?path=%2Fcomponents%2Fsiamese_network%2Ftrain_model.py

---

https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough

https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch

https://www.tensorflow.org/guide/keras/custom_callback

In [None]:
# class 