In [0]:
!pip install tf-nightly-2.0-preview --upgrade
!pip install tf-nightly-gpu-2.0-preview --upgrade
!pip install tfp-nightly --upgrade

!pip install git+git://github.com/andysigler/keras-mdn-layer.git@functional-mdn-layers#egg=keras-mdn-layer --upgrade

In [0]:
from __future__ import absolute_import, division, print_function

from datetime import datetime
import os
from shutil import copyfile

import numpy as np

import tensorflow as tf
print('Tensorflow Version: ', tf.__version__)
print('GPU: ', tf.test.gpu_device_name())
from tensorflow.keras import Sequential, layers, models, Input, optimizers

import mdn

from google.colab import drive

# Mount Google Drive into the Colab filesystem. The folder below is where the
# notebook reads its input CSV from and writes/loads saved model files.
drive.mount('/content/gdrive')
drive_dest_folder = '/content/gdrive/My Drive/Colab Notebooks/data'

In [0]:
# Number of values in a single sample row (the autoencoder's input width).
sample_length = 32

# Full path of the training CSV inside the mounted Drive data folder.
data_filename = os.path.join(
    drive_dest_folder, 'david_long_19_5_29_15_40_00.csv')
# Default upper bound on the number of CSV rows that are loaded.
max_data = 100000
# Print a progress message every this many rows while reading the CSV.
display_interval = 5000


def normalize_vox_data(data):
  """Rescale raw vox feature rows into the ranges used for training.

  Along the last axis, element 0 (the pitch) is divided by 1000, and every
  other element (the spectrum) is shifted by +100 and then divided by 100.
  `denormalize_vox_data` applies the inverse mapping.

  The per-feature offset and divisor are built once as 1-D tensors and
  broadcast against `data`, so the input may have any number of leading
  dimensions; only the size of the last axis has to be known.

  Args:
    data: float32 tensor whose last axis holds the features, pitch first.

  Returns:
    A float32 tensor with the same shape as `data`.
  """
  spectrum_width = data.shape[-1] - 1
  # Per-feature offset: 0 for the pitch, +100 for each spectrum value.
  offset = tf.concat(
      [
        tf.zeros((1,), dtype=tf.dtypes.float32),
        tf.ones((spectrum_width,), dtype=tf.dtypes.float32) * 100
      ],
      0
  )
  # Per-feature divisor: 1000 for the pitch, 100 for each spectrum value.
  scale = tf.concat(
      [
        tf.ones((1,), dtype=tf.dtypes.float32) * 1000,
        tf.ones((spectrum_width,), dtype=tf.dtypes.float32) * 100
      ],
      0
  )
  return tf.divide(tf.add(data, offset), scale)


def denormalize_vox_data(data):
  """Undo `normalize_vox_data`, mapping scaled features back to raw values.

  Along the last axis, element 0 (the pitch) is multiplied by 1000, and every
  other element (the spectrum) is multiplied by 100 and then shifted by -100.

  The per-feature multiplier and offset are built once as 1-D tensors and
  broadcast against `data`, so the input may have any number of leading
  dimensions; only the size of the last axis has to be known.

  Args:
    data: float32 tensor whose last axis holds the features, pitch first.

  Returns:
    A float32 tensor with the same shape as `data`.
  """
  spectrum_width = data.shape[-1] - 1
  # Per-feature multiplier: 1000 for the pitch, 100 for each spectrum value.
  scale = tf.concat(
      [
        tf.ones((1,), dtype=tf.dtypes.float32) * 1000,
        tf.ones((spectrum_width,), dtype=tf.dtypes.float32) * 100
      ],
      0
  )
  # Per-feature offset: 0 for the pitch, 100 for each spectrum value.
  offset = tf.concat(
      [
        tf.zeros((1,), dtype=tf.dtypes.float32),
        tf.ones((spectrum_width,), dtype=tf.dtypes.float32) * 100
      ],
      0
  )
  return tf.subtract(tf.multiply(data, scale), offset)


def load_csv_to_tensor(max_samples=max_data):
  """Read the training CSV at `data_filename` into one 2-D float32 tensor.

  The first line of the file is treated as a header of feature names and is
  only printed. Every following line is parsed as comma-separated floats
  (empty fields are dropped) and becomes one row of the result. Lines that
  contain no values are skipped.

  Rows are gathered in a Python list and converted to a tensor once at the
  end, so reading is linear in the number of rows instead of copying the
  whole accumulated tensor for every line.

  Args:
    max_samples: Stop reading once this many rows have been collected. The
      limit is checked after a row is added, so at least one row is read
      whenever the file has data.

  Returns:
    A float32 tensor of shape (rows, features), or None when the file has no
    data rows.
  """
  rows = []
  with open(data_filename, 'r') as f:
    features = f.readline().strip().split(',')
    print('Read in features:', features)
    for line in f:
      values = [float(val.strip()) for val in line.split(',') if val.strip()]
      if not values:
        # Blank line (for example a trailing newline): nothing to add.
        continue
      rows.append(values)
      if len(rows) % display_interval == 0:
        print('Read in {} lines from CSV'.format(len(rows)))
      if len(rows) >= max_samples:
        break
  if not rows:
    return None
  return tf.constant(rows, dtype=tf.dtypes.float32)

In [0]:
# Read the raw CSV rows, then rescale them with normalize_vox_data. All later
# cells work on this normalized tensor.
csv_tensor = load_csv_to_tensor()
csv_tensor = normalize_vox_data(csv_tensor)
print('Parsed', csv_tensor.shape[0], 'lines samples')

In [0]:
# Print a progress message every this many items in the parsing helpers
# defined in this cell.
display_parse_interval = 10000


def generate_training_sequences(data, x_length, y_length, y_offset):
  """Cut a 2-D tensor into aligned (input, output) windows along axis 0.

  For every valid start row `s`, the input window is rows
  `[s, s + x_length)` and the output window is rows
  `[s + y_offset, s + y_offset + y_length)`. Every start row for which both
  windows fit inside `data` is used, including the last one, so `data` with
  `n` rows yields `n - max(y_offset + y_length, x_length) + 1` examples.

  Windows are collected in lists and stacked once at the end, which keeps the
  cost linear in the number of windows.

  Args:
    data: 2-D tensor of shape (rows, features).
    x_length: Number of rows in each input window.
    y_length: Number of rows in each output window.
    y_offset: Row offset of the output window relative to the input window.

  Returns:
    A tuple `(inputs, outputs)` of tensors with shapes
    (windows, x_length, features) and (windows, y_length, features), or
    `(None, None)` when `data` is too short to hold a single window.
  """
  num_features = data.shape[1]
  span = max(y_offset + y_length, x_length)
  # +1 because a window that ends exactly on the last row is still valid.
  num_windows = data.shape[0] - span + 1
  if num_windows <= 0:
    return None, None

  input_windows = []
  output_windows = []
  for start_index in range(num_windows):
    if start_index % display_parse_interval == 0:
      print('Parsing', start_index, '/', num_windows)
    input_windows.append(
        tf.slice(data, [start_index, 0], [x_length, num_features]))
    output_windows.append(
        tf.slice(
            data, [start_index + y_offset, 0], [y_length, num_features]))
  return tf.stack(input_windows, 0), tf.stack(output_windows, 0)


def remove_silence(data_tensor):
  """Drop sequences whose first row looks silent.

  Only the first row of each sequence (`sequence[0]`) is inspected. A
  sequence is kept when that row's first value (the pitch) is positive, or
  when the largest value in that row exceeds 0.75.

  The indices of kept sequences are collected first and the result is built
  with a single indexed selection, instead of concatenating one sequence at
  a time.

  Args:
    data_tensor: Eager rank-3 tensor indexed as
      (sequence, row within sequence, feature).

  Returns:
    A tensor holding only the kept sequences, in their original order, or
    None when no sequence is kept.
  """
  np_data = data_tensor.numpy()
  kept_indices = []
  for i in range(np_data.shape[0]):
    if i % display_parse_interval == 0:
      print('Removing Silence from', i)
    first_row = np_data[i][0]
    # Keep the sequence if there is pitch data, or if any value in the first
    # row is above the silence threshold.
    if first_row[0] > 0 or max(first_row) > 0.75:
      kept_indices.append(i)
  if not kept_indices:
    return None
  return tf.constant(np_data[kept_indices])

In [0]:
# Number of consecutive rows in each training sequence.
train_seq_length = 1

# Autoencoder
# Build the input windows from the normalized data; the output windows
# returned by generate_training_sequences are discarded here.
print('Parsing Autoencoder Train Data')
train_autoencoder, _ = generate_training_sequences(
  csv_tensor, train_seq_length, 1, 0)
print('Autoencoder Data Shape:', train_autoencoder.shape)
# Filter out sequences that remove_silence classifies as silent.
train_autoencoder = remove_silence(train_autoencoder)
print('No-Silence Data Shape:', train_autoencoder.shape)
# The same tensor is used as both features and targets, so the model is
# trained to reproduce its own input.
train_data_autoencoder = tf.data.Dataset.from_tensor_slices(
    (train_autoencoder, train_autoencoder))
total_train_examples_autoencoder = train_autoencoder.shape[0]

In [0]:
# Visualize predictions.
import matplotlib.pyplot as plt

# Pixels-per-inch scale intended for sizing spectrum plots; explicit figure
# sizing is not currently applied when plotting.
pixel_per_inch = 16


def draw_spectrum(seq):
  """Show a 2-D array as a grayscale image.

  Each row of `seq` is drawn as one horizontal line of pixels, with row 0 at
  the bottom of the image (`origin="lower"`) and column 0 at the left. The
  callers in this notebook pass rows of feature values, so each image row is
  one sample.

  Args:
    seq: 2-D array of values to display.
  """
  plt.imshow(seq, origin="lower", cmap="gray")
  plt.show()


# Preview three examples drawn through a 1000-element shuffle buffer. Each
# image shows an example's input and its target as two rows.
print('RNN Data')
for x, y in train_data_autoencoder.shuffle(1000).take(3):
  draw_spectrum(tf.concat([x,y], 0).numpy())

In [0]:
# Number of convolution layers in the encoder, and of matching transposed
# convolution layers in the decoder.
num_conv_layers = 3
# Filters per convolution layer.
num_conv_filters = 32
# Kernel width along the feature axis (kernels are 1 x num_conv_pixels).
num_conv_pixels = 3
# Stride along the feature axis.
num_conv_steps = 1

# Width the decoder reshapes to before its transposed convolutions:
# sample_length divided by the stride once per layer, truncated to an int.
conv_to_flat_multiplier = sample_length
for i in range(num_conv_layers):
  conv_to_flat_multiplier /= num_conv_steps
conv_to_flat_multiplier = int(conv_to_flat_multiplier)

# Size of the dense layer next to the latent space: a quarter of
# conv_to_flat_multiplier * num_conv_filters.
num_hidden = int(conv_to_flat_multiplier * num_conv_filters * 0.25)
# num_hidden_2 = int(num_hidden_1 * 0.25)
# Number of units in the latent code produced by the encoder.
num_latent_units = 2


def create_encoder():
  """Build the encoder half of the autoencoder.

  The input of shape (1, sample_length) gets a trailing channel axis, passes
  through `num_conv_layers` ReLU convolutions along the feature axis, is
  flattened, and is then reduced by a ReLU dense layer of `num_hidden` units
  to `num_latent_units` sigmoid outputs.

  Returns:
    An uncompiled Keras Sequential model.
  """
  # Add a channel axis so the 2-D convolutions can be applied.
  stack = [
      layers.Reshape((1, sample_length, 1), input_shape=(1, sample_length)),
  ]
  # Convolution stack.
  stack.extend(
      layers.Conv2D(
          filters=num_conv_filters,
          kernel_size=(1, num_conv_pixels),
          padding='same',
          strides=(1, num_conv_steps),
          activation='relu'
      )
      for _ in range(num_conv_layers)
  )
  # Bottleneck down to the latent code.
  stack.append(layers.Flatten())
  stack.append(layers.Dense(num_hidden, activation='relu'))
  stack.append(layers.Dense(num_latent_units, activation='sigmoid'))
  return Sequential(stack)


def create_decoder():
  """Build the decoder half of the autoencoder.

  A latent vector of `num_latent_units` values is expanded by two ReLU dense
  layers, reshaped to (1, conv_to_flat_multiplier, num_conv_filters), passed
  through `num_conv_layers` ReLU transposed convolutions and one final
  single-filter sigmoid transposed convolution, and reshaped to
  (1, sample_length).

  Returns:
    An uncompiled Keras Sequential model.
  """
  # Expand the latent code back to the flattened convolution size.
  stack = [
      layers.Dense(
          num_hidden,
          activation='relu',
          input_shape=(num_latent_units,)
      ),
      layers.Dense(
          conv_to_flat_multiplier * num_conv_filters,
          activation='relu'
      ),
      layers.Reshape((1, conv_to_flat_multiplier, num_conv_filters)),
  ]
  # Transposed convolution stack mirroring the encoder.
  stack.extend(
      layers.Conv2DTranspose(
          filters=num_conv_filters,
          kernel_size=(1, num_conv_pixels),
          padding='same',
          strides=(1, num_conv_steps),
          activation='relu'
      )
      for _ in range(num_conv_layers)
  )
  # Collapse the filters into a single output channel in the 0..1 range.
  stack.append(layers.Conv2DTranspose(
      filters=1,
      kernel_size=(1, num_conv_pixels),
      padding='same',
      strides=(1, 1),
      activation='sigmoid'
  ))
  stack.append(layers.Reshape((1, sample_length)))
  return Sequential(stack)

In [0]:
def dated_filename():
  """Return the current local time formatted as 'YYYY_MM_DD_HH_MM_SS'."""
  timestamp_format = '%Y_%m_%d_%H_%M_%S'
  now = datetime.now()
  return now.strftime(timestamp_format)


def save_model(model, name):
  """Save `model` to the Drive data folder under a timestamped name.

  The file is written to `drive_dest_folder` and named
  '<name>_<timestamp>.h5', where the timestamp comes from `dated_filename`.

  Args:
    model: Object exposing a `save(path)` method.
    name: Prefix for the saved file name.
  """
  timestamp = dated_filename()
  blob_name = '{0}_{1}.h5'.format(name, timestamp)
  print('Saving -> {}'.format(blob_name))
  destination = os.path.join(drive_dest_folder, blob_name)
  model.save(destination)


def load_latest_model(name):
  """Load the most recent saved model whose file name contains `name`.

  Candidates are the files in `drive_dest_folder` that end in '.h5' and
  contain `name`. They are sorted by file name and the last one is loaded.
  `save_model` puts a zero-padded timestamp after the name, so for files
  saved under the same name this order is chronological.

  Args:
    name: Substring identifying the model's files.

  Returns:
    The model returned by `tf.keras.models.load_model`.

  Raises:
    FileNotFoundError: If no matching '.h5' file exists in the folder.
  """
  model_files = sorted(
      f
      for f in os.listdir(drive_dest_folder)
      # Match the real extension, not any name that merely contains "h5".
      if name in f and f.endswith('.h5')
  )
  if not model_files:
    raise FileNotFoundError(
        'No saved .h5 model matching {!r} found in {}'.format(
            name, drive_dest_folder))
  latest_file_name = model_files[-1]
  latest_filepath = os.path.join(drive_dest_folder, latest_file_name)
  print('Loading -> {}'.format(latest_filepath))
  model = tf.keras.models.load_model(latest_filepath)
  return model

In [0]:
# Set to True to resume from the newest saved encoder/decoder files instead
# of building fresh, untrained models.
loading_latest = False

if loading_latest:
  model_encoder = load_latest_model('fuck_vox_encoder')
  model_decoder = load_latest_model('fuck_vox_decoder')
else:
  model_encoder = create_encoder()
  model_decoder = create_decoder()


# Running epoch counter and loss history; both are reset whenever this cell
# is re-run and are updated by the training loop.
total_training_epochs_autoencoder = 0
all_losses = []
model_encoder.summary()
model_decoder.summary()

In [0]:
# Chain the encoder and decoder into one trainable model. Training the
# combined model updates the weights of model_encoder and model_decoder,
# which are the objects saved at the end of the notebook.
model_autoencoder = Sequential([model_encoder, model_decoder])

# Mean squared error between the input and its reconstruction.
model_autoencoder.compile(
    loss='mse',
    optimizer=optimizers.Adam(learning_rate=0.0001))

model_autoencoder.summary()

In [0]:
def print_random_guesses_autoencoder(model, data, num_guess):
  """Plot random examples together with the model's prediction for each.

  `data` is shuffled with a buffer of `total_train_examples_autoencoder`
  elements and `num_guess` (input, target) pairs are taken from it. For each
  pair the input is run through `model` and drawn with `draw_spectrum`,
  stacked with the prediction along axis 0. The target is ignored.

  Args:
    model: Model exposing `predict`.
    data: Dataset yielding (input, target) pairs.
    num_guess: Number of examples to draw.
  """
  samples = data.shuffle(total_train_examples_autoencoder).take(num_guess)
  for example, _ in samples:
    # predict expects a batch axis; add it, then strip it from the result.
    batched = tf.expand_dims(example, 0)
    reconstruction = tf.squeeze(model.predict(batched), 0)
    stacked = tf.concat([example, reconstruction], 0)
    draw_spectrum(stacked.numpy())


# Show three reconstructions before the training loop below is run.
print_random_guesses_autoencoder(model_autoencoder, train_data_autoencoder, 3)

In [0]:
num_epochs = 16
batch_size = 4
training_samples = total_train_examples_autoencoder
# Only full batches are used; a trailing partial batch is dropped.
total_batches = int(training_samples / batch_size)
# Report progress about ten times per epoch. Clamp to at least 1 so a dataset
# with fewer than ten batches does not cause a modulo-by-zero below.
display_step = max(1, int(total_batches / 10))

temp_dataset = train_data_autoencoder.take(training_samples)


for epoch in range(num_epochs):
  # Reshuffle the whole dataset at the start of every epoch.
  tds = temp_dataset.shuffle(training_samples)
  tds = tds.batch(batch_size).prefetch(1)
  print(
      'Epoch', epoch + 1, '/', num_epochs,
      '(', total_training_epochs_autoencoder, 'total )'
  )
  for i, (batch_x, batch_y) in enumerate(tds.take(total_batches)):
    loss = model_autoencoder.train_on_batch(batch_x, batch_y)
    all_losses.append(loss)
    # display_step is measured in batches, so compare it with the batch
    # index rather than with the number of samples seen so far.
    if i % display_step == 0:
      print(
          '\tBatch', i, '/', total_batches
      )
  # After each epoch, plot the full loss history and a few reconstructions.
  plt.plot(all_losses)
  plt.show()
  print_random_guesses_autoencoder(
      model_autoencoder, train_data_autoencoder, 5)
  total_training_epochs_autoencoder += 1


print('done')

In [0]:
# Save the encoder and decoder as separate timestamped files, using the same
# names that load_latest_model is called with when resuming.
save_model(model_encoder, 'fuck_vox_encoder')
save_model(model_decoder, 'fuck_vox_decoder')