<a href="https://colab.research.google.com/github/gabrielkerr/deep_learning_fall_2018/blob/master/Keras_Style_Transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup


## Imports and Installs

In [1]:
# Installing packages and cloning git repos
!pip install music21
!pip install h5py
!git clone https://github.com/Skuldur/Classical-Piano-Composer.git

fatal: destination path 'Classical-Piano-Composer' already exists and is not an empty directory.


In [2]:
# imports
import keras
from keras import backend as K
import tensorflow as tf
import music21
import h5py
import os
import numpy as np

Using TensorFlow backend.


In [0]:
# Work from inside the cloned repository so relative paths such as
# 'new_weights.hdf5' resolve. Skip the chdir when this cell is re-run and
# the working directory is already the repository.
if os.path.basename(os.getcwd()) != 'Classical-Piano-Composer':
  os.chdir('Classical-Piano-Composer')

## Load Pretrained Model

In [0]:
# Rebuild the composer architecture, then restore its saved weights
from keras.layers import LSTM, Dropout, Dense, Activation
from keras import Sequential

model = Sequential([
    LSTM(512, input_shape=(100, 1), return_sequences=True),
    Dropout(0.3),
    LSTM(512, return_sequences=True),
    Dropout(0.3),
    LSTM(512),
    Dense(256),
    Dropout(0.3),
    Dense(359),
    Activation('softmax'),
])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# The layer stack above has to match the layout stored in the weight file
model.load_weights('new_weights.hdf5')

In [5]:
# List the concrete class of every layer in the loaded model
for layer_class in map(type, model.layers):
  print(layer_class)

<class 'keras.layers.recurrent.LSTM'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.recurrent.LSTM'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.recurrent.LSTM'>
<class 'keras.layers.core.Dense'>
<class 'keras.layers.core.Dropout'>
<class 'keras.layers.core.Dense'>
<class 'keras.layers.core.Activation'>


# Define Loss Function

In [0]:
def content_loss(input_sequence, output_sequence):
  """Return tf.linalg.norm (default settings) of input_sequence - output_sequence."""
  difference = input_sequence - output_sequence
  return tf.linalg.norm(difference)


def style_loss(trained_model, new_model):
  """Sum the norms of the weight differences between two models.

  Layers are paired by position. Only LSTM and Dense layers contribute;
  for each of them every weight array of `trained_model` is compared with
  the array at the same position in `new_model`.

  Args:
    trained_model: reference Keras model.
    new_model: Keras model compared against the reference; it needs a
      layer at every index where `trained_model` has an LSTM/Dense layer.

  Returns:
    A scalar tf.Tensor, or the int 0 when no LSTM/Dense layer has weights.
    The weights are read with get_weights(), i.e. as numpy snapshots, so
    the result carries no gradient with respect to either model.
  """
  # isinstance replaces the former `str(type(layer) == "...")` test: that
  # expression was always a non-empty (truthy) string, so the filter never
  # skipped anything.
  weighted_layer_types = (keras.layers.LSTM, keras.layers.Dense)
  loss = 0

  for i, trained_layer in enumerate(trained_model.layers):
    if not isinstance(trained_layer, weighted_layer_types):
      continue

    new_layer = new_model.layers[i]
    for trained_w, new_w in zip(trained_layer.get_weights(), new_layer.get_weights()):
      loss += tf.linalg.norm(trained_w - new_w)

  return loss


def total_loss(input_sequence, output_sequence, trained_model, new_model, alpha, beta):
  """Combine the content loss (weight alpha) and the style loss (weight beta)."""
  content_term = alpha * content_loss(input_sequence, output_sequence)
  style_term = beta * style_loss(trained_model, new_model)
  return content_term + style_term

In [7]:

# Smoke test: build the style loss of the loaded model against itself
style_loss(model, model)

<tf.Tensor 'add_12:0' shape=() dtype=float32>

In [8]:
# Smoke test: evaluate the losses on all-zero 2x2 inputs and on the model
# compared with itself
zeros_shape = [2, 2]
input_sequence = tf.zeros(shape=zeros_shape)
output_sequence = tf.zeros(shape=zeros_shape)

with tf.Session() as sess:
  init = tf.global_variables_initializer()
  sess.run(init)
  content_tensor = content_loss(input_sequence, output_sequence)
  x = sess.run(content_tensor)
  total_tensor = total_loss(input_sequence, output_sequence, model, model, 1, 1)
  y = sess.run(total_tensor)
  print(y)

0.0


# Generate a Sequence

In [0]:
from lstm import get_notes, prepare_sequences

In [0]:
""" This module generates notes for a midi file using the
  trained neural network """
import pickle
import numpy
import keras
from music21 import instrument, note, stream, chord
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation

def generate(idx, notes):
  """ Generate a piano midi file seeded from training sequence `idx`.

  Builds the vocabulary from `notes`, recreates the network, generates
  notes from the `idx`-th prepared sequence and writes them out through
  create_midi. Returns the final input window from generate_notes.
  """
  # Vocabulary: the distinct note names in sorted order, and its size
  pitchnames = sorted(set(notes))
  n_vocab = len(pitchnames)

  network_input, normalized_input = prepare_sequences(notes, pitchnames, n_vocab)
  network = create_network(normalized_input, n_vocab)
  prediction_output, pattern = generate_notes(
      network, network_input, idx, pitchnames, n_vocab)
  create_midi(prediction_output)

  return pattern


def prepare_sequences(notes, pitchnames, n_vocab, sequence_length=100):
  """ Prepare the sequences used by the Neural Network

  Slides a window of `sequence_length` items over `notes` and encodes each
  window as integer indices into `pitchnames`.

  Args:
    notes: sequence of note/chord names.
    pitchnames: ordered collection of the distinct names; the position of
      a name is its integer code.
    n_vocab: divisor applied to the integer codes to scale them.
    sequence_length: window length. Defaults to 100, the value that was
      previously hard-coded.

  Returns:
    (network_input, normalized_input). network_input is a list with one
    list of integer codes per window. normalized_input holds the same
    values as a numpy array of shape (n_patterns, sequence_length, 1),
    divided by n_vocab.

  Raises:
    KeyError: if a windowed note is not present in `pitchnames`.
  """
  # map each name to its integer code
  note_to_int = {name: number for number, name in enumerate(pitchnames)}

  # One window per start position that still has a note following it.
  # (The next-note targets formerly collected here were never returned,
  # so they are no longer built.)
  network_input = [
      [note_to_int[name] for name in notes[start:start + sequence_length]]
      for start in range(len(notes) - sequence_length)
  ]
  n_patterns = len(network_input)

  # reshape the input into a format compatible with LSTM layers, then scale
  normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
  normalized_input = normalized_input / float(n_vocab)

  return (network_input, normalized_input)

def create_network(network_input, n_vocab):
  """ create the structure of the neural network

  The input shape comes from axes 1 and 2 of `network_input`. After
  compiling, the weights are restored from 'new_weights.hdf5'.
  """
  timesteps = network_input.shape[1]
  features = network_input.shape[2]

  layers = [
      LSTM(512, input_shape=(timesteps, features), return_sequences=True),
      Dropout(0.3),
      LSTM(512, return_sequences=True),
      Dropout(0.3),
      LSTM(512),
      Dense(256),
      Dropout(0.3),
      # NOTE(review): the output width is n_vocab + 1, presumably to match
      # the layout of the saved weight file -- confirm.
      Dense(n_vocab+1),
      Activation('softmax'),
  ]
  model = Sequential(layers)
  model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

  # Restore the saved weights into the freshly built layers
  model.load_weights('new_weights.hdf5')

  return model

def generate_notes(model, network_input, idx, pitchnames, n_vocab, num_notes=100):
  """ Generate notes from the neural network based on a sequence of notes

  Starting from the seed window network_input[idx], repeatedly asks the
  model for the most likely next code, records the matching name and
  slides the window forward by one.

  Args:
    model: object with a Keras-style predict(batch, verbose=0) method.
    network_input: sequence of integer-code windows.
    idx: index of the seed window inside `network_input`.
    pitchnames: ordered names; position i is the name for code i.
    n_vocab: divisor used to scale the window before prediction.
    num_notes: how many notes to generate. Defaults to 100, the value
      that was previously hard-coded.

  Returns:
    (prediction_output, pattern): the generated names, and the final
    window of integer codes (same length as the seed).

  Raises:
    KeyError: if the model predicts a code with no entry in `pitchnames`.
  """
  int_to_note = dict(enumerate(pitchnames))

  # Work on a copy: appending to network_input[idx] directly used to grow
  # the caller's seed window by one element on every call.
  pattern = list(network_input[idx])
  prediction_output = []

  for _ in range(num_notes):
    prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)

    prediction = model.predict(prediction_input, verbose=0)

    # Greedy choice: take the highest-scoring class
    index = int(numpy.argmax(prediction))
    prediction_output.append(int_to_note[index])

    # Slide the window: append the new code, drop the oldest one
    pattern = (pattern + [index])[1:]

  return prediction_output, pattern

def create_midi(prediction_output):
  """ Turn predicted note/chord names into music21 objects and write them
      to 'test_output.mid'. Each element is placed 0.5 after the previous
      one. """

  def _piano_note(pitch):
      # Build one note and tag it with a piano instrument
      built = note.Note(pitch)
      built.storedInstrument = instrument.Piano()
      return built

  output_notes = []
  offset = 0

  for pattern in prediction_output:
      # Chords are encoded as '.'-separated integers (or one bare integer)
      is_chord = pattern.isdigit() or ('.' in pattern)
      if is_chord:
          members = [_piano_note(int(pitch)) for pitch in pattern.split('.')]
          element = chord.Chord(members)
      else:
          # Anything else is a single named note
          element = _piano_note(pattern)

      element.offset = offset
      output_notes.append(element)

      # advance so that consecutive elements do not stack
      offset += 0.5

  midi_stream = stream.Stream(output_notes)
  midi_stream.write('midi', fp='test_output.mid')
  
  
def generate_notes_from_melody(normalized_meloday_seq, network_input, idx, pitchnames, n_vocab, network=None, num_notes=100):
  """ Generate notes from the neural network, seeded with a given melody

  Args:
    normalized_meloday_seq: seed window (sequence of numbers). It is
      copied, so the caller's sequence is left unchanged.
      NOTE(review): despite the name, the window is divided by n_vocab
      again before every prediction -- confirm whether callers are meant
      to pass raw integer codes.
    network_input: unused; kept for signature compatibility.
    idx: unused; kept for signature compatibility.
    pitchnames: ordered names; position i is the name for code i.
    n_vocab: divisor used to scale the window before prediction.
    network: object with a Keras-style predict(batch, verbose=0) method.
      When None, the notebook-level `model` is used, as before.
    num_notes: how many notes to generate. Defaults to 100, the value
      that was previously hard-coded.

  Returns:
    (prediction_output, pattern): the generated names, and the final
    window (same length as the seed).

  Raises:
    KeyError: if the model predicts a code with no entry in `pitchnames`.
  """
  # Fall back to the global model only when no network is supplied
  predictor = model if network is None else network

  int_to_note = dict(enumerate(pitchnames))

  # Copy the seed: appending to it directly used to mutate the caller's list
  pattern = list(normalized_meloday_seq)
  prediction_output = []

  for _ in range(num_notes):
    prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)

    prediction = predictor.predict(prediction_input, verbose=0)

    # Greedy choice: take the highest-scoring class
    index = int(numpy.argmax(prediction))
    prediction_output.append(int_to_note[index])

    # Slide the window: append the new code, drop the oldest one
    pattern = (pattern + [index])[1:]

  return prediction_output, pattern


def generate_from_melody(normalized_melody_seq, notes):
  """ Generate a piano midi file seeded with a caller-supplied melody

  Args:
    normalized_melody_seq: seed window (sequence of numbers) that note
      generation starts from. It is copied and left unchanged.
    notes: note/chord names used to build the vocabulary and to size the
      network input.

  Returns:
    The final input window returned by generate_notes.
  """
  # Vocabulary: the distinct note names in sorted order, and its size
  pitchnames = sorted(set(notes))
  n_vocab = len(pitchnames)

  # Only the normalized array is needed here, to size the network input
  _, normalized_input = prepare_sequences(notes, pitchnames, n_vocab)
  model = create_network(normalized_input, n_vocab)

  # generate_notes seeds itself from network_input[idx]. Passing the melody
  # as idx (as before) made that lookup raise a TypeError. Wrap the melody
  # as a one-entry seed bank and select entry 0 instead.
  seed_bank = [list(normalized_melody_seq)]
  prediction_output, pattern = generate_notes(model, seed_bank, 0, pitchnames, n_vocab)
  create_midi(prediction_output)

  return pattern

In [11]:
# Load the notes via get_notes (imported from the repo's lstm module)
notes = get_notes()

Parsing midi_songs/Still_Alive-1.mid
Parsing midi_songs/sobf.mid
Parsing midi_songs/ultimafro.mid
Parsing midi_songs/Ff7-Jenova_Absolute.mid
Parsing midi_songs/traitor.mid
Parsing midi_songs/Oppressed.mid
Parsing midi_songs/ff1battp.mid
Parsing midi_songs/Cids.mid
Parsing midi_songs/decisive.mid
Parsing midi_songs/JENOVA.mid
Parsing midi_songs/FFIXQuMarshP.mid
Parsing midi_songs/caitsith.mid
Parsing midi_songs/costadsol.mid
Parsing midi_songs/redwings.mid
Parsing midi_songs/FF3_Third_Phase_Final_(Piano).mid
Parsing midi_songs/AT.mid
Parsing midi_songs/Ff7-Cinco.mid
Parsing midi_songs/Finalfantasy5gilgameshp.mid
Parsing midi_songs/path_of_repentance.mid
Parsing midi_songs/FFVII_BATTLE.mid
Parsing midi_songs/Finalfantasy6fanfarecomplete.mid
Parsing midi_songs/ff7-mainmidi.mid
Parsing midi_songs/FFX_-_Ending_Theme_(Piano_Version)_-_by_Angel_FF.mid
Parsing midi_songs/ahead_on_our_way_piano.mid
Parsing midi_songs/Life_Stream.mid
Parsing midi_songs/Eternal_Harvest.mid
Parsing midi_songs/Sute

In [0]:
# Build the vocabulary here: `pitchnames` was commented out, so this cell
# raised NameError on a fresh kernel.
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)
# prepare_sequences returns (integer-coded windows, normalized array), so
# `output_sequences` holds the normalized array.
input_sequences, output_sequences = prepare_sequences(notes, pitchnames, n_vocab)

In [0]:
# Generate from training sequence 0; generate() returns the final input
# window of integer codes, which is what `preds` holds.
preds = np.array(generate(0, notes))

In [18]:
# Scale the integer codes by the vocabulary size
np.asarray(preds) / n_vocab

array([0.88826816, 0.94413408, 0.94134078, 0.88826816, 0.82960894,
       0.88826816, 0.88826816, 0.41899441, 0.88826816, 0.79608939,
       0.79329609, 0.88826816, 0.94134078, 0.88826816, 0.82960894,
       0.88826816, 0.94134078, 0.78212291, 0.82960894, 0.9273743 ,
       0.92458101, 0.54748603, 0.92458101, 0.99441341, 0.84916201,
       0.92458101, 0.44134078, 0.92458101, 0.85195531, 0.79329609,
       0.92458101, 0.89106145, 0.99162011, 0.92458101, 0.84916201,
       0.9273743 , 0.88826816, 0.92458101, 0.79608939, 0.79329609,
       0.79608939, 0.79329609, 0.88826816, 0.94413408, 0.94134078,
       0.88826816, 0.82960894, 0.88826816, 0.94134078, 0.88826816,
       0.79329609, 0.88826816, 0.94134078, 0.88826816, 0.61731844,
       0.32122905, 0.41899441, 0.88826816, 0.9273743 , 0.79329609,
       0.88826816, 0.94413408, 0.94134078, 0.88826816, 0.82960894,
       0.88826816, 0.94134078, 0.88826816, 0.79329609, 0.88826816,
       0.94134078, 0.78212291, 0.61731844, 0.31005587, 0.41899

# Define Training Loop

In [0]:
def transfer_style(input_melody, trained_model, prog_model, hparams):
  """Update `prog_model` against a combined content/style objective.

  The objective is alpha * content + beta * style, where
    * content is the norm of (input_melody - generated sequence), and
    * style is the sum, over LSTM/Dense layers, of the distance between
      each weight of `prog_model` and the matching weight of
      `trained_model`.

  The style term is built here from `prog_model`'s weight variables rather
  than through style_loss(), because style_loss() reads weights with
  get_weights() (numpy snapshots) and therefore yields no gradient.

  Args:
    input_melody: seed melody (sequence of numbers).
    trained_model: reference Keras model; its weights are read once.
    prog_model: Keras model whose LSTM/Dense weights are updated in place.
    hparams: dict (or None) with optional keys 'learning_rate' (0.01),
      'alpha' (1.0), 'beta' (1.0), 'eps' (0.1) and 'max_iters' (100).
      Missing or falsy entries fall back to the defaults in parentheses.

  Returns:
    The sequence returned by the last generate_from_melody call, or None
    if no iteration ran.

  Raises:
    ValueError: if `prog_model` has no LSTM/Dense weights to update.

  NOTE(review): generate_from_melody builds its own network from the saved
  weight file, so the generated sequence does not yet reflect updates made
  to `prog_model`. The content term is therefore constant across
  iterations. Confirm the intended wiring.
  """
  # Parse the dictionary of hyperparameters.
  hparams = hparams or {}
  learning_rate = hparams.get('learning_rate') or 0.01
  alpha = hparams.get('alpha') or 1.0
  beta = hparams.get('beta') or 1.0
  eps = hparams.get('eps') or 0.1
  # Upper bound so the loop always terminates, even if it never converges.
  max_iters = hparams.get('max_iters') or 100

  # Collect the variables to update and build the differentiable style term.
  weighted_layer_types = (keras.layers.LSTM, keras.layers.Dense)
  vars_to_update = {}
  style_terms = []
  for i, (trained_layer, layer) in enumerate(zip(trained_model.layers, prog_model.layers)):
    if not isinstance(layer, weighted_layer_types):
      continue

    layer_vars = layer.weights
    vars_to_update['layer_' + str(i)] = layer_vars
    for reference, variable in zip(trained_layer.get_weights(), layer_vars):
      # The small constant keeps the sqrt gradient finite when the two
      # weights are identical (distance exactly 0).
      squared_distance = tf.reduce_sum(tf.square(variable - reference))
      style_terms.append(tf.sqrt(squared_distance + 1e-12))

  if not style_terms:
    raise ValueError('prog_model has no LSTM or Dense weights to update')

  # The content term is computed in numpy each iteration and fed in.
  content_value = tf.placeholder(tf.float32, shape=[])
  tot_loss = alpha * content_value + beta * tf.add_n(style_terms)

  # Build the optimizer and update op once, outside the loop.
  var_list = [variable for layer_vars in vars_to_update.values() for variable in layer_vars]
  optimizer = tf.train.RMSPropOptimizer(learning_rate)
  update_op = optimizer.minimize(tot_loss, var_list=var_list)

  # Use the Keras session: the models' variables hold their loaded values
  # there, and a fresh tf.Session would not see them. Only the optimizer's
  # own slot variables need initializing.
  sess = K.get_session()
  sess.run(tf.variables_initializer(optimizer.variables()))

  # Define the training loop.
  melody = np.asarray(input_melody, dtype=float)
  last_tot_loss = float('inf')
  output_sequence = None
  for _ in range(max_iters):
    # Generate an output sequence from a copy of the input melody.
    # `notes` is the notebook-level list of parsed notes.
    output_sequence = generate_from_melody(list(input_melody), notes)

    content = np.linalg.norm(melody - np.asarray(output_sequence, dtype=float))
    current_loss = sess.run(tot_loss, feed_dict={content_value: content})

    # Stop once the loss changes by less than eps between iterations.
    if abs(last_tot_loss - current_loss) < eps:
      break

    # Update the weights of prog_model based on the total loss.
    sess.run(update_op, feed_dict={content_value: content})
    last_tot_loss = current_loss

  return output_sequence
        

# Generate Input Melodies