# INSTRUCTIONS

Workflow: Run each cell in order. Put or extract data into relevant folders as defined in section 2. Make remaining folders in drive.  **Cells denoted with * may require extra action.**


```
Folder Structure Suggested for Section 2:
drive/
    train_data/ <- input midis go here
    train_output/
        graphs/ <- train/val accuracy plots go here
        intermed/ <- intermediate weights, preprocessing go here
        stats/ (currently not being used)
        midi/ <- output midis go here
```




# 1 Imports

In [1]:
# Import Data Manip, Debug
import glob
import pickle
import numpy as np
import pandas as pd
import pdb
import matplotlib.pyplot as plt

# Import Music21
!pip install music21
from music21 import converter, instrument, note, chord, stream

# Import Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import CuDNNLSTM
from keras.layers import CuDNNGRU
from keras.layers import Activation
from keras.layers import Bidirectional
from keras.layers import RepeatVector
from keras.layers import Flatten
from keras.regularizers import L1L2
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.callbacks import History

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2



Using TensorFlow backend.


In [2]:
# stuff needed for colaboratory to connect with drive
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

E: Package 'python-software-properties' has no installation candidate
Selecting previously unselected package google-drive-ocamlfuse.
(Reading database ... 131323 files and directories currently installed.)
Preparing to unpack .../google-drive-ocamlfuse_0.7.1-0ubuntu3~ubuntu18.04.1_amd64.deb ...
Unpacking google-drive-ocamlfuse (0.7.1-0ubuntu3~ubuntu18.04.1) ...
Setting up google-drive-ocamlfuse (0.7.1-0ubuntu3~ubuntu18.04.1) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=ht

# 2 Mount and Set Directories*

In [0]:
# mount drive
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
DATA_FOLDER = 'drive/Project/train_data/edm_all'
GRAPHS_FOLDER = 'drive/Project/train_output/graphs'
MIDI_OUTPUT_FOLDER = 'drive/Project/train_output/midi'
INTERMED_FOLDER = 'drive/Project/train_output/intermed'
STATS_FOLDER = 'drive/Project/train_output/stats'

# data specific
EDM_CORPUS = '/edm_'

# SET THIS TO ONE OF ABOVE (ACTION)
CORPUS = EDM_CORPUS

# Notewise Root and Rest Learning

# 3 Set Training Parameters*

In [0]:
# SET PARAMETERS (ACTION)
RESTS = True
ROOT_EXTRACTION = True
DURATION_BATCH_SIZE = 256
NOTE_BATCH_SIZE = 128
# SPECIFY PARAMETERS TO TEST AS LIST
DROPOUTS = 0
MODEL_SIZES = 256
EPOCHS = 200

# 4 Preprocess Data (MIDI Data Into Notes Corpus and Duration Corpus)

In [6]:
# Test

notes_corpus = []
durations_corpus = []

for file in glob.glob(DATA_FOLDER + "/*.mid"):
    try:
      print("Extracting MIDI File: ", file)
      midi_stream = converter.parse(file)

      notes = None

      partition = instrument.partitionByInstrument(midi_stream)

      if not RESTS:
        # No rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.flat.notes
      else:
        # With rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.notesAndRests
          
      in_song_notes = []
      in_song_durations = []
      for element in notes:
          in_song_durations.append(element.duration.quarterLength)
          if isinstance(element, note.Note):
              in_song_notes.append(str(element.pitch))
          elif RESTS and isinstance(element, note.Rest):
              in_song_notes.append("R")
          elif isinstance(element, chord.Chord):
              if ROOT_EXTRACTION:
                  in_song_notes.append(element.root().nameWithOctave)
              else:
                  in_song.append('.'.join(str(n) for n in element.normalOrder))
    except:
      pass
    
    notes_corpus.append(in_song_notes)
    durations_corpus.append(in_song_durations)
            
# Write
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'wb+') as filepath:
    pickle.dump(notes_corpus, filepath)
    
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'wb+') as filepath:
    pickle.dump(durations_corpus, filepath)

Extracting MIDI File:  drive/Project/train_data/edm_all/Afrojack _ David Guetta - Another Life  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Lonely Together.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Axwell _ Ingrosso - More Than You Know  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Sing Me To Sleep  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Without You.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alesso ft. Matthew Koma - Years (Original Mix) (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Tired  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Alone  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/tr

# 5 Preprocess Corpus Into Train Data

## 5.1 Notes Corpus

In [7]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'rb') as filepath:
    notes_corpus = pickle.load(filepath)
    
# If doing learning for one song at a time only
flattened_notes_corpus = []
for song_notes in notes_corpus:
    flattened_notes_corpus += song_notes

vocab_size = len(set(flattened_notes_corpus))
print(vocab_size)

# Produce input output sequences
window_size = 60
note_sequence_input = []
next_note_output = []

notes = sorted(set(flattened_notes_corpus))
note2int = dict((note, num) for num, note in enumerate(notes))

for i in range(0, len(notes_corpus)):
    for j in range(0, len(notes_corpus[i]) - window_size):
        current_sequence = [note2int[note] for note in notes_corpus[i][j:window_size+j]]
        next_note = note2int[notes_corpus[i][window_size+j]]
        note_sequence_input.append(current_sequence)
        next_note_output.append(next_note)

65


In [8]:
# check train and label shapes
training_data = np.reshape(note_sequence_input, (len(note_sequence_input), window_size , 1))
training_data = training_data / float(vocab_size)
print('Train shape: ' + str(training_data.shape))
training_label = np_utils.to_categorical(next_note_output)
print('Label shape: ' + str(training_label.shape))

Train shape: (11392, 60, 1)
Label shape: (11392, 65)


## 5.2 Durations Corpus

In [9]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'rb') as filepath:
    duration_corpus = pickle.load(filepath)

# Learn on one song at a time
flattened_duration_corpus = []
for song_durations in duration_corpus:
    flattened_duration_corpus += song_durations
    
import collections
counter = collections.Counter(flattened_duration_corpus)
print(counter)


duration_vocab_size = len(set(flattened_duration_corpus))
print(duration_vocab_size)

# Produce input output sequences
duration_window_size = 30
duration_sequence_input = []
next_duration_output = []

durations = sorted(set(flattened_duration_corpus))
duration2int = dict((duration, num) for num, duration in enumerate(durations))

print(duration2int)
# Write
with open(INTERMED_FOLDER + "/edm_duration_counter", 'wb+') as filepath:
    pickle.dump(counter, filepath)

for i in range(0, len(duration_corpus)):
    for j in range(0, len(duration_corpus[i]) - duration_window_size):
        current_duration_sequence = [duration2int[note] for note in duration_corpus[i][j:duration_window_size+j]]
        next_duration = duration2int[duration_corpus[i][duration_window_size+j]]
        duration_sequence_input.append(current_duration_sequence)
        next_duration_output.append(next_duration)

Counter({0.25: 4993, 0.5: 4383, 0.0: 966, Fraction(1, 3): 935, 1.0: 735, Fraction(1, 6): 729, 0.75: 461, 4.0: 165, 2.0: 122, Fraction(2, 3): 93, Fraction(1, 12): 86, Fraction(5, 12): 72, 1.5: 55, 3.0: 41, Fraction(7, 6): 32, 1.75: 30, 1.25: 29, 2.25: 25, 4.5: 22, 3.5: 13, 6.5: 13, 8.0: 11, 4.25: 9, 2.5: 9, Fraction(25, 6): 7, 5.0: 6, 7.0: 5, 8.5: 4, 32.0: 4, 6.0: 4, Fraction(5, 3): 3, 48.0: 2, 68.0: 2, 176.0: 2, 80.0: 2, 16.0: 2, Fraction(23, 3): 1, 47.5: 1, 3.25: 1, 5.75: 1, 200.5: 1, 36.25: 1, 64.25: 1, 84.25: 1, 72.5: 1, 72.25: 1, 14.5: 1, 9.0: 1, 27.5: 1, 61.0: 1, 161.0: 1, 5.25: 1, 4.75: 1, 94.5: 1, 128.25: 1, 18.0: 1, 39.5: 1, Fraction(553, 6): 1, Fraction(4, 3): 1, 32.5: 1, 36.0: 1})
61
{0.0: 0, Fraction(1, 12): 1, Fraction(1, 6): 2, 0.25: 3, Fraction(1, 3): 4, Fraction(5, 12): 5, 0.5: 6, Fraction(2, 3): 7, 0.75: 8, 1.0: 9, Fraction(7, 6): 10, 1.25: 11, Fraction(4, 3): 12, 1.5: 13, Fraction(5, 3): 14, 1.75: 15, 2.0: 16, 2.25: 17, 2.5: 18, 3.0: 19, 3.25: 20, 3.5: 21, 4.0: 22, Fra

In [10]:
# check train and label shapes
duration_training_data = np.reshape(duration_sequence_input, (len(duration_sequence_input), duration_window_size , 1))
duration_training_data = duration_training_data / float(duration_vocab_size)
print('Train shape: ' + str(duration_training_data.shape))
duration_training_label = np_utils.to_categorical(next_duration_output)
print('Label shape: ' + str(duration_training_label.shape))

Train shape: (12837, 30, 1)
Label shape: (12837, 61)


# 6 Train

## 6.1 Helpers to Create Model

In [0]:
def create_model(network_input, n_vocab, model_size, dropout):
  model = Sequential()
  reg = L1L2(0, 0)
  model.add(LSTM(
      model_size,
      input_shape=(network_input.shape[1], network_input.shape[2]),
      return_sequences=True,
      dropout=dropout, recurrent_dropout=0.3
  ))
  model.add(CuDNNLSTM(model_size, return_sequences=True, kernel_regularizer=reg))
  model.add(Dropout(dropout))
  model.add(CuDNNLSTM(model_size, kernel_regularizer=reg))
  model.add(Dense(128))
  model.add(Dropout(dropout))
  model.add(Dense(n_vocab))
  model.add(Activation('softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

def create_callback_list(model_size, dropout, model_type):
  filepath = INTERMED_FOLDER + '/%skarpathy-model-weights-%s-%s-%s.hdf5' % (CORPUS, model_type, model_size, dropout)
  model_checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
  )
  return [model_checkpoint], filepath

# acc history
def setup_plot(dropout, size):
  plt.title('Model Accuracy vs. Epoc with Dropout=%s Size=%s' % (dropout, size))
  plt.ylabel('accuracy')
  plt.xlabel('epoch')
  
def plot_history(history, model_type, dropout, size):
  plt.plot(history.history['acc'], label="%s train accuracy" % model_type)
  plt.plot(history.history['val_acc'], label="%s val accuracy" % model_type)

def save_plot(file_path):
  plt.legend()
  plt.savefig(file_path)
  plt.clf()
  
def predict_duration(model, WEIGHT_PATH):
  # Prediction
  model.load_weights(WEIGHT_PATH)
  starting_sequence = np.random.randint(219, size=duration_window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  duration2note = dict((num, note) for num, note in enumerate(durations))
  print (duration2note)
  
  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(duration_vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Duration: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Duration: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Duration: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

      prediction_values = np.arange(len(prediction[0]))
      prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))

      # Most probable note prediction
#       index = np.argmax(prediction)
#       note_instance = duration2note[index]
#       prediction_output.append(note_instance)
      index = np.random.choice(prediction_values, 1, p=prediction_prob)
      note_instance = duration2note[int(index[0])]
      prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]

  # Write
  with open(INTERMED_FOLDER + ("%sduration_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  
  return prediction_output

def predict_note(model, WEIGHT_PATH):
  model.load_weights(WEIGHT_PATH)
  # Prediction
  starting_sequence = np.random.randint(219, size=window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  int2note = dict((num, note) for num, note in enumerate(notes))
  print (int2note)

  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Note: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Note: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Note: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

#       prediction_values = np.arange(len(prediction[0]))
#       prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))
#       print(prediction_prob)

      # Most probable note prediction
      index = np.argmax(prediction)
      note_instance = int2note[index]
      prediction_output.append(note_instance)

      # Predict based on prob dist
#       index = np.random.choice(prediction_values, 1, p=prediction_prob)
  #     print(index[0])
  #     print(type(index[0]))
#       note_instance = int2note[int(index[0])]
#       prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]
  # Write
  with open(INTERMED_FOLDER + ("%snotes_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  return prediction_output

def output_midi(prediction_output, duration_prediction_output, dropout, model_size):
  offset = 0
  output_notes = []
  for pattern, duration in zip(prediction_output, duration_prediction_output):

      if ('.' in pattern) or pattern.isdigit():
          chord_array = pattern.split('.')
          chord_notes = []
          for note_instance in chord_array:
              note_object = note.Note(int(note_instance))
              note_object.duration.quarterLength = duration
              note_object.storedInstrument = instrument.Piano()
              chord_notes.append(note_object)
          chord_object = chord.Chord(chord_notes)
          chord_object.offset = offset
          output_notes.append(chord_object)
      elif 'R' == pattern:
          note_object = note.Rest()
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          output_notes.append(note_object)
      else:
          note_object = note.Note(pattern)
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          note_object.storedInstrument = instrument.Piano()
          output_notes.append(note_object)

      offset += 0.5

  midi_stream = stream.Stream(output_notes)
  midi_stream.write('midi', fp=MIDI_OUTPUT_FOLDER + CORPUS + '%s_%s.mid' % (dropout, model_size))
  print('\nWrote midi...')

## 6.2 Preprocessing Optimization

In [12]:
notes_histories = {}
duration_histories = {}

dropout, model_size = DROPOUTS, MODEL_SIZES
setup_plot(dropout, model_size)
print('Running duration training on notewise with rests:%s and root extraction:%s' % (RESTS, ROOT_EXTRACTION))
duration_callbacks, duration_weight_path = create_callback_list('duration', dropout, model_size)
duration_model = create_model(duration_training_data, duration_vocab_size, model_size, dropout)
duration_histories[(dropout, model_size)] = duration_model.fit(duration_training_data, duration_training_label, epochs=EPOCHS, batch_size=DURATION_BATCH_SIZE, callbacks=duration_callbacks, validation_split=0.2)
plot_history(duration_histories[(dropout, model_size)], 'Durations', dropout, model_size)
# output intermed duration
duration_prediction = predict_duration(duration_model, duration_weight_path)

print('\n\nRunning notes training on d:%s s:%s' % (dropout, model_size))
notes_callbacks, note_weight_path = create_callback_list('notes', dropout, model_size)
notes_model = create_model(training_data, vocab_size, model_size, dropout)
notes_histories[(dropout, model_size)] = notes_model.fit(training_data, training_label, epochs=EPOCHS, batch_size=NOTE_BATCH_SIZE, callbacks=notes_callbacks, validation_split=0.2)
plot_history(notes_histories[(dropout, model_size)], 'Notes', dropout, model_size)
save_plot(GRAPHS_FOLDER + EDM_CORPUS + 'dropout=%s_size=%s' % (dropout, model_size))
# output intermed notes
note_prediction = predict_note(notes_model, note_weight_path)

# output final midi
output_midi(note_prediction, duration_prediction, dropout, model_size)

Running duration training on notewise with rests:True and root extraction:True
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Train on 10269 samples, validate on 2568 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/

<Figure size 576x396 with 0 Axes>

# Notewise Root

# 3 Set Training Parameters*

In [0]:
# SET PARAMETERS (ACTION)
RESTS = False
ROOT_EXTRACTION = True
DURATION_BATCH_SIZE = 256
NOTE_BATCH_SIZE = 128
# SPECIFY PARAMETERS TO TEST AS LIST
DROPOUTS = 0
MODEL_SIZES = 256
EPOCHS = 200

# 4 Preprocess Data (MIDI Data Into Notes Corpus and Duration Corpus)

In [14]:
# Test

notes_corpus = []
durations_corpus = []

for file in glob.glob(DATA_FOLDER + "/*.mid"):
    try:
      print("Extracting MIDI File: ", file)
      midi_stream = converter.parse(file)

      notes = None

      partition = instrument.partitionByInstrument(midi_stream)

      if not RESTS:
        # No rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.flat.notes
      else:
        # With rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.notesAndRests
          
      in_song_notes = []
      in_song_durations = []
      for element in notes:
          in_song_durations.append(element.duration.quarterLength)
          if isinstance(element, note.Note):
              in_song_notes.append(str(element.pitch))
          elif RESTS and isinstance(element, note.Rest):
              in_song_notes.append("R")
          elif isinstance(element, chord.Chord):
              if ROOT_EXTRACTION:
                  in_song_notes.append(element.root().nameWithOctave)
              else:
                  in_song.append('.'.join(str(n) for n in element.normalOrder))
    except:
      pass
    
    notes_corpus.append(in_song_notes)
    durations_corpus.append(in_song_durations)
            
# Write
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'wb+') as filepath:
    pickle.dump(notes_corpus, filepath)
    
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'wb+') as filepath:
    pickle.dump(durations_corpus, filepath)

Extracting MIDI File:  drive/Project/train_data/edm_all/Afrojack _ David Guetta - Another Life  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Lonely Together.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Axwell _ Ingrosso - More Than You Know  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Sing Me To Sleep  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Without You.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alesso ft. Matthew Koma - Years (Original Mix) (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Tired  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Alone  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/tr

# 5 Preprocess Corpus Into Train Data

## 5.1 Notes Corpus

In [15]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'rb') as filepath:
    notes_corpus = pickle.load(filepath)
    
# If doing learning for one song at a time only
flattened_notes_corpus = []
for song_notes in notes_corpus:
    flattened_notes_corpus += song_notes

vocab_size = len(set(flattened_notes_corpus))
print(vocab_size)

# Produce input output sequences
window_size = 60
note_sequence_input = []
next_note_output = []

notes = sorted(set(flattened_notes_corpus))
note2int = dict((note, num) for num, note in enumerate(notes))

for i in range(0, len(notes_corpus)):
    for j in range(0, len(notes_corpus[i]) - window_size):
        current_sequence = [note2int[note] for note in notes_corpus[i][j:window_size+j]]
        next_note = note2int[notes_corpus[i][window_size+j]]
        note_sequence_input.append(current_sequence)
        next_note_output.append(next_note)

68


In [16]:
# check train and label shapes
training_data = np.reshape(note_sequence_input, (len(note_sequence_input), window_size , 1))
training_data = training_data / float(vocab_size)
print('Train shape: ' + str(training_data.shape))
training_label = np_utils.to_categorical(next_note_output)
print('Label shape: ' + str(training_label.shape))

Train shape: (6258, 60, 1)
Label shape: (6258, 68)


## 5.2 Durations Corpus

In [17]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'rb') as filepath:
    duration_corpus = pickle.load(filepath)

# Learn on one song at a time
flattened_duration_corpus = []
for song_durations in duration_corpus:
    flattened_duration_corpus += song_durations
    
import collections
counter = collections.Counter(flattened_duration_corpus)
print(counter)


duration_vocab_size = len(set(flattened_duration_corpus))
print(duration_vocab_size)

# Produce input output sequences
duration_window_size = 30
duration_sequence_input = []
next_duration_output = []

durations = sorted(set(flattened_duration_corpus))
duration2int = dict((duration, num) for num, duration in enumerate(durations))

print(duration2int)
# Write
with open(INTERMED_FOLDER + "/edm_duration_counter", 'wb+') as filepath:
    pickle.dump(counter, filepath)

for i in range(0, len(duration_corpus)):
    for j in range(0, len(duration_corpus[i]) - duration_window_size):
        current_duration_sequence = [duration2int[note] for note in duration_corpus[i][j:duration_window_size+j]]
        next_duration = duration2int[duration_corpus[i][duration_window_size+j]]
        duration_sequence_input.append(current_duration_sequence)
        next_duration_output.append(next_duration)

Counter({0.25: 5380, 0.5: 4464, Fraction(1, 3): 1009, 0.0: 998, 1.0: 741, Fraction(1, 6): 729, 0.75: 485, 4.0: 166, 2.0: 130, Fraction(2, 3): 98, Fraction(1, 12): 86, Fraction(5, 12): 72, 1.5: 59, 3.0: 41, Fraction(7, 6): 32, 1.25: 31, 1.75: 30, 2.25: 25, 4.5: 22, 3.5: 13, 6.5: 13, 8.0: 11, 4.25: 9, 2.5: 9, Fraction(25, 6): 7, 5.0: 6, 7.0: 5, 8.5: 4, 32.0: 4, 6.0: 4, Fraction(5, 3): 3, 48.0: 2, 68.0: 2, 176.0: 2, 80.0: 2, 16.0: 2, Fraction(23, 3): 1, 47.5: 1, 3.25: 1, 5.75: 1, 200.5: 1, 36.25: 1, 64.25: 1, 84.25: 1, 72.5: 1, 72.25: 1, 14.5: 1, 9.0: 1, 27.5: 1, 61.0: 1, 161.0: 1, 5.25: 1, 4.75: 1, 94.5: 1, 128.25: 1, 18.0: 1, 39.5: 1, Fraction(553, 6): 1, Fraction(4, 3): 1, 32.5: 1, 36.0: 1})
61
{0.0: 0, Fraction(1, 12): 1, Fraction(1, 6): 2, 0.25: 3, Fraction(1, 3): 4, Fraction(5, 12): 5, 0.5: 6, Fraction(2, 3): 7, 0.75: 8, 1.0: 9, Fraction(7, 6): 10, 1.25: 11, Fraction(4, 3): 12, 1.5: 13, Fraction(5, 3): 14, 1.75: 15, 2.0: 16, 2.25: 17, 2.5: 18, 3.0: 19, 3.25: 20, 3.5: 21, 4.0: 22, Fr

In [18]:
# check train and label shapes
duration_training_data = np.reshape(duration_sequence_input, (len(duration_sequence_input), duration_window_size , 1))
duration_training_data = duration_training_data / float(duration_vocab_size)
print('Train shape: ' + str(duration_training_data.shape))
duration_training_label = np_utils.to_categorical(next_duration_output)
print('Label shape: ' + str(duration_training_label.shape))

Train shape: (13030, 30, 1)
Label shape: (13030, 61)


# 6 Train

## 6.1 Helpers to Create Model

In [0]:
def create_model(network_input, n_vocab, model_size, dropout):
  model = Sequential()
  reg = L1L2(0, 0)
  model.add(LSTM(
      model_size,
      input_shape=(network_input.shape[1], network_input.shape[2]),
      return_sequences=True,
      dropout=dropout, recurrent_dropout=0.3
  ))
  model.add(CuDNNLSTM(model_size, return_sequences=True, kernel_regularizer=reg))
  model.add(Dropout(dropout))
  model.add(CuDNNLSTM(model_size, kernel_regularizer=reg))
  model.add(Dense(128))
  model.add(Dropout(dropout))
  model.add(Dense(n_vocab))
  model.add(Activation('softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

def create_callback_list(model_size, dropout, model_type):
  filepath = INTERMED_FOLDER + '/%skarpathy-model-weights-%s-%s-%s.hdf5' % (CORPUS, model_type, model_size, dropout)
  model_checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
  )
  return [model_checkpoint], filepath

# acc history
def setup_plot(dropout, size):
  plt.title('Model Accuracy vs. Epoc with Dropout=%s Size=%s' % (dropout, size))
  plt.ylabel('accuracy')
  plt.xlabel('epoch')
  
def plot_history(history, model_type, dropout, size):
  plt.plot(history.history['acc'], label="%s train accuracy" % model_type)
  plt.plot(history.history['val_acc'], label="%s val accuracy" % model_type)

def save_plot(file_path):
  plt.legend()
  plt.savefig(file_path)
  plt.clf()
  
def predict_duration(model, WEIGHT_PATH):
  # Prediction
  model.load_weights(WEIGHT_PATH)
  starting_sequence = np.random.randint(219, size=duration_window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  duration2note = dict((num, note) for num, note in enumerate(durations))
  print (duration2note)
  
  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(duration_vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Duration: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Duration: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Duration: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

      prediction_values = np.arange(len(prediction[0]))
      prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))

      # Most probable note prediction
#       index = np.argmax(prediction)
#       note_instance = duration2note[index]
#       prediction_output.append(note_instance)
      index = np.random.choice(prediction_values, 1, p=prediction_prob)
      note_instance = duration2note[int(index[0])]
      prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]

  # Write
  with open(INTERMED_FOLDER + ("%sduration_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  
  return prediction_output

def predict_note(model, WEIGHT_PATH):
  model.load_weights(WEIGHT_PATH)
  # Prediction
  starting_sequence = np.random.randint(219, size=window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  int2note = dict((num, note) for num, note in enumerate(notes))
  print (int2note)

  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Note: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Note: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Note: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

#       prediction_values = np.arange(len(prediction[0]))
#       prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))
#       print(prediction_prob)

      # Most probable note prediction
      index = np.argmax(prediction)
      note_instance = int2note[index]
      prediction_output.append(note_instance)

      # Predict based on prob dist
#       index = np.random.choice(prediction_values, 1, p=prediction_prob)
  #     print(index[0])
  #     print(type(index[0]))
#       note_instance = int2note[int(index[0])]
#       prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]
  # Write
  with open(INTERMED_FOLDER + ("%snotes_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  return prediction_output

def output_midi(prediction_output, duration_prediction_output, dropout, model_size):
  offset = 0
  output_notes = []
  for pattern, duration in zip(prediction_output, duration_prediction_output):

      if ('.' in pattern) or pattern.isdigit():
          chord_array = pattern.split('.')
          chord_notes = []
          for note_instance in chord_array:
              note_object = note.Note(int(note_instance))
              note_object.duration.quarterLength = duration
              note_object.storedInstrument = instrument.Piano()
              chord_notes.append(note_object)
          chord_object = chord.Chord(chord_notes)
          chord_object.offset = offset
          output_notes.append(chord_object)
      elif 'R' == pattern:
          note_object = note.Rest()
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          output_notes.append(note_object)
      else:
          note_object = note.Note(pattern)
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          note_object.storedInstrument = instrument.Piano()
          output_notes.append(note_object)

      offset += 0.5

  midi_stream = stream.Stream(output_notes)
  midi_stream.write('midi', fp=MIDI_OUTPUT_FOLDER + CORPUS + '%s_%s.mid' % (dropout, model_size))
  print('\nWrote midi...')

## 6.2 Preprocessing Optimization

In [20]:
notes_histories = {}
duration_histories = {}

dropout, model_size = DROPOUTS, MODEL_SIZES
setup_plot(dropout, model_size)
print('Running duration training on notewise with rests:%s and root extraction:%s' % (RESTS, ROOT_EXTRACTION))
duration_callbacks, duration_weight_path = create_callback_list('duration', dropout, model_size)
duration_model = create_model(duration_training_data, duration_vocab_size, model_size, dropout)
duration_histories[(dropout, model_size)] = duration_model.fit(duration_training_data, duration_training_label, epochs=EPOCHS, batch_size=DURATION_BATCH_SIZE, callbacks=duration_callbacks, validation_split=0.2)
plot_history(duration_histories[(dropout, model_size)], 'Durations', dropout, model_size)
# output intermed duration
duration_prediction = predict_duration(duration_model, duration_weight_path)

print('\n\nRunning notes training on d:%s s:%s' % (dropout, model_size))
notes_callbacks, note_weight_path = create_callback_list('notes', dropout, model_size)
notes_model = create_model(training_data, vocab_size, model_size, dropout)
notes_histories[(dropout, model_size)] = notes_model.fit(training_data, training_label, epochs=EPOCHS, batch_size=NOTE_BATCH_SIZE, callbacks=notes_callbacks, validation_split=0.2)
plot_history(notes_histories[(dropout, model_size)], 'Notes', dropout, model_size)
save_plot(GRAPHS_FOLDER + EDM_CORPUS + 'dropout=%s_size=%s' % (dropout, model_size))
# output intermed notes
note_prediction = predict_note(notes_model, note_weight_path)

# output final midi
output_midi(note_prediction, duration_prediction, dropout, model_size)

Running duration training on notewise with rests:False and root extraction:True
Train on 10424 samples, validate on 2606 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/

<Figure size 576x396 with 0 Axes>

# Notewise Chord

# 3 Set Training Parameters*

In [0]:
# SET PARAMETERS (ACTION)
RESTS = True
ROOT_EXTRACTION = True
DURATION_BATCH_SIZE = 256
NOTE_BATCH_SIZE = 128
# SPECIFY PARAMETERS TO TEST AS LIST
DROPOUTS = 0
MODEL_SIZES = 256
EPOCHS = 200

# 4 Preprocess Data (MIDI Data Into Notes Corpus and Duration Corpus)

In [22]:
# Test

notes_corpus = []
durations_corpus = []

for file in glob.glob(DATA_FOLDER + "/*.mid"):
    try:
      print("Extracting MIDI File: ", file)
      midi_stream = converter.parse(file)

      notes = None

      partition = instrument.partitionByInstrument(midi_stream)

      if not RESTS:
        # No rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.flat.notes
      else:
        # With rests
        if partition: 
            notes = partition.parts[0].recurse()
        else: 
            notes = midi_stream.notesAndRests
          
      in_song_notes = []
      in_song_durations = []
      for element in notes:
          in_song_durations.append(element.duration.quarterLength)
          if isinstance(element, note.Note):
              in_song_notes.append(str(element.pitch))
          elif RESTS and isinstance(element, note.Rest):
              in_song_notes.append("R")
          elif isinstance(element, chord.Chord):
              if ROOT_EXTRACTION:
                  in_song_notes.append(element.root().nameWithOctave)
              else:
                  in_song.append('.'.join(str(n) for n in element.normalOrder))
    except:
      pass
    
    notes_corpus.append(in_song_notes)
    durations_corpus.append(in_song_durations)
            
# Write
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'wb+') as filepath:
    pickle.dump(notes_corpus, filepath)
    
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'wb+') as filepath:
    pickle.dump(durations_corpus, filepath)

Extracting MIDI File:  drive/Project/train_data/edm_all/Afrojack _ David Guetta - Another Life  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Lonely Together.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Axwell _ Ingrosso - More Than You Know  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Sing Me To Sleep  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Avicii - Without You.mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alesso ft. Matthew Koma - Years (Original Mix) (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Tired  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/train_data/edm_all/Alan Walker - Alone  (midi by Carlo Prato) (www.cprato.com).mid
Extracting MIDI File:  drive/Project/tr

# 5 Preprocess Corpus Into Train Data

## 5.1 Notes Corpus

In [23]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'notes_corpus', 'rb') as filepath:
    notes_corpus = pickle.load(filepath)
    
# If doing learning for one song at a time only
flattened_notes_corpus = []
for song_notes in notes_corpus:
    flattened_notes_corpus += song_notes

vocab_size = len(set(flattened_notes_corpus))
print(vocab_size)

# Produce input output sequences
window_size = 60
note_sequence_input = []
next_note_output = []

notes = sorted(set(flattened_notes_corpus))
note2int = dict((note, num) for num, note in enumerate(notes))

for i in range(0, len(notes_corpus)):
    for j in range(0, len(notes_corpus[i]) - window_size):
        current_sequence = [note2int[note] for note in notes_corpus[i][j:window_size+j]]
        next_note = note2int[notes_corpus[i][window_size+j]]
        note_sequence_input.append(current_sequence)
        next_note_output.append(next_note)

65


In [24]:
# check train and label shapes
training_data = np.reshape(note_sequence_input, (len(note_sequence_input), window_size , 1))
training_data = training_data / float(vocab_size)
print('Train shape: ' + str(training_data.shape))
training_label = np_utils.to_categorical(next_note_output)
print('Label shape: ' + str(training_label.shape))

Train shape: (11392, 60, 1)
Label shape: (11392, 65)


## 5.2 Durations Corpus

In [25]:
# load notes corpus
with open(INTERMED_FOLDER + CORPUS + 'durations_corpus', 'rb') as filepath:
    duration_corpus = pickle.load(filepath)

# Learn on one song at a time
flattened_duration_corpus = []
for song_durations in duration_corpus:
    flattened_duration_corpus += song_durations
    
import collections
counter = collections.Counter(flattened_duration_corpus)
print(counter)


duration_vocab_size = len(set(flattened_duration_corpus))
print(duration_vocab_size)

# Produce input output sequences
duration_window_size = 10
duration_sequence_input = []
next_duration_output = []

durations = sorted(set(flattened_duration_corpus))
duration2int = dict((duration, num) for num, duration in enumerate(durations))

print(duration2int)
# Write
with open(INTERMED_FOLDER + "/edm_duration_counter", 'wb+') as filepath:
    pickle.dump(counter, filepath)

for i in range(0, len(duration_corpus)):
    for j in range(0, len(duration_corpus[i]) - duration_window_size):
        current_duration_sequence = [duration2int[note] for note in duration_corpus[i][j:duration_window_size+j]]
        next_duration = duration2int[duration_corpus[i][duration_window_size+j]]
        duration_sequence_input.append(current_duration_sequence)
        next_duration_output.append(next_duration)

Counter({0.25: 4993, 0.5: 4383, 0.0: 966, Fraction(1, 3): 935, 1.0: 735, Fraction(1, 6): 729, 0.75: 461, 4.0: 165, 2.0: 122, Fraction(2, 3): 93, Fraction(1, 12): 86, Fraction(5, 12): 72, 1.5: 55, 3.0: 41, Fraction(7, 6): 32, 1.75: 30, 1.25: 29, 2.25: 25, 4.5: 22, 3.5: 13, 6.5: 13, 8.0: 11, 4.25: 9, 2.5: 9, Fraction(25, 6): 7, 5.0: 6, 7.0: 5, 8.5: 4, 32.0: 4, 6.0: 4, Fraction(5, 3): 3, 48.0: 2, 68.0: 2, 176.0: 2, 80.0: 2, 16.0: 2, Fraction(23, 3): 1, 47.5: 1, 3.25: 1, 5.75: 1, 200.5: 1, 36.25: 1, 64.25: 1, 84.25: 1, 72.5: 1, 72.25: 1, 14.5: 1, 9.0: 1, 27.5: 1, 61.0: 1, 161.0: 1, 5.25: 1, 4.75: 1, 94.5: 1, 128.25: 1, 18.0: 1, 39.5: 1, Fraction(553, 6): 1, Fraction(4, 3): 1, 32.5: 1, 36.0: 1})
61
{0.0: 0, Fraction(1, 12): 1, Fraction(1, 6): 2, 0.25: 3, Fraction(1, 3): 4, Fraction(5, 12): 5, 0.5: 6, Fraction(2, 3): 7, 0.75: 8, 1.0: 9, Fraction(7, 6): 10, 1.25: 11, Fraction(4, 3): 12, 1.5: 13, Fraction(5, 3): 14, 1.75: 15, 2.0: 16, 2.25: 17, 2.5: 18, 3.0: 19, 3.25: 20, 3.5: 21, 4.0: 22, Fra

In [26]:
# check train and label shapes
duration_training_data = np.reshape(duration_sequence_input, (len(duration_sequence_input), duration_window_size , 1))
duration_training_data = duration_training_data / float(duration_vocab_size)
print('Train shape: ' + str(duration_training_data.shape))
duration_training_label = np_utils.to_categorical(next_duration_output)
print('Label shape: ' + str(duration_training_label.shape))

Train shape: (13677, 10, 1)
Label shape: (13677, 61)


# 6 Train

## 6.1 Helpers to Create Model

In [0]:
def create_model(network_input, n_vocab, model_size, dropout):
  model = Sequential()
  reg = L1L2(0, 0)
  model.add(LSTM(
      model_size,
      input_shape=(network_input.shape[1], network_input.shape[2]),
      return_sequences=True,
      dropout=dropout, recurrent_dropout=0.3
  ))
  model.add(CuDNNLSTM(model_size, return_sequences=True, kernel_regularizer=reg))
  model.add(Dropout(dropout))
  model.add(CuDNNLSTM(model_size, kernel_regularizer=reg))
  model.add(Dense(128))
  model.add(Dropout(dropout))
  model.add(Dense(n_vocab))
  model.add(Activation('softmax'))
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  return model

def create_callback_list(model_size, dropout, model_type):
  filepath = INTERMED_FOLDER + '/%skarpathy-model-weights-%s-%s-%s.hdf5' % (CORPUS, model_type, model_size, dropout)
  model_checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
  )
  return [model_checkpoint], filepath

# acc history
def setup_plot(dropout, size):
  plt.title('Model Accuracy vs. Epoc with Dropout=%s Size=%s' % (dropout, size))
  plt.ylabel('accuracy')
  plt.xlabel('epoch')
  
def plot_history(history, model_type, dropout, size):
  plt.plot(history.history['acc'], label="%s train accuracy" % model_type)
  plt.plot(history.history['val_acc'], label="%s val accuracy" % model_type)

def save_plot(file_path):
  plt.legend()
  plt.savefig(file_path)
  plt.clf()
  
def predict_duration(model, WEIGHT_PATH):
  # Prediction
  model.load_weights(WEIGHT_PATH)
  starting_sequence = np.random.randint(219, size=duration_window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  duration2note = dict((num, note) for num, note in enumerate(durations))
  print (duration2note)
  
  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(duration_vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Duration: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Duration: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Duration: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

      prediction_values = np.arange(len(prediction[0]))
      prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))

      # Most probable note prediction
#       index = np.argmax(prediction)
#       note_instance = duration2note[index]
#       prediction_output.append(note_instance)
      index = np.random.choice(prediction_values, 1, p=prediction_prob)
      note_instance = duration2note[int(index[0])]
      prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]

  # Write
  with open(INTERMED_FOLDER + ("%sduration_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  
  return prediction_output

def predict_note(model, WEIGHT_PATH):
  model.load_weights(WEIGHT_PATH)
  # Prediction
  starting_sequence = np.random.randint(219, size=window_size)
  pattern_sequence = starting_sequence.tolist()
  prediction_output = []

  int2note = dict((num, note) for num, note in enumerate(notes))
  print (int2note)

  for i in range(400):
      prediction_input = np.reshape(pattern_sequence, (1, len(pattern_sequence), 1))
      prediction_input = prediction_input / float(vocab_size)

      if i%3 == 0:   
          print('\r', 'Predicting.  Note: ', i, end='')  
      if i%3 == 1:   
          print('\r', 'Predicting.. Note: ', i, end='')
      if i%3 == 2:   
          print('\r', 'Predicting...Note: ', i, end='')
      prediction = model.predict(prediction_input, verbose=0)

#       prediction_values = np.arange(len(prediction[0]))
#       prediction_prob = np.asarray(list(prediction[0])) / float(sum(prediction[0]))
#       print(prediction_prob)

      # Most probable note prediction
      index = np.argmax(prediction)
      note_instance = int2note[index]
      prediction_output.append(note_instance)

      # Predict based on prob dist
#       index = np.random.choice(prediction_values, 1, p=prediction_prob)
  #     print(index[0])
  #     print(type(index[0]))
#       note_instance = int2note[int(index[0])]
#       prediction_output.append(note_instance)

      pattern_sequence.append(index)
      pattern_sequence = pattern_sequence[1:len(pattern_sequence)]    

  prediction_output = prediction_output[300:len(prediction_output)]
  # Write
  with open(INTERMED_FOLDER + ("%snotes_prediction_output" % CORPUS), 'wb+') as filepath:
      pickle.dump(prediction_output, filepath)
  return prediction_output

def output_midi(prediction_output, duration_prediction_output, dropout, model_size):
  offset = 0
  output_notes = []
  for pattern, duration in zip(prediction_output, duration_prediction_output):

      if ('.' in pattern) or pattern.isdigit():
          chord_array = pattern.split('.')
          chord_notes = []
          for note_instance in chord_array:
              note_object = note.Note(int(note_instance))
              note_object.duration.quarterLength = duration
              note_object.storedInstrument = instrument.Piano()
              chord_notes.append(note_object)
          chord_object = chord.Chord(chord_notes)
          chord_object.offset = offset
          output_notes.append(chord_object)
      elif 'R' == pattern:
          note_object = note.Rest()
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          output_notes.append(note_object)
      else:
          note_object = note.Note(pattern)
          note_object.duration.quarterLength = duration
          note_object.offset = offset
          note_object.storedInstrument = instrument.Piano()
          output_notes.append(note_object)

      offset += 0.5

  midi_stream = stream.Stream(output_notes)
  midi_stream.write('midi', fp=MIDI_OUTPUT_FOLDER + CORPUS + '%s_%s.mid' % (dropout, model_size))
  print('\nWrote midi...')

## 6.2 Preprocessing Optimization

In [28]:
notes_histories = {}
duration_histories = {}

dropout, model_size = DROPOUTS, MODEL_SIZES
setup_plot(dropout, model_size)
print('Running duration training on notewise with rests:%s and root extraction:%s' % (RESTS, ROOT_EXTRACTION))
duration_callbacks, duration_weight_path = create_callback_list('duration', dropout, model_size)
duration_model = create_model(duration_training_data, duration_vocab_size, model_size, dropout)
duration_histories[(dropout, model_size)] = duration_model.fit(duration_training_data, duration_training_label, epochs=EPOCHS, batch_size=DURATION_BATCH_SIZE, callbacks=duration_callbacks, validation_split=0.2)
plot_history(duration_histories[(dropout, model_size)], 'Durations', dropout, model_size)
# output intermed duration
duration_prediction = predict_duration(duration_model, duration_weight_path)

print('\n\nRunning notes training on d:%s s:%s' % (dropout, model_size))
notes_callbacks, note_weight_path = create_callback_list('notes', dropout, model_size)
notes_model = create_model(training_data, vocab_size, model_size, dropout)
notes_histories[(dropout, model_size)] = notes_model.fit(training_data, training_label, epochs=EPOCHS, batch_size=NOTE_BATCH_SIZE, callbacks=notes_callbacks, validation_split=0.2)
plot_history(notes_histories[(dropout, model_size)], 'Notes', dropout, model_size)
save_plot(GRAPHS_FOLDER + EDM_CORPUS + 'dropout=%s_size=%s' % (dropout, model_size))
# output intermed notes
note_prediction = predict_note(notes_model, note_weight_path)

# output final midi
output_midi(note_prediction, duration_prediction, dropout, model_size)

Running duration training on notewise with rests:True and root extraction:True
Train on 10941 samples, validate on 2736 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/2

<Figure size 576x396 with 0 Axes>