In [16]:
from __future__ import print_function, division
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
!pip install music21
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the whole '/kaggle/input' tree and print the full path of every file
# found, one path per line (directories themselves are not printed).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[0m/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_Recital17-19_MID--AUDIO_19_R1_2018_wav--4.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_XP_08_R1_2004_01-02_ORIG_MID--AUDIO_08_R1_2004_02_Track02_wav.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_Recital17-19_MID--AUDIO_18_R1_2018_wav--1.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-UNPROCESSED_21-22_R1_2014_MID--AUDIO_22_R1_2014_wav--5.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_25_R3_2011_MID--AUDIO_R3-D9_06_Track06_wav.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-UNPROCESSED_11-13_R1_2014_MID--AUDIO_13_R1_2014_wav--6.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-UNPROCESSED_04-07-08-10-12-15-17_R2_2014_MID--AUDIO_12_R2_2014_wav.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_06_R1_2006_01-04_ORIG_MID--AUDIO_06_R1_2006_03_Track03_wav.midi
/kaggle/input/maestrov3-selected-consolidated/MIDI-Unprocessed_

In [17]:
import sys
import matplotlib.pyplot as plt
import numpy as np
import pickle
import glob
from music21 import converter, instrument, note, chord, stream
from keras.layers import Input, Dense, Reshape, Dropout, LSTM, Bidirectional
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers import LeakyReLU
from keras.models import Sequential, Model
from tensorflow.keras.optimizers.legacy import Adam
from keras.utils import np_utils

In [18]:
import pickle
import os
from music21 import *

def _extract_events(midi):
    """Turn one parsed music21 score into a list of ``(name, midi, delta)`` tuples.

    ``name`` is ``nameWithOctave`` for a single note, or the dot-joined
    ``nameWithOctave`` of every pitch for a chord.  ``midi`` is the MIDI number
    of the note (or of the chord's first pitch).  ``delta`` is the element's
    offset minus the offset of the previously kept element (0.0 before the
    first one), i.e. the gap between onsets rather than the sounding length.
    """
    try:
        # Prefer the first instrument part when the score can be partitioned.
        elements = instrument.partitionByInstrument(midi).parts[0].recurse()
    except Exception:
        # Best-effort fallback (no partition / no parts): use the flat note
        # stream.  `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still stop a long parsing run.
        elements = midi.flat.notes

    events = []
    prev_offset = 0.0
    for element in elements:
        if isinstance(element, note.Note):
            name = element.nameWithOctave
            pitch = element.pitch.midi
        elif isinstance(element, chord.Chord):
            name = '.'.join(n.nameWithOctave for n in element.pitches)
            pitch = element.pitches[0].midi
        else:
            # Rests, instruments, metadata, ... are ignored.
            continue
        events.append((name, pitch, element.offset - prev_offset))
        prev_offset = element.offset
    return events


def get_notes(cache_file_path='/tmp/notes_cache.pickle',
              data_dir='/kaggle/input/maestrov3-selected-consolidated/'):
    """Load the note/chord events of every ``.midi`` file in ``data_dir``.

    Args:
        cache_file_path: Pickle file used as a cache.  When it exists it is
            loaded and returned as-is and ``data_dir`` is not read at all.
        data_dir: Directory scanned (non-recursively) for files ending in
            ``.midi``.  Defaults to the dataset path the notebook always used.

    Returns:
        A flat list of ``(name, midi, delta)`` tuples for all files, in sorted
        file-name order (see ``_extract_events`` for the tuple layout).

    Side effects:
        Prints one "Parsing ..." line per file and (re)writes the cache file
        whenever the corpus had to be parsed.
    """
    if os.path.isfile(cache_file_path):
        # SECURITY NOTE: unpickling executes arbitrary code; only point
        # cache_file_path at files this notebook wrote itself.
        with open(cache_file_path, 'rb') as f:
            return pickle.load(f)

    notes = []
    # os.listdir order is arbitrary; sort so the cached corpus is reproducible.
    for file in sorted(os.listdir(data_dir)):
        if not file.endswith('.midi'):
            continue
        file_path = os.path.join(data_dir, file)
        # Announce the file before parsing so a slow/failing parse is attributable.
        print("Parsing %s" % file_path)
        midi = converter.parse(file_path, fast=True)
        notes.extend(_extract_events(midi))

    with open(cache_file_path, 'wb') as f:
        pickle.dump(notes, f)

    return notes


In [19]:
def prepare_sequences(notes, n_vocab, sequence_length=256):
    """Build sliding-window training data from a list of note tokens.

    Every distinct item of ``notes`` is mapped to its rank in the sorted set of
    items.  Each window of ``sequence_length`` consecutive tokens becomes one
    input row and the token that follows the window becomes its target.

    Args:
        notes: Sequence of hashable, mutually orderable tokens.
        n_vocab: Vocabulary size used for scaling; inputs are mapped with
            ``(index - n_vocab/2) / (n_vocab/2)``.
        sequence_length: Window length (defaults to the previously hard-coded 256).

    Returns:
        ``(network_input, network_output)`` where ``network_input`` is a float
        array of shape ``(n_patterns, sequence_length, 1)`` and
        ``network_output`` is a float32 one-hot array of shape
        ``(n_patterns, max_target_index + 1)`` (same layout as Keras'
        ``to_categorical`` with its default arguments).

    Raises:
        ValueError: if ``sequence_length`` is not positive or ``notes`` is not
            longer than ``sequence_length`` (no complete window + target).
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    n_patterns = len(notes) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "need more than %d notes to build a training pattern, got %d"
            % (sequence_length, len(notes)))

    pitchnames = sorted(set(notes))
    note_to_int = {item: number for number, item in enumerate(pitchnames)}

    # Encode the corpus once instead of re-encoding every overlapping window.
    encoded = np.array([note_to_int[item] for item in notes], dtype=np.int64)

    # Row i holds encoded[i : i + sequence_length]; its target is encoded[i + sequence_length].
    window_index = np.arange(n_patterns)[:, None] + np.arange(sequence_length)[None, :]
    network_input = encoded[window_index].reshape(n_patterns, sequence_length, 1)
    targets = encoded[sequence_length:]

    half_vocab = float(n_vocab) / 2
    network_input = (network_input - half_vocab) / half_vocab

    # One-hot encode with plain NumPy so this step does not rely on
    # keras.utils.np_utils, which newer Keras releases no longer provide.
    num_classes = int(targets.max()) + 1
    network_output = np.zeros((n_patterns, num_classes), dtype='float32')
    network_output[np.arange(n_patterns), targets] = 1.0

    return (network_input, network_output)

In [20]:
def generate_notes(model, network_input, n_vocab, notes=None, n_notes=500):
    """Generate a sequence of notes by repeatedly querying ``model``.

    A random row of ``network_input`` seeds a sliding window.  At every step
    the window is fed to ``model.predict``, the arg-max class is decoded to a
    note, and the window is shifted by one with the new prediction appended.

    Args:
        model: Object with a Keras-style ``predict(x, verbose=0)`` method whose
            output arg-max is a vocabulary index.
        network_input: Array of seed windows, indexed ``[pattern][step]``.
            Assumed to be scaled the way ``prepare_sequences`` scales its
            inputs, i.e. ``(index - n_vocab/2) / (n_vocab/2)`` -- TODO confirm
            against how ``model`` was trained.
        n_vocab: Vocabulary size used for that scaling.
        notes: The note tokens the vocabulary is built from.  When omitted, a
            module-level ``notes`` variable is used (the original behaviour).
        n_notes: How many notes to generate (defaults to 500).

    Returns:
        List of ``n_notes`` decoded vocabulary items.

    Raises:
        NameError: if ``notes`` is omitted and no module-level ``notes`` exists.
    """
    if notes is None:
        # Backward compatibility with the original implicit global lookup.
        notes = globals().get('notes')
        if notes is None:
            raise NameError(
                "generate_notes needs the note list: pass notes=... "
                "or define a module-level 'notes'")

    # Same sorted-set vocabulary as used when encoding the training data.
    pitchnames = sorted(set(notes))
    int_to_note = dict(enumerate(pitchnames))
    half_vocab = float(n_vocab) / 2

    # randint's upper bound is exclusive: this covers every pattern and also
    # works when there is exactly one.
    start = np.random.randint(0, len(network_input))

    pattern = np.asarray(network_input[start], dtype=float).reshape(-1)
    prediction_output = []

    for _ in range(n_notes):
        # The seed is already scaled, so it is fed to the model unchanged.
        prediction_input = pattern.reshape(1, len(pattern), 1)
        prediction = model.predict(prediction_input, verbose=0)

        index = int(np.argmax(prediction))
        prediction_output.append(int_to_note[index])

        # Slide the window, appending the prediction in the same scaled space
        # as the rest of the window.
        pattern = np.append(pattern[1:], (index - half_vocab) / half_vocab)

    return prediction_output

In [21]:
def create_midi(prediction_output, filename):
    """ turn each predicted item into a music21 note or chord, spaced half an
        offset unit apart, and write the result to '<filename>.midi' """

    def piano_note(pitch_name):
        # every generated pitch is tagged with a piano instrument
        built = note.Note(pitch_name)
        built.storedInstrument = instrument.Piano()
        return built

    output_notes = []
    offset = 0

    for item in prediction_output:
        # only the first field of each item (the pitch-name string) is used
        pattern = item[0]
        is_chord = ('.' in pattern) or pattern.isdigit()

        if is_chord:
            # dot-separated pitch names -> one chord made of piano notes
            element = chord.Chord([piano_note(name) for name in pattern.split('.')])
        else:
            # a single pitch name -> a single piano note
            element = piano_note(pattern)

        element.offset = offset
        output_notes.append(element)

        # advance the offset every iteration so elements do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='{}.midi'.format(filename))


In [25]:
class GAN():
    """Dense-generator / LSTM-discriminator GAN over fixed-length note sequences.

    The generator maps a ``latent_dim`` noise vector to a ``(seq_length, 1)``
    sequence with values in [-1, 1] (tanh output); the discriminator scores a
    ``(seq_length, 1)`` sequence with a sigmoid.
    """

    def __init__(self, rows):
        """Build and compile the discriminator, generator and combined model.

        Args:
            rows: Length of the note sequences the networks work on.
        """
        self.seq_length = rows
        self.seq_shape = (self.seq_length, 1)
        self.latent_dim = 1000
        # Loss histories, appended to by train() and drawn by plot_loss().
        self.disc_loss = []
        self.gen_loss = []

        optimizer = Adam(0.0001, 0.5)

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        self.generator = self.build_generator()

        # The generator takes noise as input and generates note sequences
        z = Input(shape=(self.latent_dim,))
        generated_seq = self.generator(z)

        # Freeze the discriminator inside the combined model so that training
        # `combined` is meant to update the generator only.
        self.discriminator.trainable = False

        validity = self.discriminator(generated_seq)

        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_discriminator(self):
        """Return a Model mapping a ``seq_shape`` sequence to a sigmoid validity score."""
        model = Sequential()
        model.add(LSTM(512, input_shape=self.seq_shape, return_sequences=True))
        model.add(Bidirectional(LSTM(512)))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        seq = Input(shape=self.seq_shape)
        validity = model(seq)

        return Model(seq, validity)

    def build_generator(self):
        """Return a Model mapping a ``latent_dim`` noise vector to a ``seq_shape`` sequence."""
        model = Sequential()
        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # tanh keeps every generated value in [-1, 1].
        model.add(Dense(np.prod(self.seq_shape), activation='tanh'))
        model.add(Reshape(self.seq_shape))
        model.summary()

        noise = Input(shape=(self.latent_dim,))
        seq = model(noise)

        return Model(noise, seq)

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Train the GAN, then write a generated MIDI file and the loss plot.

        Each iteration trains the discriminator on one random batch of real
        sequences and one batch of generated sequences, then trains the
        generator through the combined model.

        Args:
            epochs: Number of training iterations (one batch each).
            batch_size: Sequences per batch.
            sample_interval: Print and record the losses every this many iterations.

        Raises:
            ValueError: if the prepared training windows do not have shape
                ``self.seq_shape``.
        """
        notes = get_notes()
        n_vocab = len(set(notes))
        # Only the input windows are needed; the one-hot targets are unused here.
        X_train, _ = prepare_sequences(notes, n_vocab)

        if tuple(X_train.shape[1:]) != tuple(self.seq_shape):
            # Fail early with a readable message instead of a Keras shape error.
            raise ValueError(
                "training windows have shape %s but the GAN was built for %s"
                % (tuple(X_train.shape[1:]), tuple(self.seq_shape)))

        # Target labels: 1 for real sequences, 0 for generated ones.
        real = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # --- discriminator step ---
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            real_seqs = X_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # verbose=0: avoid one progress bar per training iteration.
            gen_seqs = self.generator.predict(noise, verbose=0)

            d_loss_real = self.discriminator.train_on_batch(real_seqs, real)
            d_loss_fake = self.discriminator.train_on_batch(gen_seqs, fake)
            # Element-wise mean of [loss, accuracy] over the two batches.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # --- generator step: try to make the discriminator answer "real" ---
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            g_loss = self.combined.train_on_batch(noise, real)

            if epoch % sample_interval == 0:
                print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
                self.disc_loss.append(d_loss[0])
                self.gen_loss.append(g_loss)

        self.generate(notes)
        self.plot_loss()

    def generate(self, input_notes):
        """Sample one sequence from the generator and save it as 'gan_final.midi'.

        The generator output in [-1, 1] is mapped back to vocabulary indices
        with ``index = x * V/2 + V/2`` where ``V`` is the number of distinct
        items in ``input_notes``, rounded to the nearest index and clipped to
        ``[0, V - 1]`` (an output of exactly 1.0 would otherwise fall one past
        the last entry).

        Args:
            input_notes: The note tokens the vocabulary is built from.

        Raises:
            ValueError: if ``input_notes`` is empty.
        """
        pitchnames = sorted(set(input_notes))
        if not pitchnames:
            raise ValueError("input_notes is empty: no vocabulary to decode with")
        int_to_note = dict(enumerate(pitchnames))
        # Derived from the data instead of a hard-coded half-vocabulary of 242.
        half_vocab = len(pitchnames) / 2.0

        noise = np.random.normal(0, 1, (1, self.latent_dim))
        predictions = self.generator.predict(noise, verbose=0)

        # predictions[0] is one generated sequence; flatten it to plain scalars.
        scaled = np.asarray(predictions[0], dtype=float).reshape(-1)
        indices = np.rint(scaled * half_vocab + half_vocab).astype(int)
        indices = np.clip(indices, 0, len(pitchnames) - 1)
        pred_notes = [int_to_note[int(i)] for i in indices]

        create_midi(pred_notes, 'gan_final')

    def plot_loss(self):
        """Save the recorded discriminator/generator losses to 'GAN_Loss_per_Epoch.png'."""
        plt.plot(self.disc_loss, c='red')
        plt.plot(self.gen_loss, c='blue')
        plt.title("GAN Loss per Epoch")
        plt.legend(['Discriminator', 'Generator'])
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.savefig('GAN_Loss_per_Epoch.png', transparent=True)
        plt.close()


In [42]:
if __name__ == '__main__':
    # Build a GAN for 256-step sequences and train it:
    # 1000 iterations, batches of 16, losses reported every iteration.
    gan = GAN(rows=256)
    gan.train(epochs=1000, batch_size=16, sample_interval=1)

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_18 (LSTM)              (None, 256, 512)          1052672   
                                                                 
 bidirectional_9 (Bidirectio  (None, 1024)             4198400   
 nal)                                                            
                                                                 
 dense_63 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_45 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_64 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_46 (LeakyReLU)  (None, 256)               0         
                                                     

In [43]:
if __name__ == '__main__':
    # Build a GAN for 256-step sequences and train it:
    # 100 iterations, batches of 2, losses reported every iteration.
    gan = GAN(rows=256)
    gan.train(epochs=100, batch_size=2, sample_interval=1)

Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_20 (LSTM)              (None, 256, 512)          1052672   
                                                                 
 bidirectional_10 (Bidirecti  (None, 1024)             4198400   
 onal)                                                           
                                                                 
 dense_70 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_50 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_71 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_51 (LeakyReLU)  (None, 256)               0         
                                                     

In [44]:
if __name__ == '__main__':
    # Build a GAN for 256-step sequences and train it:
    # 50 iterations, batches of 1, losses reported every iteration.
    gan = GAN(rows=256)
    gan.train(epochs=50, batch_size=1, sample_interval=1)

Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_22 (LSTM)              (None, 256, 512)          1052672   
                                                                 
 bidirectional_11 (Bidirecti  (None, 1024)             4198400   
 onal)                                                           
                                                                 
 dense_77 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_55 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_78 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_56 (LeakyReLU)  (None, 256)               0         
                                                     

In [None]:
if __name__ == '__main__':
    # Build a GAN for 256-step sequences and train it:
    # 10000 iterations, batches of 48, losses reported every iteration.
    gan = GAN(rows=256)
    gan.train(epochs=10000, batch_size=48, sample_interval=1)

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_17 (LSTM)              (None, 256, 512)          1052672   
                                                                 
 bidirectional_8 (Bidirectio  (None, 1024)             4198400   
 nal)                                                            
                                                                 
 dense_56 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_40 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_57 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_41 (LeakyReLU)  (None, 256)               0         
                                                     

In [None]:
if __name__ == '__main__':
    # Build a GAN for 256-step sequences and train it:
    # 2000 iterations, batches of 8, losses reported every iteration.
    gan = GAN(rows=256)
    gan.train(epochs=2000, batch_size=8, sample_interval=1)

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 256, 512)          1052672   
                                                                 
 bidirectional_6 (Bidirectio  (None, 1024)             4198400   
 nal)                                                            
                                                                 
 dense_42 (Dense)            (None, 512)               524800    
                                                                 
 leaky_re_lu_30 (LeakyReLU)  (None, 512)               0         
                                                                 
 dense_43 (Dense)            (None, 256)               131328    
                                                                 
 leaky_re_lu_31 (LeakyReLU)  (None, 256)               0         
                                                     