# Train GAN

In [1]:
import os
import numpy as np
import glob
import pickle
from tqdm import tqdm
from timeit import default_timer as timer
import matplotlib.pyplot as plt
from music21 import converter, instrument, note, chord, stream
from tensorflow.keras.layers import Activation, Dense, Bidirectional, LSTM, LeakyReLU, BatchNormalization, Reshape, Input
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
# Set TF_FORCE_GPU_ALLOW_GROWTH in this process's environment; TensorFlow reads
# it to decide whether GPU memory is grown on demand.
# NOTE(review): this runs after the tensorflow imports above -- confirm it is
# still picked up before the first GPU device is initialised.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

In [2]:
# Directory that holds the training MIDI files ('*.mid' files are globbed from it).
PATH_TO_DATASET = './Dataset/jigs'
# Directory intended for saved model files.
PATH_TO_MODEL = './Models'

In [3]:
# print(os.listdir(PATH_TO_DATASET))

In [4]:
# Collect every MIDI file in the dataset directory. glob yields paths in an
# arbitrary, filesystem-dependent order, so sort them: the order of this list
# determines the order of the parsed note stream and therefore of every
# training window built from it.
midi_file_list = sorted(glob.glob(PATH_TO_DATASET + '/*.mid'))
print(len(midi_file_list))

340


In [5]:
def extract_notes_from_dataset(midi_files=None, output_path='notes'):
    """Parse MIDI files into one flat list of note/chord tokens and pickle it.

    Each `note.Note` becomes the string form of its pitch; each `chord.Chord`
    becomes the integers of its `normalOrder` joined with '.'. Any other
    element type is ignored.

    Args:
        midi_files: Iterable of MIDI file paths to parse. Defaults to the
            module-level `midi_file_list`, which keeps the original
            zero-argument call working.
        output_path: File the resulting token list is pickled to
            (default 'notes', relative to the working directory).

    Returns:
        list[str]: Tokens of all files concatenated in iteration order.
    """
    if midi_files is None:
        midi_files = midi_file_list

    notes = []

    for midi_file in tqdm(midi_files, desc='Parsing MIDI files', ncols=100):
        midi = converter.parse(midi_file)
        parts = instrument.partitionByInstrument(midi)

        if parts:  # file has instrument parts: only the first part is used
            notes_to_parse = parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    with open(output_path, 'wb') as filepath:
        # write notes in binary format to filepath
        pickle.dump(notes, filepath)

    return notes

In [6]:
# Parse the dataset into a flat token list (the call also pickles the list to
# disk), then report how large the corpus and its vocabulary are.
notes = extract_notes_from_dataset()

print('\nTotal number of MIDI files in dataset: ', len(midi_file_list))
print('\nTotal number of notes: ', len(notes))
print('Total number of unique notes: ', len(set(notes)))

Parsing MIDI files: 100%|█████████████████████████████████████████| 340/340 [00:51<00:00,  6.56it/s]



Total number of MIDI files in dataset:  340

Total number of notes:  85829
Total number of unique notes:  75


In [7]:
# Number of consecutive tokens that form one training window.
sequence_length = 100

# Vocabulary: every distinct token, sorted so that integer ids are stable.
unique_notes = sorted(set(notes))
unique_notes_count = len(unique_notes)

# Two-way lookup tables between integer ids and tokens.
int_to_note = dict(enumerate(unique_notes))
note_to_int = {token: token_id for token_id, token in int_to_note.items()}

print(note_to_int)

{'0.3.6': 0, '0.3.7': 1, '0.4.7': 2, '0.4.8': 3, '1.4.7': 4, '1.4.7.9': 5, '1.4.8': 6, '10.1.4.6': 7, '10.2.5': 8, '11.2.4.7': 9, '11.2.5.7': 10, '11.2.6': 11, '11.3.6': 12, '2.5.8': 13, '2.5.8.10': 14, '2.5.9': 15, '2.6.9': 16, '3.6.9.11': 17, '3.7.10': 18, '4.7.10': 19, '4.7.10.0': 20, '4.7.11': 21, '4.8.11': 22, '5.8.11': 23, '5.9.0': 24, '6.10.1': 25, '6.9.0.2': 26, '6.9.0.2.3': 27, '6.9.1': 28, '6.9.11': 29, '6.9.11.2': 30, '7.10.1': 31, '7.10.2': 32, '7.11.2': 33, '7.9.1': 34, '8.11.2.4': 35, '8.11.2.4.5': 36, '9.0.3': 37, '9.0.3.5': 38, '9.0.4': 39, '9.1': 40, '9.1.4': 41, '9.11.2.5': 42, 'A3': 43, 'A4': 44, 'A5': 45, 'B-3': 46, 'B-4': 47, 'B-5': 48, 'B3': 49, 'B4': 50, 'B5': 51, 'C#4': 52, 'C#5': 53, 'C#6': 54, 'C4': 55, 'C5': 56, 'C6': 57, 'D4': 58, 'D5': 59, 'D6': 60, 'E-4': 61, 'E-5': 62, 'E4': 63, 'E5': 64, 'F#4': 65, 'F#5': 66, 'F4': 67, 'F5': 68, 'G#3': 69, 'G#4': 70, 'G#5': 71, 'G3': 72, 'G4': 73, 'G5': 74}


In [8]:
# Encode the whole corpus once, then slide a window of `sequence_length` ids
# over it: each window is an input and the id right after it is its target.
encoded_notes = [note_to_int[token] for token in notes]
window_starts = range(len(encoded_notes) - sequence_length)

trainX = [encoded_notes[start:start + sequence_length] for start in window_starts]
trainY = [encoded_notes[start + sequence_length] for start in window_starts]

num_training_seq = len(trainX)

print('Number of training sequences: ', num_training_seq)
print('\nFirst training sequence: ', trainX[0])
print('\nFirst target sequence: ', trainY[0])

Number of training sequences:  85729

First training sequence:  [66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 66, 50, 11, 53, 50, 50, 35, 66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 53, 59, 64, 16, 66, 64, 59, 35, 53, 50, 44, 41, 53, 64, 45, 66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 66, 50, 11, 53, 50, 50, 35, 66, 64, 41, 53, 53, 53, 66, 64, 41, 53, 53, 53, 53, 59, 64, 16, 66, 64, 59, 35, 53, 50, 44]

First target sequence:  41


In [9]:
# NOTE: this cell overwrites trainX / trainY in place, so it must be run exactly
# once after the windowing cell; re-running it would normalise/encode twice.

# Shape the integer windows as (samples, timesteps, features) for the LSTM.
trainX = np.reshape(trainX, (num_training_seq, sequence_length, 1))
print('Shape of trainX after reshaping', trainX.shape)

# Rescale ids from [0, unique_notes_count) to roughly [-1, 1) so real samples
# live in the same range as the generator's tanh output.
half_vocab = float(unique_notes_count) / 2
trainX = (trainX - half_vocab) / half_vocab

# Pin the one-hot width to the vocabulary size. Without num_classes the width
# is inferred from the largest target present, which can be smaller than the
# vocabulary if the highest id never occurs as a target.
trainY = to_categorical(trainY, num_classes=unique_notes_count)
print('Shape of trainY after one hot encoding: ', trainY.shape)


Shape of trainX after reshaping (85729, 100, 1)
Shape of trainY after one hot encoding:  (85729, 75)


# **MODEL**

In [10]:
# Number of training iterations; trainGAN() performs one batch update of each
# network per "epoch", not a full pass over the data.
epochs = 1000
batch_size = 32
learning_rate = 2e-4
# Model input length must equal the window length used to build trainX, so
# derive it instead of repeating the literal.
seq_length = sequence_length
# Size of the noise vector fed to the generator.
latent_dim = 1000
seq_shape = (seq_length, 1)
# Loss histories, filled by trainGAN().
generator_loss = []
discriminator_loss = []

optimizer = Adam(learning_rate)

In [11]:
def build_discriminator():
    """Build the sequence discriminator and print its layer summary.

    The network reads a sequence of shape `seq_shape` through a stacked
    LSTM / bidirectional LSTM, then two LeakyReLU dense layers, and ends in a
    single sigmoid unit.

    Returns:
        Model: Functional wrapper mapping an input of shape `seq_shape` to the
        discriminator's sigmoid output.
    """
    critic = Sequential([
        LSTM(512, input_shape=seq_shape, return_sequences=True),
        Bidirectional(LSTM(512)),
        Dense(512),
        LeakyReLU(alpha=0.2),
        Dense(256),
        LeakyReLU(alpha=0.2),
        Dense(1, activation='sigmoid'),
    ])

    print('\nDiscriminator model: \n')
    critic.summary()

    # Wrap the stack in a functional Model with an explicit Input tensor.
    sequence_in = Input(shape=seq_shape)
    return Model(sequence_in, critic(sequence_in))

In [12]:
def build_generator():
    """Build the noise-to-sequence generator and print its layer summary.

    Three Dense -> LeakyReLU -> BatchNormalization stages of growing width are
    followed by a tanh Dense layer that is reshaped to `seq_shape`.

    Returns:
        Model: Functional wrapper mapping a noise vector of length
        `latent_dim` to a generated sequence of shape `seq_shape`.
    """
    hidden_widths = (256, 512, 1024)

    net = Sequential()
    for position, width in enumerate(hidden_widths):
        # Only the first layer declares the input size.
        dense_kwargs = {'input_dim': latent_dim} if position == 0 else {}
        net.add(Dense(width, **dense_kwargs))
        net.add(LeakyReLU(alpha=0.2))
        net.add(BatchNormalization(momentum=0.8))

    # One tanh output per element of the sequence, then restore the shape.
    net.add(Dense(np.prod(seq_shape), activation='tanh'))
    net.add(Reshape(seq_shape))

    print('\nGenerator model: \n')
    net.summary()

    noise_in = Input(shape=(latent_dim,))
    return Model(noise_in, net(noise_in))

In [13]:
# Build and compile the discriminator with its own optimizer instance.
# The two compiled models update disjoint sets of weights; sharing one Adam
# object would mix their step counters/moment state, and newer Keras optimizers
# refuse to be reused for a different variable set.
discriminator = build_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate), metrics=['accuracy'])

# Build the generator
generator = build_generator()

# Take output from generator after feeding it with noise
input_seq = Input(shape=(latent_dim,))
generated_seq = generator(input_seq)

# For combined model only generator should be trained
# (set after the discriminator was compiled, before the combined model is).
discriminator.trainable = False

output_seq = discriminator(generated_seq)

# Stacked generator -> discriminator model, used to train the generator only.
combinedModel = Model(input_seq, output_seq)
combinedModel.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate))


Discriminator model: 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 512)          1052672   
_________________________________________________________________
bidirectional (Bidirectional (None, 1024)              4198400   
_________________________________________________________________
dense (Dense)                (None, 512)               524800    
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (No

In [14]:
def trainGAN():
    """Run the adversarial training loop for `epochs` iterations.

    Each iteration draws one random batch of real sequences from `trainX`,
    generates one batch of fake sequences, updates the discriminator on both,
    and then updates the generator through `combinedModel` using "real" labels.

    Side effects:
        Resets the module-level `discriminator_loss` and `generator_loss`
        lists and appends one value to each on the first iteration and on
        every fifth iteration (the same iterations that are printed).
    """
    # Only the names that are rebound here need a global declaration; all the
    # other module-level objects are merely read.
    global discriminator_loss, generator_loss

    discriminator_loss = []
    generator_loss = []

    # Target labels: 1 for real sequences, 0 for generated ones.
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    start_time = timer()

    for epoch in range(epochs):

        # Choosing a batch of sequences randomly to train model
        seq_indexes = np.random.randint(0, len(trainX), batch_size)
        real_seq_batch = trainX[seq_indexes]

        # predict_on_batch runs the single batch directly; predict() builds a
        # full prediction loop on every call, which is needless overhead here.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        generated_seq_batch = generator.predict_on_batch(noise)

        # Training discriminator
        disc_loss_real = discriminator.train_on_batch(real_seq_batch, real)
        disc_loss_fake = discriminator.train_on_batch(generated_seq_batch, fake)

        # Element-wise mean of [loss, accuracy] over the real and fake updates.
        disc_loss = np.add(disc_loss_real, disc_loss_fake) * 0.5

        # Training generator: it is rewarded when the discriminator says "real".
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        gen_loss = combinedModel.train_on_batch(noise, real)

        if epoch == 0 or (epoch+1) % 5 == 0:
            print('EPOCH: {:<4} / {} \t DISC_LOSS: {:.2f} \t DISC_ACC: {:.2f} \t GEN_LOSS: {:.2f}'.format(epoch+1, epochs, disc_loss[0], disc_loss[1], gen_loss))
            discriminator_loss.append(disc_loss[0])
            generator_loss.append(gen_loss)

    end_time = timer()
    elapsed_time = '{:.2f}s'.format(end_time - start_time)
    print('\nTotal time elapsed: ', elapsed_time)

In [42]:
# Run the adversarial training loop; progress is printed for the first
# iteration and then every fifth one.
trainGAN()

EPOCH: 1    / 1000 	 DISC_LOSS: 0.70 	 DISC_ACC: 0.00 	 GEN_LOSS: 0.69
EPOCH: 5    / 1000 	 DISC_LOSS: 0.60 	 DISC_ACC: 0.70 	 GEN_LOSS: 0.67
EPOCH: 10   / 1000 	 DISC_LOSS: 0.27 	 DISC_ACC: 0.91 	 GEN_LOSS: 0.92
EPOCH: 15   / 1000 	 DISC_LOSS: 0.16 	 DISC_ACC: 0.98 	 GEN_LOSS: 7.47
EPOCH: 20   / 1000 	 DISC_LOSS: 0.09 	 DISC_ACC: 0.98 	 GEN_LOSS: 8.08
EPOCH: 25   / 1000 	 DISC_LOSS: 0.11 	 DISC_ACC: 0.98 	 GEN_LOSS: 8.76
EPOCH: 30   / 1000 	 DISC_LOSS: 0.26 	 DISC_ACC: 0.94 	 GEN_LOSS: 3.89
EPOCH: 35   / 1000 	 DISC_LOSS: 0.11 	 DISC_ACC: 0.95 	 GEN_LOSS: 9.04
EPOCH: 40   / 1000 	 DISC_LOSS: 0.41 	 DISC_ACC: 0.86 	 GEN_LOSS: 13.56
EPOCH: 45   / 1000 	 DISC_LOSS: 0.27 	 DISC_ACC: 0.91 	 GEN_LOSS: 16.81
EPOCH: 50   / 1000 	 DISC_LOSS: 0.16 	 DISC_ACC: 0.97 	 GEN_LOSS: 8.42
EPOCH: 55   / 1000 	 DISC_LOSS: 0.23 	 DISC_ACC: 0.94 	 GEN_LOSS: 7.68
EPOCH: 60   / 1000 	 DISC_LOSS: 0.28 	 DISC_ACC: 0.89 	 GEN_LOSS: 18.56
EPOCH: 65   / 1000 	 DISC_LOSS: 0.08 	 DISC_ACC: 0.97 	 GEN_LOSS: 26.04
EP

EPOCH: 580  / 1000 	 DISC_LOSS: 0.20 	 DISC_ACC: 0.94 	 GEN_LOSS: 4.44
EPOCH: 585  / 1000 	 DISC_LOSS: 0.25 	 DISC_ACC: 0.89 	 GEN_LOSS: 4.17
EPOCH: 590  / 1000 	 DISC_LOSS: 0.23 	 DISC_ACC: 0.91 	 GEN_LOSS: 3.62
EPOCH: 595  / 1000 	 DISC_LOSS: 0.17 	 DISC_ACC: 0.94 	 GEN_LOSS: 4.63
EPOCH: 600  / 1000 	 DISC_LOSS: 0.20 	 DISC_ACC: 0.94 	 GEN_LOSS: 4.94
EPOCH: 605  / 1000 	 DISC_LOSS: 0.07 	 DISC_ACC: 0.98 	 GEN_LOSS: 3.34
EPOCH: 610  / 1000 	 DISC_LOSS: 0.20 	 DISC_ACC: 0.92 	 GEN_LOSS: 4.43
EPOCH: 615  / 1000 	 DISC_LOSS: 0.14 	 DISC_ACC: 0.95 	 GEN_LOSS: 4.05
EPOCH: 620  / 1000 	 DISC_LOSS: 0.26 	 DISC_ACC: 0.91 	 GEN_LOSS: 4.11
EPOCH: 625  / 1000 	 DISC_LOSS: 0.14 	 DISC_ACC: 0.95 	 GEN_LOSS: 4.60
EPOCH: 630  / 1000 	 DISC_LOSS: 0.17 	 DISC_ACC: 0.94 	 GEN_LOSS: 4.41
EPOCH: 635  / 1000 	 DISC_LOSS: 0.17 	 DISC_ACC: 0.94 	 GEN_LOSS: 3.74
EPOCH: 640  / 1000 	 DISC_LOSS: 0.24 	 DISC_ACC: 0.92 	 GEN_LOSS: 3.75
EPOCH: 645  / 1000 	 DISC_LOSS: 0.11 	 DISC_ACC: 0.97 	 GEN_LOSS: 3.93
EPOCH:

In [43]:
# Save under PATH_TO_MODEL rather than a second hard-coded copy of the path,
# and create the directory first so saving works on a fresh checkout.
os.makedirs(PATH_TO_MODEL, exist_ok=True)
generator.save(os.path.join(PATH_TO_MODEL, 'generator_model.h5'))

In [17]:
# Save under PATH_TO_MODEL rather than a second hard-coded copy of the path,
# and create the directory first so saving works on a fresh checkout.
os.makedirs(PATH_TO_MODEL, exist_ok=True)
discriminator.save(os.path.join(PATH_TO_MODEL, 'discriminator_model.h5'))