In [None]:
!pip install -r requirements.txt

In [None]:
# Imports
import os
import glob
import json
import numpy as np
import keras
from enum import Enum
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, BatchNormalization, Dropout
from keras.optimizers import Adam, RMSprop
from keras import backend as K
from random import randrange
import random
import math
import pypianoroll
from utils.midi_utils import play_midi, plot_pianoroll, get_music_metrics, process_pianoroll, process_midi
from constants import Constants
from augmentation import AddAndRemoveAPercentageOfNotes
from data_generator import PianoRollGenerator
from utils.generate_training_plots import GenerateTrainingPlots
from inference import Inference
from model import OptimizerType
from model import ArCnnModel

In [None]:
# Lower/upper sampling bounds handed to PianoRollGenerator for its "remove" and "add" settings.
# NOTE(review): presumably ranges for the percentage of notes removed from / added to a
# sample during augmentation — confirm against PianoRollGenerator.
sampling_lower_bound_remove, sampling_upper_bound_remove = 0, 100
sampling_lower_bound_add, sampling_upper_bound_add = 1, 1.5

In [None]:
# Customized loss function
class Loss:
    @staticmethod
    def built_in_softmax_kl_loss(target, output):
        '''
        Kullback-Leibler divergence loss computed over the flattened tensors.

        Both tensors are flattened to 1-D. The target is divided by its own sum,
        the output is passed through a softmax, and the KL divergence between
        the two results is returned.

        :param target: ground truth values
        :param output: predicted values
        :return kullback_leibler_divergence loss
        '''
        flat_target = K.flatten(target)
        flat_output = K.flatten(output)
        # NOTE(review): a target that sums to zero makes this a division by zero.
        target_distribution = flat_target / K.sum(flat_target)
        predicted_distribution = K.softmax(flat_output)
        return keras.losses.kullback_leibler_divergence(target_distribution,
                                                        predicted_distribution)

In [73]:
# Import the MIDI files from the data directory and save them in the midi_files variable
def find_midi_files(data_root, sub_dirs):
    """
    Collect the '.mid' files stored anywhere below the given sub-directories.

    data_root: directory that contains the sub-directories to search
    sub_dirs: names of the sub-directories of data_root to search
    return: list of matching paths, grouped by sub-directory in the order given
            and sorted within each sub-directory
    """
    found = []
    for sub_dir in sub_dirs:
        pattern = '/'.join((data_root, sub_dir, '**', '*.mid'))
        # recursive=True is what lets '**' span any number of nested directories;
        # without it glob treats '**' like a plain '*' and only files exactly one
        # directory deep are matched.
        # Sorting gives a stable order, which glob itself does not guarantee.
        found.extend(sorted(glob.glob(pattern, recursive=True)))
    return found


midi_files = find_midi_files('data', ['gb', 'ms', 'nes', 'snes'])

# Generate MIDI file samples
def generate_samples(midi_files, bars, beats_per_bar, beat_resolution, bars_shifted_per_sample):
    """
    Build the piano roll samples for every file in midi_files.

    midi_files: paths of the MIDI files to process
    bars: bar count; multiplied by beats_per_bar and beat_resolution to get the
          time-step count passed to process_pianoroll
    beats_per_bar: factor used to convert bars into time steps
    beat_resolution: factor used to convert bars into time steps; also passed to process_midi
    bars_shifted_per_sample: bar count; converted to time steps the same way and
                             passed to process_pianoroll as the shift
    return: one list holding the samples process_pianoroll returned for all files, in file order
    """
    timesteps_per_sample = bars * beats_per_bar * beat_resolution
    timesteps_shifted = bars_shifted_per_sample * beats_per_bar * beat_resolution

    collected = []
    for path in midi_files:
        print('process ' + path + '...')
        # Parse the MIDI file into a piano roll, then cut it into samples
        roll = process_midi(path, beat_resolution)
        collected += process_pianoroll(roll, timesteps_shifted, timesteps_per_sample)
    return collected

# Saving the generated samples into a dataset variable
dataset_samples = generate_samples(midi_files, Constants.bars, Constants.beats_per_bar,
                                   Constants.beat_resolution, Constants.bars_shifted_per_sample)

# Shuffle the dataset in place so the training/validation partitions are random.
# NOTE(review): no seed is set in this cell, so the split presumably differs
# between runs — seed `random` earlier if a reproducible split is needed.
random.shuffle(dataset_samples)

dataset_size = len(dataset_samples)
# Number of training samples; also the index of the first validation sample
dataset_split = math.floor(dataset_size * Constants.training_validation_split)

training_samples = dataset_samples[:dataset_split]
print("training samples length: {}".format(len(training_samples)))
# The validation partition starts exactly at dataset_split. Starting at
# dataset_split + 1 would silently drop one sample from both partitions.
validation_samples = dataset_samples[dataset_split:]
print("validation samples length: {}".format(len(validation_samples)))

process data/gb/duck_tales/title_screen.mid...
process data/gb/duck_tales/transylvania.mid...
process data/gb/duck_tales/the_moon2.mid...
process data/gb/duck_tales/african_mines.mid...
process data/gb/duck_tales/the_moon.mid...
process data/gb/duck_tales/land_select.mid...
process data/gb/duck_tales/himalayas.mid...
process data/gb/zelda/laegg.mid...
process data/gb/zelda/laanimal.mid...
process data/gb/zelda/Zgbch6.mid...
process data/gb/zelda/lalev3.mid...
process data/gb/zelda/la1.mid...
process data/gb/zelda/latrendy.mid...
process data/gb/zelda/Zgbch3.mid...
process data/gb/zelda/Z4owl.mid...
process data/gb/zelda/lacavern.mid...
process data/gb/zelda/latitle.mid...
process data/gb/zelda/lawoods.mid...
process data/gb/zelda/Z4level5.mid...
process data/gb/zelda/Zgbforet.mid...
process data/gb/zelda/Zgbhouse.mid...
process data/gb/zelda/lahouse.mid...
process data/gb/zelda/laboat.mid...
process data/gb/zelda/lavilla.mid...
process data/gb/zelda/lamabe.mid...
process data/gb/zelda/

process data/nes/mario3/sm3grass.mid...
process data/nes/mario3/sm3undw.mid...
process data/nes/mario3/smb3ice.mid...
process data/nes/mario3/sm3underwater.mid...
process data/nes/mario3/mario.mid...
process data/nes/mario3/smb3sky.mid...
process data/nes/mario3/bowser.mid...
process data/nes/mario3/sm3fortress.mid...
process data/nes/mario3/sm3ocean.mid...
process data/nes/mario3/smb3nspade.mid...
process data/nes/mario3/sm3airship.mid...
process data/nes/mario3/Smb3toad.mid...
process data/nes/mario3/sm3wd1.mid...
process data/nes/mario3/sm3wd3.mid...
process data/nes/mario3/3ending.mid...
process data/nes/mario3/sm3mboss.mid...
process data/nes/mario3/sm3deser.mid...
process data/nes/mario3/smb3king.mid...
process data/nes/mario3/sm3ow2.mid...
process data/nes/mario3/smb3hbros.mid...
process data/nes/mario3/sm3big.mid...
process data/snes/zelda/z3castl1.mid...
process data/snes/zelda/z3sanct1.mid...
process data/snes/zelda/z3evil.mid...
process data/snes/zelda/z3sanct.mid...
process

In [74]:
# ---- Network architecture ----
# Piano roll input dimensions: (time steps, pitches, channels)
input_dim = (
    Constants.bars * Constants.beats_per_bar * Constants.beat_resolution,
    Constants.number_of_pitches,
    Constants.number_of_channels,
)
num_filters = 32    # number of filters in the convolution
growth_factor = 2   # growth rate of the number of filters at each convolution
num_layers = 5      # number of encoder and decoder layers

# ---- Per-layer regularisation (one list entry per layer) ----
# Encoder: dropout value and batch-normalization flag for each layer
dropout_rate_encoder = [0, 0.5, 0.5, 0.5, 0.5]
batch_norm_encoder = [True, True, True, True, False]
# Decoder: dropout value and batch-normalization flag for each layer
dropout_rate_decoder = [0.5, 0.5, 0.5, 0.5, 0]
batch_norm_decoder = [True, True, True, True, False]

# ---- Training setup ----
# Pretrained-model setting used to initialize the network weights.
# NOTE(review): the original comment calls this a path to a pretrained model,
# but the value is a boolean — confirm which values ArCnnModel accepts.
pre_trained = False
learning_rate = 0.001                 # learning rate of the model
optimizer_enum = OptimizerType.ADAM   # optimizer to use while training the model
batch_size = 32
epochs = 2

In [75]:
# Number of batch iterations before a training epoch is considered finished:
# (training samples * samples generated per ground-truth item) / batch size, truncated to an int
total_training_items = len(training_samples) * Constants.samples_per_ground_truth_data_item
steps_per_epoch = int(total_training_items / int(batch_size))

print("The Total Number Of Steps Per Epoch Are: {}".format(steps_per_epoch))

# Total number of time steps in one sample
n_timesteps = Constants.bars * Constants.beat_resolution * Constants.beats_per_bar

The Total Number Of Steps Per Epoch Are: 452


In [76]:
## Training Data Generator
# The constructor arguments are gathered in one mapping and unpacked into the call.
training_generator_settings = dict(
    sample_list=training_samples,
    sampling_lower_bound_remove=sampling_lower_bound_remove,
    sampling_upper_bound_remove=sampling_upper_bound_remove,
    sampling_lower_bound_add=sampling_lower_bound_add,
    sampling_upper_bound_add=sampling_upper_bound_add,
    batch_size=batch_size,
    bars=Constants.bars,
    samples_per_data_item=Constants.samples_per_ground_truth_data_item,
    beat_resolution=Constants.beat_resolution,
    beats_per_bar=Constants.beats_per_bar,
    number_of_pitches=Constants.number_of_pitches,
    number_of_channels=Constants.number_of_channels,
)
training_data_generator = PianoRollGenerator(**training_generator_settings)

In [77]:
# Validation Data Generator
# Uses the same sampling bounds and piano roll settings as the training generator,
# but draws from the validation samples.
validation_generator_settings = dict(
    sample_list=validation_samples,
    sampling_lower_bound_remove=sampling_lower_bound_remove,
    sampling_upper_bound_remove=sampling_upper_bound_remove,
    sampling_lower_bound_add=sampling_lower_bound_add,
    sampling_upper_bound_add=sampling_upper_bound_add,
    batch_size=batch_size,
    bars=Constants.bars,
    samples_per_data_item=Constants.samples_per_ground_truth_data_item,
    beat_resolution=Constants.beat_resolution,
    beats_per_bar=Constants.beats_per_bar,
    number_of_pitches=Constants.number_of_pitches,
    number_of_channels=Constants.number_of_channels,
)
validation_data_generator = PianoRollGenerator(**validation_generator_settings)

In [78]:
## Checkpoint Path
checkpoint_filepath = 'best-model-epoch.hdf5'

# Callback For Loss Plots
plot_losses = GenerateTrainingPlots()

# Callback For Saving Model Checkpoints:
# the full model (not only its weights) is written to checkpoint_filepath, and
# only when the monitored validation loss reaches a new minimum.
checkpoint_settings = {
    'filepath': checkpoint_filepath,
    'save_weights_only': False,
    'monitor': 'val_loss',
    'mode': 'min',
    'save_best_only': True,
}
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(**checkpoint_settings)

# Callbacks passed to training, in this order
callbacks_list = [plot_losses, model_checkpoint_callback]

In [79]:
# Create A Model Instance
# All values come from the hyperparameter cell; they are gathered in one mapping
# and unpacked into the ArCnnModel constructor.
model_settings = dict(
    input_dim=input_dim,
    num_filters=num_filters,
    growth_factor=growth_factor,
    num_layers=num_layers,
    dropout_rate_encoder=dropout_rate_encoder,
    dropout_rate_decoder=dropout_rate_decoder,
    batch_norm_encoder=batch_norm_encoder,
    batch_norm_decoder=batch_norm_decoder,
    pre_trained=pre_trained,
    learning_rate=learning_rate,
    optimizer_enum=optimizer_enum,
)
MusicModel = ArCnnModel(**model_settings)

In [80]:
# Build the network from the ArCnnModel instance; the layer summary shown in
# this cell's output is printed during this call.
model = MusicModel.build_model()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 128, 128, 1) 0                                            
__________________________________________________________________________________________________
conv2d_92 (Conv2D)              (None, 128, 128, 32) 320         input_5[0][0]                    
__________________________________________________________________________________________________
conv2d_93 (Conv2D)              (None, 128, 128, 32) 9248        conv2d_92[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_41 (MaxPooling2D) (None, 64, 64, 32)   0           conv2d_93[0][0]                  
____________________________________________________________________________________________

In [None]:
# Start Training
# Each epoch runs steps_per_epoch batches from the training generator; the
# validation generator and the callbacks (loss plots, checkpointing) are attached.
# NOTE(review): fit_generator is deprecated in newer TensorFlow/Keras releases in
# favour of Model.fit — confirm against the version pinned in requirements.txt.
history = model.fit_generator(
    training_data_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=validation_data_generator,
    callbacks=callbacks_list,
)

Epoch 1/2
  2/452 [..............................] - ETA: 1:45:54 - loss: 3.6714