In [1]:
# conda install python-graphviz

In [2]:
# pip install pydot==1.3.0

In [3]:
# pip install graphviz==0.10.1 

In [4]:
# pip install python-pydot python-pydot-ng graphviz

In [5]:
import cv2
import midi
import numpy as np
import math
import sys, random, os
from matplotlib import pyplot as plt
import pydot
import cv2
import midi
import keras
import theano


from mido import MidiFile, MidiTrack, Message

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense
from keras.layers import Input, Dense, Activation, Dropout, Flatten, Reshape, Permute, RepeatVector, ActivityRegularization, TimeDistributed, Lambda, SpatialDropout1D
from keras.layers.convolutional import Conv1D, Conv2D, Conv2DTranspose, UpSampling2D, ZeroPadding2D
from keras.layers.embeddings import Embedding
from keras.layers.local import LocallyConnected2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D
from keras.layers.noise import GaussianNoise
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import LSTM, SimpleRNN
from keras.initializers import RandomNormal
from keras.losses import binary_crossentropy
from keras.models import Model, Sequential, load_model
from keras.optimizers import Adam, RMSprop, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from keras.utils import plot_model
from keras import backend as K
from keras import regularizers
from keras.engine.topology import Layer

print ("Theano Version: " + theano.__version__)
print ("Keras Version: " + keras.__version__)
print ("Tensorflow Version: " + tf.__version__)

Theano Version: 1.0.5
Keras Version: 2.3.1
Tensorflow Version: 2.2.0


In [6]:
# Hyper Parameters
NUM_EPOCHS = 200
LR = 0.001
CONTINUE_TRAIN = False
PLAY_ONLY = False
USE_EMBEDDING = False
USE_VAE = False
WRITE_HISTORY = True
NUM_RAND_SONGS = 10
DO_RATE = 0.1
BN_M = 0.9
VAE_B1 = 0.02
VAE_B2 = 0.1

BATCH_SIZE = 350
MAX_LENGTH = 16
PARAM_SIZE = 120
NUM_OFFSETS = 16 if USE_EMBEDDING else 1

In [7]:
## utility functions ##

def transpose_range(samples):
	merged_sample = np.zeros_like(samples[0])
	for sample in samples:
		merged_sample = np.maximum(merged_sample, sample)
	merged_sample = np.amax(merged_sample, axis=0)
	min_note = np.argmax(merged_sample)
	max_note = merged_sample.shape[0] - np.argmax(merged_sample[::-1])
	return min_note, max_note

def generate_add_centered_transpose(samples):
	num_notes = samples[0].shape[1]
	min_note, max_note = transpose_range(samples)
	s = num_notes/2 - (max_note + min_note)/2
	out_samples = samples
	out_lens = [len(samples), len(samples)]
	for i in range(len(samples)):
		out_sample = np.zeros_like(samples[i])
		out_sample[:,int(min_note+s):int(max_note+s)] = samples[i][:,min_note:max_note]
		out_samples.append(out_sample)
	return out_samples, out_lens
	
def generate_all_transpose(samples, radius=6):
	num_notes = samples[0].shape[1]
	min_note, max_note = transpose_range(samples)
	min_shift = -min(radius, min_note)
	max_shift = min(radius, num_notes - max_note)
	out_samples = []
	out_lens = []
	for s in range(min_shift, max_shift):
		for i in range(len(samples)):
			out_sample = np.zeros_like(samples[i])
			out_sample[:,min_note+s:max_note+s] = samples[i][:,min_note:max_note]
			out_samples.append(out_sample)
		out_lens.append(len(samples))
	return out_samples, out_lens

def sample_to_pic(fname, sample, thresh=None):
	if thresh is not None:
		inverted = np.where(sample > thresh, 0, 1)
	else:
		inverted = 1.0 - sample
	cv2.imwrite(fname, inverted * 255)

def samples_to_pics(dir, samples, thresh=None):
	if not os.path.exists(dir): os.makedirs(dir)
	for i in range(samples.shape[0]):
		sample_to_pic(dir + '/s' + str(i) + '.png', samples[i], thresh)

def pad_songs(y, y_lens, max_len):
	y_shape = (y_lens.shape[0], max_len) + y.shape[1:]
	y_train = np.zeros(y_shape, dtype=np.float32)
	cur_ix = 0
	for i in range(y_lens.shape[0]):
		end_ix = cur_ix + y_lens[i]
		for j in range(max_len):
			k = j % (end_ix - cur_ix)
			y_train[i,j] = y[cur_ix + k]
		cur_ix = end_ix
	assert(end_ix == y.shape[0])
	return y_train

def sample_to_pattern(sample, ix, size):
	num_pats = 0
	pat_types = {}
	pat_list = []
	num_samples = len(sample) if type(sample) is list else sample.shape[0]
	for i in range(size):
		j = (ix + i) % num_samples
		measure = sample[j].tobytes()
		if measure not in pat_types:
			pat_types[measure] = num_pats
			num_pats += 1
		pat_list.append(pat_types[measure])
	return str(pat_list), pat_types

def embed_samples(samples):
	note_dict = {}
	n, m, p = samples.shape
	samples.flags.writeable = False
	e_samples = np.empty(samples.shape[:2], dtype=np.int32)
	for i in range(n):
		for j in range(m):
			note = samples[i,j].data
			if note not in note_dict:
				note_dict[note] = len(note_dict)
			e_samples[i,j] = note_dict[note]
	samples.flags.writeable = True
	lookup = np.empty((len(note_dict), p), dtype=np.float32)
	for k in note_dict:
		lookup[note_dict[k]] = k
	return e_samples, note_dict, lookup

def e_to_samples(e_samples, lookup):
	samples = np.empty(e_samples.shape + lookup.shape[-1:], dtype=np.float32)
	n, m = e_samples.shape
	for i in range(n):
		for j in range(m):
			samples[i,j] = lookup[e_samples[i,j]]
	return samples

In [8]:
num_notes = 96
samples_per_measure = 96

def midi_to_samples(fname):
    has_time_sig = False
    flag_warning = False
    mid = MidiFile(fname)
    ticks_per_beat = mid.ticks_per_beat
    ticks_per_measure = 4 * ticks_per_beat

    for i, track in enumerate(mid.tracks):
        for msg in track:
            if msg.type == 'time_signature':
                new_tpm = msg.numerator * ticks_per_beat * 4 / msg.denominator
                if has_time_sig and new_tpm != ticks_per_measure:
                    flag_warning = True
                ticks_per_measure = new_tpm
                has_time_sig = True
    if flag_warning:
#         print ("  ^^^^^^ WARNING ^^^^^^ " )
        return []
    all_notes = {}
    for i, track in enumerate(mid.tracks):
        abs_time = 0
        for msg in track:
            abs_time += msg.time
            if msg.type == 'note_on':
                if msg.velocity == 0:
                    continue
                note = msg.note - (128 - num_notes)/2
                assert(note >= 0 and note < num_notes)
                if note not in all_notes:
                    all_notes[note] = []
                else:
                    single_note = all_notes[note][-1]
                    if len(single_note) == 1:
                        single_note.append(single_note[0] + 1)
                all_notes[note].append([abs_time * samples_per_measure / ticks_per_measure])
            elif msg.type == 'note_off':
                if len(all_notes[note][-1]) != 1:
                    continue
                all_notes[note][-1].append(abs_time * samples_per_measure / ticks_per_measure)    
    for note in all_notes:
        for start_end in all_notes[note]:
            if len(start_end) == 1:
                start_end.append(start_end[0] + 1)
    samples = []
    for note in all_notes:
        for start, end in all_notes[note]:
            sample_ix = start / samples_per_measure
            while len(samples) <= sample_ix:
                samples.append(np.zeros((samples_per_measure, num_notes), dtype=np.uint8))
            sample = samples[int(sample_ix)]
            start_ix = start - sample_ix * samples_per_measure        
            if False:
                end_ix = min(end - sample_ix * samples_per_measure, samples_per_measure)
                while start_ix < end_ix:
                    sample[int(start_ix), int(note)] = 1
                    start_ix += 1
            else:
                sample[int(start_ix), int(note)] = 1 
    return samples

In [9]:
## LOAD SONGS ##
patterns = {}
dirs = ["data/"]
all_samples = []
all_lens = []
print ("Loading Songs...")
for dir in dirs:
    for root, subdirs, files in os.walk(dir):
        for file in files:
            path = root + "/" + file
            if not (path.endswith('.mid') or path.endswith('.midi')):
                continue
            try:
                samples = midi_to_samples(path)
            except Exception as inst:
                print ("ERROR ", path, inst)
                continue
            if len(samples) < 8:
                continue
            samples, lens = generate_add_centered_transpose(samples)
            all_samples += samples
            all_lens += lens
            print ("Saving " + path)
assert(sum(all_lens) == len(all_samples))
print ("Saving " + str(len(all_samples)) + " samples...")
all_samples = np.array(all_samples, dtype=np.uint8)
all_lens = np.array(all_lens, dtype=np.uint32)
np.save('samples.npy', all_samples)
np.save('lengths.npy', all_lens)
print ("Done")

Loading Songs...
Saving data/Midi/2a60530c7dccc812f5b8acaed9ade9ed.mid
Saving data/Midi/c1570bd149d3b8e63e45be58552fea19.mid
Saving data/Midi/0d15ccb09ec9efa67f933ec37c56c5eb.mid
ERROR  data/Midi/3268d19b81145078d1a8f450feb07d7b.mid 
ERROR  data/Midi/a478302cafde2aa91b28e57cae9efea7.mid 
Saving data/Midi/7daacb647e8968e7422d59912f8153fd.mid
Saving data/Midi/3abdac41f329dda66f7963d47bb311aa.mid
Saving data/Midi/b3873990c90cfd2a6fe3ae8040ecb0af.mid
Saving data/Midi/56699e4a5f6343f56f0179ebfa6fc078.mid
Saving data/Midi/b8e21a6a26ea481e0301463cd6311bf4.mid
Saving data/Midi/587bf1c0c607b5b1566ff64dbfe8fc1c.mid
ERROR  data/Midi/c94f0a7845a2bc9214693f8da9fa0452.mid 
Saving data/Midi/f660eb7e22599b36bb8a1b9f65036fb9.mid
Saving data/Midi/5af385075eb2a03122a8b7d2a233c15e.mid
Saving data/Midi/17f3df180d6788e8dcc617f02fa336fb.mid
Saving data/Midi/9463536887c06cf31d8ed1407f886048.mid
Saving data/Midi/24a4adfb3d307222884a9ab4718b3c3d.mid
Saving data/Midi/36b668824f5477a016eb2eaaea59df56.mid
Saving d

In [9]:
def samples_to_midi(samples, fname, ticks_per_sample, thresh=0.5):
    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)
    ticks_per_beat = mid.ticks_per_beat
    ticks_per_measure = 4 * ticks_per_beat
    ticks_per_sample = ticks_per_measure / samples_per_measure
    abs_time = 0
    last_time = 0
    for sample in samples:
        for y in range(sample.shape[0]):
            abs_time += ticks_per_sample
            for x in range(sample.shape[1]):
                note = x + (128 - num_notes)/2
                if sample[y,x] >= thresh and (y == 0 or sample[y-1,x] < thresh):
                    delta_time = abs_time - last_time
                    track.append(Message('note_on', note=int(note), velocity=127, time=int(delta_time)))
                    last_time = abs_time
                if sample[y,x] >= thresh and (y == sample.shape[0]-1 or sample[y+1,x] < thresh):
                    delta_time = abs_time - last_time
                    track.append(Message('note_off', note=int(note), velocity=127, time=int(delta_time)))
                    last_time = abs_time
    mid.save(fname)

In [None]:
def plotScores(scores, fname, on_top=True):
	plt.clf()
	ax = plt.gca()
	ax.yaxis.tick_right()
	ax.yaxis.set_ticks_position('both')
	ax.yaxis.grid(True)
	plt.plot(scores)
	plt.ylim([0.0, 0.009])
	plt.xlabel('Epoch')
	loc = ('upper right' if on_top else 'lower right')
	plt.draw()
	plt.savefig(fname)

def save_config():
	with open('config.txt', 'w') as fout:
		fout.write('LR:          ' + str(LR) + '\n')
		fout.write('BN_M:        ' + str(BN_M) + '\n')
		fout.write('BATCH_SIZE:  ' + str(BATCH_SIZE) + '\n')
		fout.write('NUM_OFFSETS: ' + str(NUM_OFFSETS) + '\n')
		fout.write('DO_RATE:     ' + str(DO_RATE) + '\n')
		fout.write('num_songs:   ' + str(num_songs) + '\n')
		fout.write('optimizer:   ' + type(model.optimizer).__name__ + '\n')

###################################
#  Load Keras
###################################
K.set_image_data_format('channels_first')

#Fix the random seed so that training comparisons are easier to make
np.random.seed(0)
random.seed(0)

if WRITE_HISTORY:
	#Create folder to save models into
	if not os.path.exists('History'):
		os.makedirs('History')

###################################
#  Load Dataset
###################################
print ("Loading Data...")
y_samples = np.load('samples.npy')
y_lengths = np.load('lengths.npy')
num_samples = y_samples.shape[0]
num_songs = y_lengths.shape[0]
print ("Loaded " + str(num_samples) + " samples from " + str(num_songs) + " songs.")
print (np.sum(y_lengths))
assert(np.sum(y_lengths) == num_samples)

print ("Padding Songs...")
x_shape = (num_songs * NUM_OFFSETS, 1)
y_shape = (num_songs * NUM_OFFSETS, MAX_LENGTH) + y_samples.shape[1:]
x_orig = np.expand_dims(np.arange(x_shape[0]), axis=-1)
y_orig = np.zeros(y_shape, dtype=y_samples.dtype)
cur_ix = 0
for i in range(num_songs):
	for ofs in range(NUM_OFFSETS):
		ix = i*NUM_OFFSETS + ofs
		end_ix = cur_ix + y_lengths[i]
		for j in range(MAX_LENGTH):
			k = (j + ofs) % (end_ix - cur_ix)
			y_orig[ix,j] = y_samples[cur_ix + k]
	cur_ix = end_ix
assert(end_ix == num_samples)
x_train = np.copy(x_orig)
y_train = np.copy(y_orig)

def to_song(encoded_output):
	return np.squeeze(decoder([np.round(encoded_output), 0])[0])

def reg_mean_std(x):
	s = K.log(K.sum(x * x))
	return s*s

def vae_sampling(args):
	z_mean, z_log_sigma_sq = args
	epsilon = K.random_normal(shape=K.shape(z_mean), mean=0.0, stddev=VAE_B1)
	return z_mean + K.exp(z_log_sigma_sq * 0.5) * epsilon

def vae_loss(x, x_decoded_mean):
	xent_loss = binary_crossentropy(x, x_decoded_mean)
	kl_loss = VAE_B2 * K.mean(1 + z_log_sigma_sq - K.square(z_mean) - K.exp(z_log_sigma_sq), axis=None)
	return xent_loss - kl_loss
	
test_ix = 0
y_test_song = np.copy(y_train[test_ix:test_ix+1])
x_test_song = np.copy(x_train[test_ix:test_ix+1])
samples_to_midi(y_test_song[0], 'gt.mid', 16)

CONTINUE_TRAIN = True
###################################
#  Create Model
###################################
if CONTINUE_TRAIN or PLAY_ONLY:
	print ("Loading Model...")
	model = load_model('History/model.h5')
else:
	print ("Building Model...")

	if USE_EMBEDDING:
		x_in = Input(shape=x_shape[1:])
		print (None,) + x_shape[1:]
		x = Embedding(x_train.shape[0], PARAM_SIZE, input_length=1)(x_in)
		x = Flatten(name='pre_encoder')(x)
	else:
		x_in = Input(shape=y_shape[1:])
		print ((None,) + y_shape[1:])
		x = Reshape((y_shape[1], -1))(x_in)
		print (K.int_shape(x))
		
		x = TimeDistributed(Dense(2000, activation='relu'))(x)
		print (K.int_shape(x))
		
		x = TimeDistributed(Dense(200, activation='relu'))(x)
		print (K.int_shape(x))

		x = Flatten()(x)
		print (K.int_shape(x))

		x = Dense(1600, activation='relu')(x)
		print (K.int_shape(x))
		
		if USE_VAE:
			z_mean = Dense(PARAM_SIZE)(x)
			z_log_sigma_sq = Dense(PARAM_SIZE)(x)
			x = Lambda(vae_sampling, output_shape=(PARAM_SIZE,), name='pre_encoder')([z_mean, z_log_sigma_sq])
		else:
			x = Dense(PARAM_SIZE)(x)
			x = BatchNormalization(momentum=BN_M, name='pre_encoder')(x)
	print (K.int_shape(x))
	
	x = Dense(1600, name='encoder')(x)
	x = BatchNormalization(momentum=BN_M)(x)
	x = Activation('relu')(x)
	if DO_RATE > 0:
		x = Dropout(DO_RATE)(x)
	print (K.int_shape(x))

	x = Dense(MAX_LENGTH * 200)(x)
	print (K.int_shape(x))
	x = Reshape((MAX_LENGTH, 200))(x)
	x = TimeDistributed(BatchNormalization(momentum=BN_M))(x)
	x = Activation('relu')(x)
	if DO_RATE > 0:
		x = Dropout(DO_RATE)(x)
	print (K.int_shape(x))

	x = TimeDistributed(Dense(2000))(x)
	x = TimeDistributed(BatchNormalization(momentum=BN_M))(x)
	x = Activation('relu')(x)
	if DO_RATE > 0:
		x = Dropout(DO_RATE)(x)
	print (K.int_shape(x))

	x = TimeDistributed(Dense(y_shape[2] * y_shape[3], activation='sigmoid'))(x)
	print (K.int_shape(x))
	x = Reshape((y_shape[1], y_shape[2], y_shape[3]))(x)
	print (K.int_shape(x))
	
	if USE_VAE:
		model = Model(x_in, x)
		model.compile(optimizer=Adam(lr=LR), loss=vae_loss)
	else:
		model = Model(x_in, x)
		model.compile(optimizer=RMSprop(lr=LR), loss='binary_crossentropy')

# 	keras.utils.plot_model(model, to_file='model.png', show_shapes=True)

###################################
#  Train
###################################
print ("Compiling SubModels...")
func = K.function([model.get_layer('encoder').input, K.learning_phase()],
				  [model.layers[-1].output])
enc = Model(inputs=model.input, outputs=model.get_layer('pre_encoder').output)

rand_vecs = np.random.normal(0.0, 1.0, (NUM_RAND_SONGS, PARAM_SIZE))
np.save('rand.npy', rand_vecs)

def make_rand_songs(write_dir, rand_vecs):
	for i in range(rand_vecs.shape[0]):
		x_rand = rand_vecs[i:i+1]
		y_song = func([x_rand, 0])[0]
		samples_to_midi(y_song[0], write_dir + 'rand' + str(i) + '.mid', 16, 0.25)

def make_rand_songs_normalized(write_dir, rand_vecs):
	if USE_EMBEDDING:
		x_enc = np.squeeze(enc.predict(x_orig))
	else:
		x_enc = np.squeeze(enc.predict(y_orig))
	
	x_mean = np.mean(x_enc, axis=0)
	x_stds = np.std(x_enc, axis=0)
	x_cov = np.cov((x_enc - x_mean).T)
	u, s, v = np.linalg.svd(x_cov)
	e = np.sqrt(s)

	print ("Means: ", x_mean[:6])
	print ("Evals: ", e[:6])
	
	np.save(write_dir + 'means.npy', x_mean)
	np.save(write_dir + 'stds.npy', x_stds)
	np.save(write_dir + 'evals.npy', e)
	np.save(write_dir + 'evecs.npy', v)

	x_vecs = x_mean + np.dot(rand_vecs * e, v)
	make_rand_songs(write_dir, x_vecs)
	
	title = ''
	if '/' in write_dir:
		title = 'Epoch: ' + write_dir.split('/')[-2][1:]
	
	plt.clf()
	e[::-1].sort()
	plt.title(title)
	plt.bar(np.arange(e.shape[0]), e, align='center')
	plt.draw()
	plt.savefig(write_dir + 'evals.png')

	plt.clf()
	plt.title(title)
	plt.bar(np.arange(e.shape[0]), x_mean, align='center')
	plt.draw()
	plt.savefig(write_dir + 'means.png')
	
	plt.clf()
	plt.title(title)
	plt.bar(np.arange(e.shape[0]), x_stds, align='center')
	plt.draw()
	plt.savefig(write_dir + 'stds.png')

if PLAY_ONLY:
	print ("Generating Songs...")
	make_rand_songs_normalized('', rand_vecs)
	for i in range(20):
		x_test_song = x_train[i:i+1]
		y_song = model.predict(x_test_song, batch_size=BATCH_SIZE)[0]
		samples_to_midi(y_song, 'gt' + str(i) + '.mid', 16)
	exit(0)
		  
print ("Training...")
save_config()
train_loss = []
ofs = 0

for iter in range(NUM_EPOCHS):
	if USE_EMBEDDING:
		history = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1)
	else:
		cur_ix = 0
		for i in range(num_songs):
			end_ix = cur_ix + y_lengths[i]
			for j in range(MAX_LENGTH):
				k = (j + ofs) % (end_ix - cur_ix)
				y_train[i,j] = y_samples[cur_ix + k]
			cur_ix = end_ix
		assert(end_ix == num_samples)
		ofs += 1

		history = model.fit(y_train, y_train, batch_size=BATCH_SIZE, epochs=1)

	loss = history.history["loss"][-1]
	train_loss.append(loss)
	print ("Train Loss: " + str(train_loss[-1]))
	
# 	if WRITE_HISTORY:
# 		plotScores(train_loss, 'History/Scores.png', True)
# 	else:
	plotScores(train_loss, 'Scores.png', True)
	
	i = iter + 1
	if i in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450] or (i % 100 == 0):
		write_dir = ''
		if WRITE_HISTORY:
			#Create folder to save models into
			write_dir = 'History/e' + str(i)
			if not os.path.exists(write_dir):
				os.makedirs(write_dir)
			write_dir += '/'
			model.save('History/model.h5')
		else:
			model.save('model.h5')
		print ("Saved")

		if USE_EMBEDDING:
			y_song = model.predict(x_test_song, batch_size=BATCH_SIZE)[0]
		else:
			y_song = model.predict(y_test_song, batch_size=BATCH_SIZE)[0]
		samples_to_pics(write_dir + 'test', y_song)
		samples_to_midi(y_song, write_dir + 'test.mid', 16)

		make_rand_songs_normalized(write_dir, rand_vecs)

print ("Done")

Loading Data...
Loaded 462000 samples from 4414 songs.
462000
Padding Songs...
Loading Model...
Compiling SubModels...
Training...
Epoch 1/1
Train Loss: 0.0069536989125181334
Saved
Means:  [ 0.24698126 -0.22932215 -0.1237676  -0.0932721   0.01491608 -0.05897615]
Evals:  [7.47212345 3.39138483 3.14438544 1.8611597  1.45795372 0.91083044]
Epoch 1/1
Train Loss: 0.004715183424654931
Epoch 1/1
Train Loss: 0.0039335622032956605
Epoch 1/1
Train Loss: 0.0035151346310454967
Epoch 1/1
Train Loss: 0.003222990447700051
Epoch 1/1
Train Loss: 0.0030803098404500674
Epoch 1/1
Train Loss: 0.0029949355165980076
Epoch 1/1
Train Loss: 0.002915921668181517
Epoch 1/1
Train Loss: 0.00291122258782191
Epoch 1/1
Train Loss: 0.002845007002182342
Saved
Means:  [-0.4880088   0.57497776 -0.21995753  0.73921454  1.2468133  -0.01528124]
Evals:  [6.65053007 3.99326704 3.804826   3.70057422 2.73748005 2.32916359]
Epoch 1/1
Train Loss: 0.0028477250376828505
Epoch 1/1
Train Loss: 0.0028212879496188957
Epoch 1/1
Train Los

In [None]:
ls