In [1]:
import pretty_midi
import numpy as np
import itertools
import random
import glob
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn

#Checks current directory for the midi file extension and returns a list of all the midi files
def getMidi():
    return glob.glob("Midi-Files/*.mid")

def chordTransform(chord): 
    #all possible triad chords
    triads = {
            'major' : [4, 3],
            'minor' : [3, 4],
            'dim' : [3, 3],
            'aug' : [4, 4]
        }
    
    #If not triad then returns a random note of the chord
    if len(chord) != 3:
        root_note = random.choice(chord)
        return root_note
    
    #Finds the corresponding notes and its root note
    root_chord = {}
    for note in chord:
        root_chord[note]= note%12
    
    # Get all possible permutations of these notes
    note_perms = list(itertools.permutations(list(root_chord.values())))

    # Test each permutation against the possible triad intervals and return the triad type if there's a match.
    for i in range(len(note_perms)-1):
        notes_intervals = []
        posRoot_note = 99
        root_note = 99

        # Loop through notes and create a list, length 2, of intervals to check against
        for j in range(len(chord)-1):
            
            #Stores the current and next note in the possible permutations
            note_A = note_perms[i][j]
            note_B = note_perms[i][j+1]
            
            #finds the interval
            interval = note_B - note_A
            
            #If the interval is negative then loops around just a different octave
            if interval < 0:
                interval = interval + 12
                
            #Store the interval
            notes_intervals.append(interval)
            
            #The lowest note is the possible root note so checks for that and stores it
            if note_A <= note_B:
                if note_A < posRoot_note:
                    posRoot_note = note_A
            if note_B <= note_A: 
                if note_B < posRoot_note:
                    posRoot_note = note_B
                    
        # Finally loop through the traids dict to see if we have a match for a triad
        for t in triads.keys():
            if triads[t] == notes_intervals:
                
                #If so the root note is the lowest note of the triad
                #This method finds a key given a value
                for real_root, pseudo_root in root_chord.items():
                    if pseudo_root == posRoot_note:
                        return real_root
            
    #If not then the root note is a random note from the collection of notes
    if root_note not in range(12):
        root_note = random.choice(list(root_chord.keys()))
        return root_note

#@inputs: note_array is a matrix that is 128xinstrument.get_piano_roll() long. The number of columns is dependent upon how
            # sample will be split by time

#@returns: a vector that contains the root note at each time sample
def instrument_to_vector(note_array):
    note_array_transpose = np.transpose(note_array)
    note_vector = np.empty(note_array.shape[1])
    note_vector.fill(-1)
    for i in range(note_array_transpose.shape[0]): #The i here will be the column number of the transpose, which is the note
                                                   #This loop should iterate through the number of columns in transpose
        note_list=[]
        for number in note_array_transpose[i]:
            if number!=-1:
                note_list.append(number) #add the number aka the note being played 
                                        # if there is no number there is no note played so that place is 0
            if len(note_list)!=1:
                note_vector[i]=-1
            else:
                note_vector[i]=note_list[0]
    return note_vector

def NoteMatrix(midi_data, samplesPerSec):
    #Defines how many samples per second
    fs = samplesPerSec

    #Returns the total amount of samples gotten
    y = np.arange(0, midi_data.get_end_time(), 1./fs).shape[0]

    #Our desired matrix has the amount of samples for every possible instrument
    #noteMatrix = np.zeros(shape=(128, y))
    noteMatrix = np.empty(shape=(128,y))
    noteMatrix.fill(-1)

    #Iterates through all the instruments of the midi song
    for instrument in midi_data.instruments:

        #Creates an array of all the notes the instrument can possibly play over a time sample and its velocity
        total_notes = np.asarray(instrument.get_piano_roll(fs=fs, times=np.arange(0, midi_data.get_end_time(), 1./fs)))
        total_notes[total_notes == 0] = -1
        
        #Holder for the final array that converts chords into notes making all instruments monophonic
        converted_notes = np.zeros(shape=total_notes.shape)

        #Goes through each time sample to see if notes repeat, if so find the root node of this chord
        i=0
        
        for column in total_notes.T:

            #Notes repeat in a time slice
            if count_nonNegOne(column) > 1:

                #create a list containing the notes played
                chord = np.where(column>=0)[0]
                
                if len(chord) > 0:
                    #finds the root note of the chord
                    root_note = chordTransform(chord)

                    #removes all other notes other than the root
                    for note in chord:
                        if note != root_note:
                            column[note] = -1

                    #Classify the time slice by the root note itself not velocity
                    column[root_note] = root_note

                #Store in the converted notes
            converted_notes[:, i] = column
            i += 1

        #As every time splice has only one note with the note defined, convert into vector
        instrument_vector = instrument_to_vector(converted_notes)

        #For that instrument store the vector of the notes played out of all
        noteMatrix[instrument.program] = instrument_vector
    return noteMatrix

#Testing purposes
def count_nonNegOne(array):
    count = 0
    for i in array:
        if i != -1:
            count += 1
    return count

#For the creation of the label array for the RNN
def get_label_vector(label):
    label_array = np.zeros((1,7))
    label_dict = {'hh':0,
                 'cl':1,
                 'cn':2,
                 'ro':3,
                 'ed':4,
                 'pp':5,
                 'mt':6}
    label_array[0][label_dict[label]] = 1
    return label_array

#def main():
    #Makes a list of all the note matrices for all midis
#    midi_note = []

    #Makes a list of all the labels for each corresponding midi's note matrix
#    midi_label = []

    #Iterates through all midis
#    for midi in getMidi():

        #Opens midi as a pretty midi file
#        midi_data = pretty_midi.PrettyMIDI(midi)

        #creates the note matrix
#        noteMatrix = NoteMatrix(midi_data, 10)

        #adds to list of matrices
#        midi_note.append(noteMatrix)

        #stores the label of the midi file which is the first two letters of each midi
#        midi_label.append(midi[:2])
        
#if __name__== "__main__":
#    main()

In [2]:
#Makes a list of all the note matrices for all midis
midi_note = []

#Makes a list of all the labels for each corresponding midi's note matrix
midi_label = []

#number of samples per second
fs = 20

#Iterates through all midis
for midi in getMidi():

    #Opens midi as a pretty midi file
    midi_data = pretty_midi.PrettyMIDI(midi)

    #creates the note matrix
    noteMatrix = NoteMatrix(midi_data, fs)
    
    #only gets the slice from 30 seconds to 90 seconds
    start = 30*fs
    end = 90*fs
    
    #Stores sequence length for tensorflow
    sequence_length = end-start
    
    #Stores the first minute
    noteMatrix = noteMatrix[:, start:end].T
    
    #adds to list of matrices
    midi_note.append(noteMatrix)

    #stores the label of the midi file which is the first two letters of each midi
    abbrev = midi.split("\\")[1][:2]
    midi_label.append(get_label_vector(abbrev))



In [4]:
# Training Parameters
learning_rate = 0.0001
training_steps = 500
batch_size = 1
display_step = 10

# Network Parameters
num_input = 128 #instruments
timesteps = 60*fs # timesteps
num_hidden = 4 # hidden layer num of features
num_classes = 7 #Total amount of genres

# tf Graph input
X = tf.placeholder("float", [None, timesteps, num_input])
Y = tf.placeholder("float", [None, num_classes])

In [5]:
# Define weights
weights = {
    'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([num_classes]))
}

In [6]:
def RNN(x, weights, biases):

    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, timesteps, 1)

    # Define a lstm cell with tensorflow
    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

In [7]:
logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [8]:
# Start training
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    for step in range(1, training_steps+1):
        for i in range(len(midi_note)-5):
            batch_x = midi_note[i]
            batch_y = midi_label[i]
            # Reshape data to get 60*fs seq of 128 elements
            batch_x = batch_x.reshape((batch_size, timesteps, num_input))
            # Run optimization op (backprop)
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x,
                                                                     Y: batch_y})
                print("Step " + str(step) + ", Minibatch Loss= " + \
                      "{:.4f}".format(loss) + ", Training Accuracy= " + \
                      "{:.3f}".format(acc))

    print("Optimization Finished!")

    # Calculate accuracy for test
    for i in range(5):
        i = i+15
        test_data = midi_note[i]
        test_label = midi_label[i]
        # Reshape data to get 60*fs seq of 128 elements
        test_data = test_data.reshape((batch_size, timesteps, num_input))
        print("Testing Accuracy:", \
            sess.run(accuracy, feed_dict={X: test_data, Y: test_label}))

Step 1, Minibatch Loss= 1.7463, Training Accuracy= 0.000
Step 1, Minibatch Loss= 0.5903, Training Accuracy= 1.000
Step 1, Minibatch Loss= 1.9071, Training Accuracy= 0.000
Step 1, Minibatch Loss= 0.5889, Training Accuracy= 1.000
Step 1, Minibatch Loss= 2.0698, Training Accuracy= 0.000
Step 1, Minibatch Loss= 1.7409, Training Accuracy= 0.000
Step 1, Minibatch Loss= 1.2268, Training Accuracy= 0.000
Step 1, Minibatch Loss= 1.6172, Training Accuracy= 0.000
Step 1, Minibatch Loss= 0.6885, Training Accuracy= 1.000
Step 1, Minibatch Loss= 1.6740, Training Accuracy= 0.000
Step 1, Minibatch Loss= 1.8930, Training Accuracy= 0.000
Step 1, Minibatch Loss= 0.3213, Training Accuracy= 1.000
Step 1, Minibatch Loss= 1.2627, Training Accuracy= 0.000
Step 1, Minibatch Loss= 1.2518, Training Accuracy= 0.000
Step 1, Minibatch Loss= 2.2075, Training Accuracy= 0.000
Step 10, Minibatch Loss= 1.7359, Training Accuracy= 0.000
Step 10, Minibatch Loss= 0.5852, Training Accuracy= 1.000
Step 10, Minibatch Loss= 1.70

Step 90, Minibatch Loss= 0.4999, Training Accuracy= 1.000
Step 90, Minibatch Loss= 0.5848, Training Accuracy= 1.000
Step 90, Minibatch Loss= 1.1847, Training Accuracy= 1.000
Step 90, Minibatch Loss= 0.6041, Training Accuracy= 1.000
Step 90, Minibatch Loss= 0.2865, Training Accuracy= 1.000
Step 90, Minibatch Loss= 1.1867, Training Accuracy= 1.000
Step 90, Minibatch Loss= 0.9187, Training Accuracy= 1.000
Step 90, Minibatch Loss= 2.1101, Training Accuracy= 0.000
Step 100, Minibatch Loss= 1.6448, Training Accuracy= 0.000
Step 100, Minibatch Loss= 0.5358, Training Accuracy= 1.000
Step 100, Minibatch Loss= 0.5513, Training Accuracy= 1.000
Step 100, Minibatch Loss= 0.5347, Training Accuracy= 1.000
Step 100, Minibatch Loss= 1.9665, Training Accuracy= 0.000
Step 100, Minibatch Loss= 1.6416, Training Accuracy= 0.000
Step 100, Minibatch Loss= 1.0039, Training Accuracy= 0.000
Step 100, Minibatch Loss= 0.4579, Training Accuracy= 1.000
Step 100, Minibatch Loss= 0.5614, Training Accuracy= 1.000
Step 

Step 180, Minibatch Loss= 0.2570, Training Accuracy= 1.000
Step 180, Minibatch Loss= 1.1243, Training Accuracy= 1.000
Step 180, Minibatch Loss= 0.3362, Training Accuracy= 1.000
Step 180, Minibatch Loss= 0.8800, Training Accuracy= 1.000
Step 190, Minibatch Loss= 1.5690, Training Accuracy= 0.000
Step 190, Minibatch Loss= 0.4918, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.3489, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.4907, Training Accuracy= 1.000
Step 190, Minibatch Loss= 1.8872, Training Accuracy= 0.000
Step 190, Minibatch Loss= 1.5666, Training Accuracy= 0.000
Step 190, Minibatch Loss= 0.9484, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.3263, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.3858, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.9243, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.3485, Training Accuracy= 1.000
Step 190, Minibatch Loss= 0.2541, Training Accuracy= 1.000
Step 190, Minibatch Loss= 1.1180, Training Accuracy= 1.0

Step 280, Minibatch Loss= 1.5030, Training Accuracy= 0.000
Step 280, Minibatch Loss= 0.4524, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.2981, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.4515, Training Accuracy= 1.000
Step 280, Minibatch Loss= 1.8159, Training Accuracy= 0.000
Step 280, Minibatch Loss= 1.5012, Training Accuracy= 0.000
Step 280, Minibatch Loss= 0.9019, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.2877, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.3463, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.5598, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.2977, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.2299, Training Accuracy= 1.000
Step 280, Minibatch Loss= 1.0655, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.2480, Training Accuracy= 1.000
Step 280, Minibatch Loss= 0.8166, Training Accuracy= 1.000
Step 290, Minibatch Loss= 1.4960, Training Accuracy= 0.000
Step 290, Minibatch Loss= 0.4483, Training Accuracy= 1.0

Step 370, Minibatch Loss= 0.4167, Training Accuracy= 1.000
Step 370, Minibatch Loss= 1.7479, Training Accuracy= 0.000
Step 370, Minibatch Loss= 1.4401, Training Accuracy= 0.000
Step 370, Minibatch Loss= 0.8594, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.2584, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.3164, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.4722, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.2646, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.2090, Training Accuracy= 1.000
Step 370, Minibatch Loss= 1.0171, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.2174, Training Accuracy= 1.000
Step 370, Minibatch Loss= 0.7743, Training Accuracy= 1.000
Step 380, Minibatch Loss= 1.4351, Training Accuracy= 0.000
Step 380, Minibatch Loss= 0.4139, Training Accuracy= 1.000
Step 380, Minibatch Loss= 0.2618, Training Accuracy= 1.000
Step 380, Minibatch Loss= 0.4131, Training Accuracy= 1.000
Step 380, Minibatch Loss= 1.7404, Training Accuracy= 0.0

Step 460, Minibatch Loss= 0.2914, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.4028, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.2384, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.1908, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.9724, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.1954, Training Accuracy= 1.000
Step 460, Minibatch Loss= 0.7368, Training Accuracy= 1.000
Step 470, Minibatch Loss= 1.3785, Training Accuracy= 0.000
Step 470, Minibatch Loss= 0.3833, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.2361, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.3826, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.7612, Training Accuracy= 1.000
Step 470, Minibatch Loss= 1.3773, Training Accuracy= 0.000
Step 470, Minibatch Loss= 0.8164, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.2317, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.2889, Training Accuracy= 1.000
Step 470, Minibatch Loss= 0.3993, Training Accuracy= 1.0