In [1]:
from music21 import converter, instrument, note, chord, stream
import glob
import pickle
import numpy as np
from keras.utils import np_utils

Using TensorFlow backend.


### Loading Data

In [2]:
# Load the pickled list of note strings from disk.
# NOTE: pickle.load can run arbitrary code from the file; only unpickle files you trust.
with open("./Files/notes.pkl", "rb") as notes_file:
    notes = pickle.load(notes_file)

In [3]:
# Vocabulary size: how many distinct entries occur in `notes`
unique_notes = set(notes)
n_vocab = len(unique_notes)

In [4]:
# Report the length of `notes` next to the vocabulary size
for label, value in (("Total notes- ", len(notes)), ("Unique notes- ", n_vocab)):
    print(label, value)

Total notes-  60498
Unique notes-  359


In [5]:
# `notes` is one flat list holding the notes of every music file (about 60000 entries);
# display the first 100 of them.
first_hundred = notes[:100]
print(first_hundred)

['4+9', 'E2', '4+9', '4+9', '4+9', '4+9', '4+9', '4+9', '4+9', '11+4', '4+9', '11+4', '4+9', '4+9', '4+9', '4+9', '4+9', '0+4', 'E2', '4+9', '0+4', '4+9', '4+9', '4+9', '4+9', '4+9', '9+2', '4+9', '9+2', '9+2', '4+9', '4+9', '4+9', '4+9', '4+9', '4+9', 'E2', '4+9', '4+9', '4+9', '4+9', '4+9', 'E5', 'F5', 'G#5', 'A5', '4+9', '4+9', '5+11', '4+9', '5+11', '4+9', '4+9', '4+9', 'E5', 'F5', 'G#5', 'A5', '4+9', '4+9', '9+0', 'E2', '4+9', '9+0', '4+9', '4+9', '4+9', 'E5', 'F5', 'G#5', 'A5', '4+9', '4+9', '11+2', '4+9', '11+2', '11+2', '4+9', '4+9', '4+9', 'E5', 'F5', 'G#5', 'A5', '4+9', '4+9', '3+7+11', 'E-2', '3+7+11', 'B2', 'G2', '1+5+9', 'F#2', '1+5+9', '3+7+11', 'E-2', '3+7+11', 'G2', 'B2', 'E-3']


## Prepare Sequential Data for LSTM

In [6]:
# Number of consecutive entries from `notes` that make up one LSTM input sequence
sequence_length = 100

In [7]:
# Every distinct entry of `notes`, in sorted order
pitchnames = sorted(set(notes))

In [8]:
# Give each entry of pitchnames an integer code equal to its position in the sorted list
note_to_int = {name: index for index, name in enumerate(pitchnames)}
print(note_to_int)

{'0': 0, '0+1': 1, '0+1+5': 2, '0+1+6': 3, '0+2': 4, '0+2+3+7': 5, '0+2+4+7': 6, '0+2+5': 7, '0+2+6': 8, '0+2+7': 9, '0+3': 10, '0+3+5': 11, '0+3+5+8': 12, '0+3+6': 13, '0+3+6+8': 14, '0+3+6+9': 15, '0+3+7': 16, '0+4': 17, '0+4+5': 18, '0+4+6': 19, '0+4+7': 20, '0+5': 21, '0+5+6': 22, '0+6': 23, '1': 24, '1+2': 25, '1+2+4+6+8+10': 26, '1+2+6': 27, '1+2+6+8': 28, '1+3': 29, '1+3+5': 30, '1+3+5+8': 31, '1+3+6': 32, '1+3+7': 33, '1+3+8': 34, '1+4': 35, '1+4+6': 36, '1+4+6+9': 37, '1+4+7': 38, '1+4+7+10': 39, '1+4+7+9': 40, '1+4+8': 41, '1+5': 42, '1+5+8': 43, '1+5+9': 44, '1+6': 45, '1+7': 46, '10': 47, '10+0': 48, '10+0+2+5': 49, '10+0+3': 50, '10+0+4': 51, '10+0+5': 52, '10+1': 53, '10+1+3': 54, '10+1+3+5+6': 55, '10+1+3+6': 56, '10+1+4': 57, '10+1+4+6': 58, '10+1+5': 59, '10+11': 60, '10+11+3': 61, '10+11+3+5': 62, '10+2': 63, '10+2+3': 64, '10+2+4': 65, '10+2+5': 66, '10+3': 67, '11': 68, '11+0': 69, '11+0+4': 70, '11+0+4+6': 71, '11+0+4+7': 72, '11+0+5': 73, '11+1': 74, '11+1+4': 75,

In [9]:
# Two separate empty lists that will collect the input sequences and their targets
network_input, network_output = [], []

In [10]:
# Build the training pairs for the LSTM network:
#   input  -> `sequence_length` consecutive entries of `notes`
#   output -> the single entry that immediately follows that window

# Start from empty lists inside this cell. Without this, re-running the cell
# appends a second copy of every example, and it fails outright once
# network_input / network_output have been turned into arrays further down.
network_input = []
network_output = []

for i in range(len(notes) - sequence_length):

    sequence_in = notes[i:i + sequence_length]  # window of sequence_length entries
    sequence_out = notes[i + sequence_length]   # the entry right after the window

    # The network works on numeric data only, so store the integer codes.
    network_input.append([note_to_int[token] for token in sequence_in])
    network_output.append(note_to_int[sequence_out])

In [11]:
# Number of training examples: one per input sequence collected in network_input

n_patterns = len(network_input)
print(n_patterns)

60398


In [13]:
# network_input is still a list of lists here; turn it into an array of shape
# (examples, sequence length, 1), the layout used for the LSTM input
lstm_input_shape = (n_patterns, sequence_length, 1)
network_input = np.reshape(network_input, lstm_input_shape)
print(network_input.shape)

(60398, 100, 1)


In [14]:
# Scale the integer codes by the vocabulary size so every input value lies in [0, 1)
normalised_network_input = network_input/float(n_vocab)

In [15]:
# network_output holds the class index of each target; one-hot encode it.
# Pass num_classes explicitly: otherwise the width is inferred from the largest
# index that appears as a target, which is smaller than n_vocab whenever the
# highest-coded entry only occurs inside the first `sequence_length` notes.
network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)

In [16]:
# Show the final shapes of the targets and the normalised inputs, in that order
for prepared in (network_output, normalised_network_input):
    print(prepared.shape)

(60398, 359)
(60398, 100, 1)


### Save the Prepared Network Input and Output

In [17]:
# Pickle the normalised inputs and the one-hot targets, each to its own file
for target_path, payload in (
    ("./Files/network_input.pkl", normalised_network_input),
    ("./Files/network_output.pkl", network_output),
):
    with open(target_path, "wb") as out_file:
        pickle.dump(payload, out_file)