# Music Generation Using Neural Networks

### Importing all the required libraries

In [23]:
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, SimpleRNN, LSTM, Dropout
from keras.callbacks import History
from keras import backend as K
import keras

In [2]:
def sample(preds, temperature):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the resulting
    distribution using NumPy's global random state.

    Parameters:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: scaling factor. Values below 1 sharpen the distribution,
            values above 1 flatten it. A value of exactly 0 selects the most
            probable index deterministically.

    Returns:
        The sampled index (NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Zero temperature is the greedy limit; dividing by it would only
        # produce NaNs, so pick the most probable index directly.
        return np.argmax(preds)
    with np.errstate(divide='ignore'):
        # log(0) is -inf, which maps back to a probability of exactly 0 below;
        # the divide warning is therefore just noise.
        logits = np.log(preds) / temperature
    # Shifting by the maximum leaves the normalised result unchanged but keeps
    # exp() from underflowing every entry to 0 at small temperatures.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

### Loading the data

Load the ABC-notation sheet music from the `.txt` file at `path` into `data_str`.

Function name: `loadData(path)`

Parameters: `path` - the path to the data file, including the file name. If the data file is in the same folder as the notebook (code), the file name alone is enough.

Returns: the contents of the file as a single string

In [3]:
def loadData(path):
    """Read the text file at `path` and return its whole contents as one string.

    Parameters:
        path: path of the file to read (opened in text mode with the
            platform's default encoding).

    Returns:
        The file contents as a `str`.
    """
    # The context manager closes the file handle as soon as the read finishes
    # instead of leaving it open until garbage collection.
    with open(path) as f:
        return f.read()
    
data_str = loadData("Database2.txt")
# Print only the head of the corpus: dumping the whole file floods the notebook
# output without adding information.
print(data_str[:1000])
print("total data length", len(data_str))

<start>
X:1
T: La Montfarine
Z:Transcrit et/ou corrig? par Michel BELLON - 2005-07-24
Z:Pour toute observation mailto:galouvielle@free.fr
M: 4/4
L: 1/8
Q:1/4=186
FGF B=AG G=AG F2F FGF {F}F2E EFE|
{E}E2D FGF B=AG G=AG {F}F2F FED C2G D2E|F3 {F}F/2 ED E3/2D/2|
EC FE E2 DC|DC C2 GD2E|F3F/2F/2 DE FD|EC B,C D2 B,G|
cB =A2 FG2E|F3F GF B=A|GG =AG F2 FF|GF F/2F3/2 EE FE|
E/2E3/2 DF GF B=A|GG =AG F/2F3/2 FF|ED C2 GD2E|F3F/2F/2 ED E3/2D/2|
EC FE E2 DC|DC C2 GD2E|F3F/2F/2 DE FD|EC B,C D2 B,G|
cB =A2 FF G=A|B3
G2_A G/2G3/2-|GF/2F/2 ED F2 EB,|CD EF GB AG|
A3A GA F2|F/2F/2F EF D3/2B,/2 B,B,|CD EF GA GF|G3B/2B/2 AB G2|
GG/2G/2 FG E2 EB/2B/2|AG BA GG AB|c3c/2c3/2d f2|ed eG c2 BB|
AB dc BG AD|E3F GF BA|GG AG F2 FF|GF F/2F3/2 EE FE|
E/2E3/2 DF GF BA|GG AG F/2F3/2 FF|ED C2 GD2E|F2- F/2
<end>
<start>
X: 2
T:Stella splendens
C:Livre Vermeil de Montserrat (
<end>
<start>
XIV?me si?cle)
Z:Transcrit et/ou corrig? par Michel BELLON - 2005-03-27
Z:Pour toute observation mailto:galouvielle@free.fr
M:2/2
L:1/4
Q:1

### Vocabulary setting

Here, `data` stores every character of the corpus as a list of characters. `data_set` is the sorted list of the distinct characters in `data`; it is the full character vocabulary of the sheet music.

`vocabLen` stores the size of that vocabulary (the number of distinct characters).

In [4]:
def findVocab(dataStr):
    """Return the distinct elements of `dataStr` as a list in ascending order."""
    vocabulary = list(set(dataStr))
    vocabulary.sort()
    return vocabulary

In [5]:
# Split the corpus into single characters and derive the sorted vocabulary.
data = list(data_str)
data_set = findVocab(data_str)
vocabLen = len(data_set)
print("data is ", data[:100])
print("data set is \n", data_set)
print("Length of vocabulary = ", vocabLen)

data is  ['<', 's', 't', 'a', 'r', 't', '>', '\n', 'X', ':', '1', '\n', 'T', ':', ' ', 'L', 'a', ' ', 'M', 'o', 'n', 't', 'f', 'a', 'r', 'i', 'n', 'e', '\n', 'Z', ':', 'T', 'r', 'a', 'n', 's', 'c', 'r', 'i', 't', ' ', 'e', 't', '/', 'o', 'u', ' ', 'c', 'o', 'r', 'r', 'i', 'g', '?', ' ', 'p', 'a', 'r', ' ', 'M', 'i', 'c', 'h', 'e', 'l', ' ', 'B', 'E', 'L', 'L', 'O', 'N', ' ', '-', ' ', '2', '0', '0', '5', '-', '0', '7', '-', '2', '4', '\n', 'Z', ':', 'P', 'o', 'u', 'r', ' ', 't', 'o', 'u', 't', 'e', ' ', 'o']
data set is 
 ['\t', '\n', ' ', '!', '"', '#', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~']
Len

### Enumeration of vocabulary

`char_2_idx` is the character-to-index dictionary: each character in the vocabulary (`data_set`) is assigned a unique integer, namely its position in the sorted vocabulary, which is how the sequential neural network refers to it. `idx_2_char` is the inverse dictionary, mapping each index back to its character.

In [6]:
def indexing(d):
    """Build forward and reverse position lookups for the items of `d`.

    Returns a pair `(item_to_index, index_to_item)`, where the index of an
    item is its position when iterating over `d`.
    """
    item_to_index = dict((item, position) for position, item in enumerate(d))
    index_to_item = dict(enumerate(d))
    return item_to_index, index_to_item

In [7]:
# Build both lookup tables from the sorted vocabulary:
#   char_2_idx maps each vocabulary character to its position in data_set,
#   idx_2_char maps each position back to its character.
char_2_idx,idx_2_char=indexing(data_set)
print("character to index enumeration",char_2_idx)
print("index to character enumeration",idx_2_char)

character to index enumeration {'\t': 0, '\n': 1, ' ': 2, '!': 3, '"': 4, '#': 5, '&': 6, "'": 7, '(': 8, ')': 9, '*': 10, '+': 11, ',': 12, '-': 13, '.': 14, '/': 15, '0': 16, '1': 17, '2': 18, '3': 19, '4': 20, '5': 21, '6': 22, '7': 23, '8': 24, '9': 25, ':': 26, '<': 27, '=': 28, '>': 29, '?': 30, '@': 31, 'A': 32, 'B': 33, 'C': 34, 'D': 35, 'E': 36, 'F': 37, 'G': 38, 'H': 39, 'I': 40, 'J': 41, 'K': 42, 'L': 43, 'M': 44, 'N': 45, 'O': 46, 'P': 47, 'Q': 48, 'R': 49, 'S': 50, 'T': 51, 'U': 52, 'V': 53, 'W': 54, 'X': 55, 'Y': 56, 'Z': 57, '[': 58, '\\': 59, ']': 60, '^': 61, '_': 62, 'a': 63, 'b': 64, 'c': 65, 'd': 66, 'e': 67, 'f': 68, 'g': 69, 'h': 70, 'i': 71, 'j': 72, 'k': 73, 'l': 74, 'm': 75, 'n': 76, 'o': 77, 'p': 78, 'q': 79, 'r': 80, 's': 81, 't': 82, 'u': 83, 'v': 84, 'w': 85, 'x': 86, 'y': 87, 'z': 88, '{': 89, '|': 90, '}': 91, '~': 92}
index to character enumeration {0: '\t', 1: '\n', 2: ' ', 3: '!', 4: '"', 5: '#', 6: '&', 7: "'", 8: '(', 9: ')', 10: '*', 11: '+', 12: ',

### Splitting of training and validation data

90% of the data is used for training and the remaining 10% for validation. `data` is the NumPy array holding, for every character in the ABC sheet, its index from the `char_2_idx` dictionary. `totalChars` is the total number of characters (indices) in the data.

`train_data` is the slice of this index array from the start up to 90% of its length; it is the input to the LSTM/sequential neural network. `y_train` holds the targets (labels) for the LSTM: it is the same slice shifted one position forward, i.e. it starts at the second index and has the same length. Together they define a character-to-next-character predictor.

`train_data` is then one-hot encoded with the `to_categorical` function.

In [8]:
# Encode the corpus: one vocabulary index per character of data_str, in order.
# NOTE(review): this rebinds `data`, which an earlier cell bound to a list of characters.
data = np.array([char_2_idx[i] for i in data_str])
totalChars=len(data)
print(data)
print(totalChars)

[27 81 82 ..., 76 66 29]
501470


#### Training Data Slicing

In [9]:
# Inputs: the first 90% of the index array.
train_data = data[0:int(0.9 * len(data))]
# Targets: the same window shifted one position forward, so each entry is the
# character that follows the corresponding input character.
y_train = data[1:int(0.9 * len(data) + 1)]
trainLen=len(train_data)
print("length of training dataand labels",trainLen,len(y_train))
# One-hot encode the inputs: one row per character, len(data_set) columns.
train_data = to_categorical(train_data, len(data_set))
print(train_data.shape)

length of training dataand labels 451323 451323
(451323, 93)


### Preparing Data Batches

`batch_size` defines how many consecutive characters are grouped into one input sequence for the LSTM.

`length_to_keep` is the number of one-hot elements (characters × vocabulary size) kept after dropping the trailing characters that do not fill a whole sequence.

`train_data` is cut into sequences of 25 (`batch_size`) characters and then reshaped into a 3D array of shape (18052, 25, 93): 18052 sequences of 25 characters, each character in its one-hot representation of length 93.

In [10]:
# Preparing data batches
batch_size = 25
length_to_keep = int(len(train_data) / batch_size) * len(data_set)*batch_size
print(length_to_keep)
train_data = train_data[0:int(length_to_keep / len(data_set)), :].copy()
print(train_data.shape)
train_data = np.reshape(train_data, (int(len(train_data) / batch_size), batch_size, len(data_set)))
print(train_data.shape)

41970900
(451300, 93)
(18052, 25, 93)


`X` and `y` are the inputs and the targets. `X` takes each sequence from its 0th element to its second-to-last element; `y` takes the same sequence from its 1st element to its last element. Hence the labels are shifted by one time step: they are the characters that follow the input characters in each sequence. Each character is a one-hot vector of length 93.

In [11]:
# Inputs: every position of each group except the last one.
X = train_data[:, :-1, :]
# Targets: the same groups shifted by one position (every position except the first).
y = train_data[:, 1:, :]
print(X.shape)
print(y.shape)

(18052, 24, 93)
(18052, 24, 93)


### Validation Data Preperation

`val_data` is the last 10% of the data (without its final character) and `y_val` is the same slice shifted one position forward, i.e. the validation targets (labels). `data` here refers to the index of each character of the ABC notation, in the order in which it appears.

We then convert `val_data` to its one-hot representation and store it in `X_val`. `length_to_keep` specifies how much of the data is kept so that it divides evenly into sequences of `batch_size` characters; `X_val` is then reshaped into sequences exactly as was done for the training data.

In [12]:
# Validation inputs: the last 10% of the index array, minus its final element
# so that every input has a following character.
val_data = data[int(0.9 * len(data)):-1]
# Validation targets: the same window shifted one position forward.
# NOTE(review): y_val is reassigned from X_val in a later cell.
y_val = data[int(0.9 * len(data)) + 1:]
print("length of validation data \n", len(val_data))
print("val data\n", val_data)
print("length of validation labels(targets) \n", len(y_val))
print("val targets\n", y_val)

# One-hot encode the validation inputs: one row per character.
X_val = to_categorical(val_data, len(data_set))
print("categorical validation data\n", X_val)
# Number of one-hot elements (rows x columns) in the longest prefix whose row
# count is a multiple of batch_size.
length_to_keep = int(len(X_val) / batch_size) * len(data_set)* batch_size
print("length to keep\n",length_to_keep)
# Convert the element count back to a row count and drop the incomplete tail.
X_val = X_val[0:int(length_to_keep / len(data_set)), :].copy()
print("data validation which is sliced is\n",X_val)
print("old shape", X_val.shape)
# (rows, vocab) -> (rows / batch_size, batch_size, vocab)
X_val = np.reshape(X_val, (int(len(X_val) / batch_size), batch_size, len(data_set)))
print("new shape", X_val.shape)

length of validation data 
 50146
val data
 [83 75 77 ..., 67 76 66]
length of validation labels(targets) 
 50146
val targets
 [75 77 83 ..., 76 66 29]
categorical validation data
 [[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
length to keep
 4661625
data validation which is sliced is
 [[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  1. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
old shape (50125, 93)
new shape (2005, 25, 93)


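Split each validation sequence into inputs (all but the last character) and targets (all but the first character). `y_val` is computed first because `X_val` is overwritten afterwards.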

In [13]:
# Targets: positions 1..end of each group. This has to run before X_val is
# trimmed on the next line, because it slices the untrimmed X_val.
y_val = X_val[:, 1:, :]
# Inputs: positions 0..end-1 of each group.
X_val = X_val[:, :-1, :]
print (y_val.shape)
print(X_val.shape)

(2005, 24, 93)
(2005, 24, 93)


### Model Parameters

`epochs` specify the number of epochs we want to run our training for

`vocab_size` is the size of the vocabulary in our input

`input_dim` is the input dimensions(max input size) for the LSTM which is equal to the vocabulary length

`output_dim` are the output or the target dimensions which are the same as input dimensions or the total vocabulary size

`hidden_dim` is the number of hidden units in each LSTM layer, i.e. the dimensionality of the layer's hidden state and output

In [14]:
# Hyper-parameters shared by the model definition and the training call.
epochs = 100
hidden_dim = 128
# Input and output widths both equal the vocabulary size.
vocab_size = len(data_set)
input_dim = output_dim = vocab_size

print("No. of epochs", epochs)
print("vocabulary size", vocab_size)
print("input and output sizes", input_dim, output_dim)
print("hidden layer dimensions(unrollability of LSTM)", hidden_dim)

No. of epochs 100
vocabulary size 93
input and output sizes 93 93
hidden layer dimensions(unrollability of LSTM) 128


### LSTM MODEL

The model consists of two stacked LSTM layers of 128 units each, with a dropout of 0.1 after each LSTM, followed by a dense softmax layer over the vocabulary.

`return_sequences=True` makes the first LSTM output its hidden state at every time step, so that the second LSTM receives a full sequence as its input

In [15]:
# Two stacked LSTM layers followed by a softmax classifier over the vocabulary.
rnn_model = Sequential()
# First LSTM: accepts sequences of any length of one-hot vectors and, because of
# return_sequences=True, emits an output for every time step.
rnn_model.add(keras.layers.LSTM(hidden_dim, input_shape=(None, vocab_size),activation='tanh', return_sequences=True))
rnn_model.add(Dropout(0.1))
# Second LSTM: no return_sequences, so it emits a single vector per sequence.
# NOTE(review): input_shape on a non-first layer looks redundant - confirm it can be dropped.
rnn_model.add(keras.layers.LSTM(hidden_dim, input_shape=(None, vocab_size),activation='tanh'))
#rnn_model.add(SimpleRNN(hidden_dim, activation='tanh', input_shape=(None, vocab_size)))
rnn_model.add(Dropout(0.1))
# Project to one score per vocabulary entry and normalise with softmax.
rnn_model.add(Dense(output_dim))
rnn_model.add(Activation('softmax'))
rnn_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
rnn_model.summary()



# model.add(LSTM(hidden_size, input_dim=hidden_size, input_length=seq_length, activation='tanh', return_sequences=True))
# model.add(Dropout(0.1))
# model.add(LSTM(hidden_size, input_dim=hidden_size, input_length=seq_length, activation='tanh', return_sequences=True))
# model.add(Dropout(0.1))
# model.add(TimeDistributedDense(X.shape[2]))         # hidden size
# model.add(Activation('softmax'))                    # softmax layer


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, None, 128)         113664    
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 128)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 93)                11997     
_________________________________________________________________
activation_1 (Activation)    (None, 93)                0         
Total params: 257,245
Trainable params: 257,245
Non-trainable params: 0
_________________________________________________________________


## Training

We fit the model on the inputs `X` and the corresponding labels, using the last character of each target sequence in `y` as the label

In [33]:
print('Training')
# NOTE(review): this History instance is not passed to fit(); the History object
# returned by fit() is what ends up in `history`.
modelhistory = History()
# The model emits one prediction per sequence, so the label is the last
# position of each target window. `epochs` is the Keras 2 keyword; the legacy
# Keras 1 spelling `nb_epoch` is deprecated and rejected by newer releases.
history = rnn_model.fit(X, y[:, -1, :], batch_size=25, epochs=epochs, validation_data=(X_val, y_val[:, -1, :]))

Training
Train on 18052 samples, validate on 2005 samples
Epoch 1/50
   25/18052 [..............................] - ETA: 1:18 - loss: 0.0449 - acc: 1.0000



Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Generation of Audio to get the LSTM Output

We get the output of the first LSTM layer with `K.function` (`K` is the imported Keras backend), which takes the layer's input and output tensors, `[rnn_model.layers[0].input]` and `[rnn_model.layers[0].output]`, and returns a callable that is stored in `get_rnn_layer_output`

In [34]:
# Backend function mapping a model input batch to the output of the model's
# first layer (rnn_model.layers[0]).
get_rnn_layer_output = K.function([rnn_model.layers[0].input], [rnn_model.layers[0].output])

Setting the prime (seed) length and the length of the generated sequence

In [41]:
# prime_len: number of corpus characters used as the seed window fed to the model.
prime_len = 25
# gen_len: number of characters to generate after the seed.
gen_len = 900
print("prime length=", prime_len)
print("length of generated sequence(generated abc file charecters length=", gen_len )

prime length= 25
length of generated sequence(generated abc file charecters length= 900


Initializing `start_index` (the position in the ABC file where the seed starts), `d` as a counter, and an empty list of RNN activations.

In [42]:
# Offset into data_str at which the seed text starts.
start_index = 0
# Run counter; it is incremented once per temperature and used in the output file name.
d = 0
# Collects the first layer's last-time-step output for every generated character.
rnn_activations = []

`T` is the temperature parameter applied to the softmax output when sampling. It controls how random the generated sequence is: low values make the LSTM pick the most likely characters, higher values make it more exploratory

In [43]:
# Start from an empty list so that this cell can be re-run: after a run,
# `rnn_activations` is an ndarray, which has no `.append`.
rnn_activations = []
for T in [1.0]:  # sampling temperatures to generate with
    d += 1  # run counter, used in the output file name
    # Seed with prime_len characters of the corpus, starting at start_index;
    # the generated text begins with this seed.
    sentence = data_str[start_index: start_index + prime_len]
    generated = sentence
    print('Generating with seed: "' + sentence + '"')

    # Generate gen_len characters, one per iteration.
    for i in range(gen_len):
        # One-hot encode the current window of characters.
        x = np.zeros((1, prime_len, len(data_set)))
        for t, char in enumerate(sentence):
            x[0, t, char_2_idx[char]] = 1.

        # Predicted distribution over the vocabulary for the next character.
        preds = rnn_model.predict(x, verbose=0)[0]
        # Record the first layer's output at the last time step for the same input.
        layer_output = get_rnn_layer_output([x])[0]
        rnn_activations.append(layer_output[0][-1])
        # Draw the next character from the temperature-scaled distribution.
        next_index = sample(preds, T)
        next_char = idx_2_char[next_index]

        generated += next_char
        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_char

    # Save the generated ABC text; the context manager closes the file even if
    # the write fails.
    with open('pred_feature' + '_' + str(T) + '_' + str(d) + '.txt', 'w') as f:
        f.write(generated)
    # Report the shape without rebinding the list, so that further temperatures
    # can keep appending to it.
    print(np.array(rnn_activations).shape)

# Convert once, after all temperatures have run, then save the activations.
rnn_activations = np.array(rnn_activations)
np.savetxt('rnn_activations_pred', rnn_activations, delimiter=',')

Generating with seed: "<start>
X:1
T: La Montfar"
(900, 128)


### Saving the trained model

In [45]:
# serialize model to JSON
model_json = rnn_model.to_json()
with open("rnn_model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
rnn_model.save_weights("rnn_model.h5")
print("Saved model to disk")

Saved model to disk


### Loading the model

In [None]:
# # load json and create model
# json_file = open('rnn_model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)
# # load weights into new model
# loaded_model.load_weights("rnn_model.h5")
# print("Loaded model from disk")