# This notebook is just my rough draft of text-based music generation. The formal version is in the notebook ABC.ipynb which documents a more detailed explanation of RNN & LSTM (preliminary knowledge).

In [4]:
import numpy as np 
import random
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Bidirectional, BatchNormalization, Dropout, CuDNNLSTM
from keras import optimizers
from keras.utils import to_categorical
from keras.callbacks import LambdaCallback
import sys
import io

In [5]:
data = open('Nottingham.txt', 'r').read() # should be simple plain text file
chars = list(set(data)) #vocab of the training set
data_size, vocab_size = len(data), len(chars)
print ('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) } #encoder
ix_to_char = { i:ch for i,ch in enumerate(chars) } #decoder
encoded_data = [char_to_ix[char] for char in data]

data has 435382 characters, 93 unique.


In [6]:
hidden_size = 512
seq_len = 100
learning_rate = 2e-3

In [7]:
dataX = [encoded_data[i:i+seq_len] for i in range(0, len(encoded_data)-seq_len-1, seq_len)] #input sequences
dataY = [encoded_data[i:i+seq_len] for i in range(1, len(encoded_data)-seq_len, seq_len)] #target sequences, exactly
#1 character away from the input sequences. Eg: input 'hell' expect to output 'ello'

In [8]:
def one_hot(vector, size): #I naively coded a one_hot function by myself. But I learned 
    #easier ways to do this later. In my other notebooks, I tried different methods.
    '''input a vector of vectorized sequences and output the one-hot version of the input vector'''
    one_hot = []
    for value in vector:
        x = [0 for _ in range(size)]
        x[int(value)] = 1
        one_hot.append(x)
    return one_hot

In [9]:
X = np.array([one_hot(example, vocab_size) for example in dataX])
Y = np.array([one_hot(example, vocab_size) for example in dataY])
print(f'X-shape: {X.shape}, Y-shape: {Y.shape}')

X-shape: (4353, 100, 93), Y-shape: (4353, 100, 93)


In [10]:
dataset = [(inp, targ) for inp, targ in zip(X, Y)]
random.shuffle(dataset) # I don't know why I decided to shuffle it. It turns out that when you re-run the notebook
#X, Y will be different. In this case, even when I save the model, I can not resume the training process but to 
#start over... VERY INJUDICIOUS CHOICE

In [11]:
X = np.array([pair[0] for pair in dataset])
Y = np.array([pair[1] for pair in dataset])

In [50]:
model = Sequential() #Two LSTM layers sandwiching dropouts architecture

model.add(LSTM(hidden_size, input_shape=(seq_len, vocab_size), return_sequences=True, activation='sigmoid')) 
# sigmoid activation is commonly used for binary categorical data (especially our one-hot input).
# set 'return_sequences=True' to match the dimension of my target output.
model.add(Dropout(0.5)) #to avoid overfitting

model.add(LSTM(hidden_size, input_shape=(seq_len, vocab_size), return_sequences=True, activation='sigmoid' ))
model.add(Dropout(0.5))

model.add(Dense(vocab_size, activation = 'softmax')) #Softmax will get me a probability output distribution 
#of all vocab. For example, let's assume our vocab list is the 26 alphabet in lower case. After I input 'h', 
#'e', 'l', 'l' in each timestep, the final output should give me a list of 26 probabilities
#each corresponding to an alphabet, which might look like [0.001,0.003...]. The probability that corresponds to
# the letter 'o', will presumably have the highest value if the model is well-trained so that we can get the 
# correct prediction.

model.compile(loss='categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
# 'adam' is just a version of stochastic gradient descent.

'Categorical_crossentropy' is a loss measurement for categorical data. It's often used together with "softmax".
It's also called the negative likelihood loss. The formula is $$loss = -\sum_{j=1}^{M}y_{j}\log{\hat{y_{j}}}$$

In [51]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 100, 512)          1241088   
_________________________________________________________________
dropout_5 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 100, 512)          2099200   
_________________________________________________________________
dropout_6 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
dense_3 (Dense)              (None, 100, 93)           47709     
Total params: 3,387,997
Trainable params: 3,387,997
Non-trainable params: 0
_________________________________________________________________


In [181]:
def generate_music(model,seq_len,vocab_size):
    result = []
    start_index = random.randint(0, len(data) - seq_len - 1)
    sentence = data[start_index: start_index + seq_len]
    encoded_sentence = [char_to_ix[char] for char in sentence]
    for i in range(5):
        inp = np.array(one_hot(encoded_sentence, vocab_size)).reshape(1,seq_len,vocab_size)
        out = model.predict_classes(inp)[0] #100 predicted values 
        for value in out:
            result.append(value)
        #result.append(out[-1])
        encoded_sentence = out # use these 100 values to predict another 100 values
    decoded_result = ''
    for value in result:
        decoded_result += ix_to_char[value]
    return decoded_result #a decoded result with len=500
        
        

The model was trained super slow and not performed very well. I did not clean the data and my method of generating the music was certainly problematic. Since most idealistically, if the model had 100% accuracy, by the music generation function I coded, it would predict the same 99 overlapping notes again. Using the hello analogy, it would give me something like "helloellox", which is not what I meant to do. I decided to do a more polished version. You can view the updated one in ABC.ipynb

In [131]:
model.fit(X,Y, epochs= 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


KeyboardInterrupt: 

In [132]:
generate_music(model,seq_len,vocab_size) #result after 20 epochs

'G"c2 |3e/||"D""A2|\n\n3"D"c2c  ]"3"]]]]]"3"D""A2d  ]"3e]]]]]"D""d2AG22G2A|]"D"e2c2]"2c|\n\n\nX: 11\nT:Tont/G2B""\n/2\n\nD"dA""""\n/A"d/|/"""D"e""||\nA\nA"AD7"|"""D"/|[|||A7dA"||]]]|]|""D"A2d ][D"d""\nX: 11\nT:The e2 |/A"":A"\n2A2]AA""":AA22"3AG""d/AA"\n\n/"/A7""A"DD""A2"1"\n\n\n"/]A"\n\n[|\n\n\nDD"d2 |""c\nG2cA": 1\n\nT:The Br|""2BGD"/A": ]|]]]G""|]]]A\n]]GD"2F/A"\n2G2d"AD7A2FDD7"D"D"\nXD""2A"\n3"\n\nX\n\nG2|"\nDD""/||]A"[\n"\n:The Bre"DD"/  G2FD"\n:\n\n\n||GG"\n:|2|:A||]||2FD"/ ||A7|||]|]||G2D2D":2GD"|G"|D"\n: \n2|\nD"\n"GD2"\n\n2A""D""2e\nB2e\n'

In [133]:
model.fit(X,Y, epochs= 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100

KeyboardInterrupt: 

In [134]:
model.save('model.h5')

In [139]:
generate_music(model,seq_len,vocab_size) #result after 86 epochs

'|"/2d/2g/2e/2|"D"d/ B/2c/2B/2d/2|"Am"c/2B/2e/2B/2 "D7A/2G/2F/2A/2|\\\n"G"G/2B/2d/2c/2 "D7"B/2A/2G/2A/2"D@"/2e/2f/2f"D"f/2d/4A/2d/2 /2 "A""c/2d/2A/2c/2 "D7"/fc/2F/2G/2|\n\n"G"G/2B/2G/2B/2 BD7"B/2A/2G/2A/2|A"tA@A/2f/2g/A"f/2e/2|/4 /2|"2d"D7cA#4+/2e/2|/2|"D""d2//2d/2 /2|""\nG"G/2B/2G/2B/2 B///G/2G/2G/2B/2|"/Aa  \n2B/4|/2|A#4a/2|"2f"4f"D"/f""///||2|/2|"2f"D"ff""22f/2 f2 "DD""e/2f/2 /2A/2 "/2cc/2z/2|/2|/2|"C2|/""" 22c"4""//d/4g"A"/a"/a/f2 eb@)a""""2""D"/g"f//A"" /2|"/|"C7"ff"f"/2g"2"/2|"G@"//2|/2|\n2 "2""D"'

In [140]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100


KeyboardInterrupt: 

In [141]:
model.save('model.h5')  #94 epoch

In [143]:
model.fit(X,Y, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
  64/4353 [..............................] - ETA: 6:26 - loss: 0.9057 - accuracy: 0.7045

KeyboardInterrupt: 

In [144]:
model.save('model.h5') #97 epochs

In [145]:
model.fit(X,Y, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

KeyboardInterrupt: 

In [146]:
model.save('model.h5') #103 epochs

In [164]:
generate_music(model,seq_len,vocab_size)

'|/2|"/||""D""D/ F/|2G/2|"D"A/2B/2F/2G/2 A/2G/2F/2E/2|"D7"F/2B/2F/"D7"E/|"Gm"D/ D:|\n"/"Gm"f/ d/|"Gm"c"2 "G@"\nDD"ff"f"/2"D/2F"D"F/2F/2 /2A/2 "/2F/2F/2A/2|\\D""D/2G/2A/2G/"F/2FD""D/2F/|\n\nC@A//G/2d/2fG""g/A""D""G"""G2|e#d2 f7f+/f"f|2F/2|"2F/2FFF@"/2A/2D/2F"\n/DG"f+/2F/2F/2 /2 // D"f+/2""\nF/22F/2F/2c//|D"b/AA"AA"BA""""2" |"2"||2+e/"" 2A"G"/2F// 2C@E/2F/2F/A":C/E/"2c/2|/2GF2E+22c/A/"cceb"/2A+/2[/2A/22"/F"2|/A"AA/|AA"""A""A"E"""322cB""/e"B2 /22"+//|2|/2|/2|D"\n2 2C"/2 "2F//]/[||/2|2F//||A2c/|2Ec2c/2A|A2AA'

In [165]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100


KeyboardInterrupt: 

In [166]:
model.save('model.h5') #120 epochs

In [167]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

KeyboardInterrupt: 

In [168]:
model.save('model.h5') #131 epochs

In [169]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "//anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-169-24da9b7717fa>", line 1, in <module>
    model.fit(X,Y, epochs = 100)
  File "//anaconda3/lib/python3.7/site-packages/keras/engine/training.py", line 1239, in fit
    validation_freq=validation_freq)
  File "//anaconda3/lib/python3.7/site-packages/keras/engine/training_arrays.py", line 196, in fit_loop
    outs = fit_function(ins_batch)
  File "//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 3740, in __call__
    outputs = self._graph_fn(*converted_inputs)
  File "//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1081, in __call__
    return self._call_impl(args, kwargs)
  File "//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1121, in _call_impl
    retu

KeyboardInterrupt: 

In [170]:
model.save('model.h5') #199 epochs

In [171]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
 192/4353 [>.............................] - ETA: 8:36 - loss: 0.7134 - accuracy: 0.7611

KeyboardInterrupt: 

In [186]:
model.save('model.h5') #206

In [None]:
model.fit(X,Y, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100

In [None]:
model = load_model('model.h5')