<a href="https://colab.research.google.com/github/conwayjw97/Music-Generator-LSTM/blob/master/Music_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [Tutorial](https://towardsdatascience.com/music-generation-through-deep-neural-networks-21d7bd81496e)
# [Original Code](https://github.com/RamyaVidiyala/Generate-Music-Using-Neural-Networks/blob/master/Music_generation.ipynb)
# [Dataset](https://github.com/jukedeck/nottingham-dataset)
# [ABC to MIDI Converter](http://www.mandolintab.net/abcconverter.php)
---



# Notebook Parameters

In [1]:
# --- Dataset ---------------------------------------------------------------
# Name of the ABC tune file to train on; it is opened from the ABC_cleaned/
# folder of the checked-out dataset.
INPUT_FILE = "ashover.abc"

# --- Training hyperparameters ----------------------------------------------
TRAINING_EPOCHS = 100  # number of passes over the encoded text
SAVE_FREQUENCY = 10    # a weights checkpoint is written every this many epochs
SEQ_LENGTH = 64        # characters per training sequence (second input dimension)
BATCH_SIZE = 16        # sequences per batch (first input dimension)

# Dataset Setup

In [2]:
# Initialise empty repository to pull data into
!git init 
# Add the remote origin
!git remote add origin -f https://github.com/jukedeck/nottingham-dataset.git
# Reset the HEAD in case a different folder was already pulled
!git reset --hard HEAD
!git clean -f -d
# Tell git we are checking out specific folders
!git config core.sparsecheckout true
# Recursively checkout the dataset and models folders
!echo "ABC_cleaned/*" >> .git/info/sparse-checkout
# Pull dataset from repository
!git pull origin master

# Read file
data = open("ABC_cleaned/" + INPUT_FILE).read()
# Map file characters to unique integers
char_to_idx = {ch: i for (i, ch) in enumerate(sorted(list(set(data))))}
# Unique integers to file characters
idx_to_char = { i: ch for (ch, i) in char_to_idx.items() }
vocab_size = len(char_to_idx)

Initialized empty Git repository in /content/.git/
Updating origin
remote: Enumerating objects: 3119, done.[K
remote: Total 3119 (delta 0), reused 0 (delta 0), pack-reused 3119[K
Receiving objects: 100% (3119/3119), 879.17 KiB | 2.60 MiB/s, done.
Resolving deltas: 100% (1432/1432), done.
From https://github.com/jukedeck/nottingham-dataset
 * [new branch]      master     -> origin/master
fatal: ambiguous argument 'HEAD': unknown revision or path not in the working tree.
Use '--' to separate paths from revisions, like this:
'git <command> [<revision>...] -- [<file>...]'
Removing .config/
Removing sample_data/
From https://github.com/jukedeck/nottingham-dataset
 * branch            master     -> FETCH_HEAD


In [4]:
# Show only the beginning of the corpus: printing the entire file floods the
# notebook output and bloats the saved .ipynb.
print(data[:1000])


X: 1
T:A and A's Waltz
% Nottingham Music Database
S:Mick Peat
M:3/4
L:1/4
K:G
e|:"G"d2B|"D"A3/2B/2c|"G"B2G|"D"A2e|"G"d2B|"D"A3/2B/2c|
M:2/4
"F"B=F|
M:3/4
"G"G2e:||:
"C"g2e|"Bb"=f2d|"F"c2A|=F2e|"C"g2e|"Bb"=f2d|
M:2/4
"F"cA|
M:3/4
 [1 "G"G2e:| [2"G"G2z||


X: 2
T:Barry's Favourite
% Nottingham Music Database
S:Mick Peat
M:2/2
K:D
A2|:"D"a3/2b/2a3/2g/2 f2(3def|"Em"g3/2a/2g3/2f/2 "A"e2A2|"D"f3/2g/2f3/2e/2 d2f2\
|"Em"B3/2c/2d3/2e/2 "A"c2A2|
"D"a3/2b/2a3/2g/2 f2(3def|"Em"g3/2a/2g3/2f/2 "A"e2A2|\
"D"f3/2g/2f3/2e/2 d3/2e/2f3/2A/2|"G"B3/2d/2"A"d3/2c/2 "D"d2A2:|
|:"G"B3/2A/2B3/2g/2 "D"d2A2|"Em"e3/2d/2e3/2f/2 "A"e2a2|\
"G"b3/2a/2(3gab "D"a3/2g/2(3fga|"E"f3/2e/2(3def "A"e2A2|
"G"B3/2A/2B3/2g/2 "D"d2A2|"Em"e3/2d/2e3/2f/2 "A"e2a2|\
"G"b3/2a/2g3/2f/2 "A"a3/2g/2f3/2e/2|[1"D"d2f2 d2A2:|[2 d2f2d2||


X: 3
T:Black Boy
% Nottingham Music Database
S:Mick Peat
M:2/4
L:1/4
K:A
|:"A"A "E"c/4B/4A/4G/4|"A"A/2c/2 e/2a/2|"E"g/2b/2 e/2d/2|"A"c/2AB/2|\
"A"c/2e/2 "B"^d/2f/2|"E"e/2ge/2|"B"f/2a/2 g/4f/4e/4^d/4|"E"e 

In [5]:
# Inspect the character -> integer index vocabulary built from the dataset file
print(char_to_idx)

{'\n': 0, ' ': 1, '"': 2, '#': 3, '%': 4, '&': 5, "'": 6, '(': 7, ',': 8, '-': 9, '.': 10, '/': 11, '0': 12, '1': 13, '2': 14, '3': 15, '4': 16, '5': 17, '6': 18, '7': 19, '8': 20, '9': 21, ':': 22, '=': 23, 'A': 24, 'B': 25, 'C': 26, 'D': 27, 'E': 28, 'F': 29, 'G': 30, 'H': 31, 'I': 32, 'J': 33, 'K': 34, 'L': 35, 'M': 36, 'N': 37, 'O': 38, 'P': 39, 'R': 40, 'S': 41, 'T': 42, 'V': 43, 'W': 44, 'X': 45, 'Y': 46, '[': 47, '\\': 48, ']': 49, '^': 50, '_': 51, 'a': 52, 'b': 53, 'c': 54, 'd': 55, 'e': 56, 'f': 57, 'g': 58, 'h': 59, 'i': 60, 'j': 61, 'k': 62, 'l': 63, 'm': 64, 'n': 65, 'o': 66, 'p': 67, 'r': 68, 's': 69, 't': 70, 'u': 71, 'v': 72, 'w': 73, 'y': 74, 'z': 75, '|': 76, '~': 77}


In [6]:
# Inspect the inverse mapping (integer index -> character) used to decode samples
print(idx_to_char)

{0: '\n', 1: ' ', 2: '"', 3: '#', 4: '%', 5: '&', 6: "'", 7: '(', 8: ',', 9: '-', 10: '.', 11: '/', 12: '0', 13: '1', 14: '2', 15: '3', 16: '4', 17: '5', 18: '6', 19: '7', 20: '8', 21: '9', 22: ':', 23: '=', 24: 'A', 25: 'B', 26: 'C', 27: 'D', 28: 'E', 29: 'F', 30: 'G', 31: 'H', 32: 'I', 33: 'J', 34: 'K', 35: 'L', 36: 'M', 37: 'N', 38: 'O', 39: 'P', 40: 'R', 41: 'S', 42: 'T', 43: 'V', 44: 'W', 45: 'X', 46: 'Y', 47: '[', 48: '\\', 49: ']', 50: '^', 51: '_', 52: 'a', 53: 'b', 54: 'c', 55: 'd', 56: 'e', 57: 'f', 58: 'g', 59: 'h', 60: 'i', 61: 'j', 62: 'k', 63: 'l', 64: 'm', 65: 'n', 66: 'o', 67: 'p', 68: 'r', 69: 's', 70: 't', 71: 'u', 72: 'v', 73: 'w', 74: 'y', 75: 'z', 76: '|', 77: '~'}


# Library Imports

In [None]:
import numpy as np
import time
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding

# Network 1 Definition

In [None]:
# Training network: character embedding -> three stateful LSTM/Dropout pairs ->
# per-timestep softmax over the vocabulary.

# The embedding fixes the input to (BATCH_SIZE, SEQ_LENGTH) integer indices;
# a fixed batch shape is required because the LSTMs below are stateful.
network_layers = [
    Embedding(vocab_size, 512, batch_input_shape=(BATCH_SIZE, SEQ_LENGTH)),
]

# Every LSTM returns the full sequence so the next layer sees all timesteps.
for _ in range(3):
    network_layers.append(LSTM(256, return_sequences=True, stateful=True))
    network_layers.append(Dropout(0.2))

# One vocabulary-sized distribution per timestep.
network_layers.append(TimeDistributed(Dense(vocab_size)))
network_layers.append(Activation('softmax'))

model = Sequential(network_layers)

model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (16, 64, 512)             39936     
_________________________________________________________________
lstm_12 (LSTM)               (16, 64, 256)             787456    
_________________________________________________________________
dropout_12 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_13 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_13 (Dropout)         (16, 64, 256)             0         
_________________________________________________________________
lstm_14 (LSTM)               (16, 64, 256)             525312    
_________________________________________________________________
dropout_14 (Dropout)         (16, 64, 256)            

# Network Training

In [None]:
def read_batch(T, vocab_size, batch_size=None, seq_length=None):
    """Yield successive (X, Y) training batches cut from an encoded text.

    The text is treated as ``batch_size`` contiguous stripes of
    ``len(T) // batch_size`` characters each. Row ``b`` of every batch is read
    from stripe ``b``, and each new batch starts ``seq_length`` characters
    further along, so row ``b`` of one batch continues exactly where row ``b``
    of the previous batch stopped.

    Args:
        T: 1-D sequence of integer character indices.
        vocab_size: number of distinct indices; size of the one-hot axis of Y.
        batch_size: rows per batch. Defaults to the notebook's ``BATCH_SIZE``.
        seq_length: characters per row. Defaults to the notebook's ``SEQ_LENGTH``.

    Yields:
        Tuples ``(X, Y)`` where ``X`` has shape ``(batch_size, seq_length)``
        and holds the input indices (as floats), and ``Y`` has shape
        ``(batch_size, seq_length, vocab_size)`` and is the one-hot encoding
        of the character that follows each input position. Nothing is yielded
        when a stripe is not longer than ``seq_length``.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE
    if seq_length is None:
        seq_length = SEQ_LENGTH

    T = np.asarray(T)
    batch_chars = T.shape[0] // batch_size  # characters available to each row

    # Index helpers shaped to broadcast to (batch_size, seq_length).
    rows = np.arange(batch_size)[:, None]
    cols = np.arange(seq_length)[None, :]
    stripe_starts = batch_chars * rows

    # Stop one window early so the "+ 1" target lookup never leaves the stripe.
    for start in range(0, batch_chars - seq_length, seq_length):
        positions = stripe_starts + start + cols
        X = T[positions].astype(np.float64)
        Y = np.zeros((batch_size, seq_length, vocab_size))
        # Target for each position is the next character in the text.
        Y[rows, cols, T[positions + 1]] = 1
        yield X, Y

# Convert text data into numerical indices
T = np.asarray([char_to_idx[c] for c in data], dtype=np.int32) 

steps_per_epoch = (len(data) / BATCH_SIZE - 1) / SEQ_LENGTH  

!mkdir -p weights/
total_time = 0
loss = 0
accuracy = 0
print("Training...")
for epoch in range(1, TRAINING_EPOCHS+1):
  start = time.time()

  for i, (X, Y) in enumerate(read_batch(T, vocab_size)):
    loss, accuracy = model.train_on_batch(X, Y)

  if epoch % SAVE_FREQUENCY == 0:  
    print("Saving checkpoint to Model1.h5")
    model.save_weights("weights/Model1.h5")

  total_time += time.time()-start
  print("Epoch %d/%d - %ds - Estimated time left %ds - Loss: %.5f - Accuracy: %.5f" % (epoch, TRAINING_EPOCHS, time.time()-start, (total_time/epoch)*(TRAINING_EPOCHS-epoch), loss, accuracy))

print("Training complete!")

Training...
Epoch 1/100 - 3s - Estimated time left 367s - Loss: 3.29538 - Accuracy: 0.13086
Epoch 2/100 - 0s - Estimated time left 202s - Loss: 3.15243 - Accuracy: 0.13867
Epoch 3/100 - 0s - Estimated time left 146s - Loss: 3.07819 - Accuracy: 0.15332
Epoch 4/100 - 0s - Estimated time left 117s - Loss: 2.96394 - Accuracy: 0.17969
Epoch 5/100 - 0s - Estimated time left 100s - Loss: 2.80482 - Accuracy: 0.26074
Epoch 6/100 - 0s - Estimated time left 89s - Loss: 2.55408 - Accuracy: 0.32129
Epoch 7/100 - 0s - Estimated time left 81s - Loss: 2.33899 - Accuracy: 0.35938
Epoch 8/100 - 0s - Estimated time left 74s - Loss: 2.13661 - Accuracy: 0.41016
Epoch 9/100 - 0s - Estimated time left 69s - Loss: 2.01180 - Accuracy: 0.43457
Saving checkpoint to Model1.h5
Epoch 10/100 - 0s - Estimated time left 65s - Loss: 1.90642 - Accuracy: 0.44727
Epoch 11/100 - 0s - Estimated time left 62s - Loss: 1.81783 - Accuracy: 0.47168
Epoch 12/100 - 0s - Estimated time left 59s - Loss: 1.69926 - Accuracy: 0.50098
E

In [None]:
# NOTE(review): MODEL_DIR is not referenced by any cell visible here; weights
# are read from weights/Model1.h5 below.
MODEL_DIR = './model'

# Inference copy of the training network: identical layers, but the input is
# fixed to a single character (batch of 1, sequence of 1) so text can be
# generated one step at a time while the stateful LSTMs carry the context.
model2 = Sequential()
model2.add(Embedding(vocab_size, 512, batch_input_shape=(1,1)))
for _ in range(3):
    model2.add(LSTM(256, return_sequences=True, stateful=True))
    model2.add(Dropout(0.2))

model2.add(TimeDistributed(Dense(vocab_size))) 
model2.add(Activation('softmax'))

# Reuse the weights saved by the training loop
model2.load_weights("weights/Model1.h5")
model2.summary()

# Generation itself is done in the "Network Sampling" cell. The copy of that
# loop that used to sit here was removed: its result was overwritten by the
# sampling cell, and running it left the stateful LSTMs in a non-initial state.

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (1, 1, 512)               39936     
_________________________________________________________________
lstm_15 (LSTM)               (1, 1, 256)               787456    
_________________________________________________________________
dropout_15 (Dropout)         (1, 1, 256)               0         
_________________________________________________________________
lstm_16 (LSTM)               (1, 1, 256)               525312    
_________________________________________________________________
dropout_16 (Dropout)         (1, 1, 256)               0         
_________________________________________________________________
lstm_17 (LSTM)               (1, 1, 256)               525312    
_________________________________________________________________
dropout_17 (Dropout)         (1, 1, 256)              

# Network Sampling

In [None]:
# Number of characters to generate
SAMPLE_LENGTH = 1024

# model2 is stateful: clear whatever hidden state earlier predictions left
# behind so that re-running this cell always starts from a fresh state.
model2.reset_states()

sampled = []
for _ in range(SAMPLE_LENGTH):
    batch = np.zeros((1, 1))
    if sampled:
        # Feed the previously generated character back in
        batch[0, 0] = sampled[-1]
    else:
        # Seed with a uniformly random character (it is not part of the output)
        batch[0, 0] = np.random.randint(vocab_size)
    # Flatten the (1, 1, vocab_size) prediction into one probability vector
    result = model2.predict_on_batch(batch).ravel()
    # Draw the next character index according to the predicted distribution
    sample = np.random.choice(vocab_size, p=result)
    sampled.append(sample)

print(''.join(idx_to_char[c] for c in sampled))

"de|
"G"dgd "A"c2f|"D"fdd d2e:|
"D"fef "G"gfg|"D"aga "G"b3|"D"dce "G"=ffd|"C"edc "D"d3|


X: 36
T:Racoon Reel
% Nottingham Music Database
S:Mick Peat
M:4/4
L:1/4
K:G
f|: || :|:"G"g3/2a/2 "D"df|"G"gd2e|"D"fd "G"B2d|
"G"BA2D/2B/2c/2|"G"d2B|"G"d2B|"G"B/2d/2B/2c/2|"D"d2d|\
"G"B/2A/2B/2c/2 "D"dz:|
"G"B/2d/2B/2d/2|"D"a/2e/2f/2f/2 "A"e/2b/2g/2c/2c/2|
|:[1"D"d3/2c/2d/2 d/2f/2g/2|"D"d/2e/2d/2e/2 "D"d/2c/2d/2c/2|\
"G"=B/2G/2B/2d/2 B/2a/2f/2f/2|"D"e/2d/2c/2d/2 G/2e/2d/2c/2|
"G"=B/2G/2B/2d/2 B/2d/2G/2B/2|"D"e/2d/2A/2B/2 A/2e/2d/2f/2|\
"G"B/2d/2=G/2B/2 A/2d/2d/2G/2|
"D"=G/2A/2B/2A/2 A/2d/2f/2d/2|"D"a/2e/2B/2d/2 "G"g/2a/2b/2c/2|\
"D"a/2f/2f/2d/2 "A"g/2a/2b/2c'/2|
"D"d2 d/2A/2B/2F/2|"D"c/2e/2d/2e/2 f/2e/2f/2g/2| |[7"Bm"e/2d/2c/2d/2 "D"d/2e/2d/2f/2|"D"d/2e/2f/2f/2 "G"g/2a/2g/2g/2|
"D"f/2f/2f/2 "A"g/2g/2"D"f/2d/2|\
"D"f/2d/2 d/2f/2f/2|"D"e/2c/2d/2c/2 "G"B/2c/2B/2c/2|"G"B/2e/2g/2f/2 "A"g/2a/2c/2G/2:|
P:D
|:"D"d/2f/2d/2f/2 "G"g/2d/2B/2c/2|
"D"d2 "A7"d2:|


X: 26
T:The Fancooy
% Nottingham Music Database
