# Automatic Music Chords Generation using LSTM networks

## Set random seed

In [1]:
# Single seed value shared by every random number generator seeded below.
seed = 1988

import os
# NOTE(review): Python fixes hash randomisation at interpreter start-up, so
# setting PYTHONHASHSEED here presumably only affects child processes, not the
# running kernel - confirm, or export the variable before launching Jupyter.
os.environ['PYTHONHASHSEED']=str(seed)

import random
# Seed Python's built-in `random` module.
random.seed(seed)

import numpy as np
# Seed NumPy's legacy global generator (used later by np.random.randint and
# np.random.multinomial).
np.random.seed(seed)

import tensorflow as tf
# Seed TensorFlow through the TF 1.x API.
tf.set_random_seed(seed)

from keras import backend as bk
# Restrict TensorFlow to one intra-op and one inter-op thread and make Keras use
# that session; presumably done to reduce run-to-run nondeterminism.
conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
session = tf.Session(graph=tf.get_default_graph(), config=conf)
bk.set_session(session)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import pandas as pd
import glob
import json
import time
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.utils import to_categorical
from keras import optimizers
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping
from xml.dom.minidom import parse, parseString
from xml.parsers.expat import ExpatError

In [3]:
# Length of the chord window used as one training input (the slice length when
# the `sequences` list is built).
input_len = 8
# Number of chords reserved after each window as the prediction target.
pred_len = 1
# Number of classes used for one-hot encoding (passed as `num_classes`);
# presumably the seven diatonic scale degrees.
vocab_size = 7

# Select genres

In [4]:
# Visit every song directory of the dataset and gather the distinct genre
# labels, keeping them in the order in which they are first encountered.
path = r'.'
genres = []

for sub_path in glob.iglob(path + '/hooktheory_dataset/datasets/xml/**/**/**/'):
    file = sub_path + '/song_info.json'

    with open(file, 'r') as load_f:
        load_dict = json.load(load_f)

    for genre in load_dict['genres']:
        # Skip labels that were already recorded by an earlier song.
        if genre in genres:
            continue
        genres.append(genre)

In [5]:
print(genres, end = '')

['Metal', 'Rock', 'Pop', 'Singer-Songwriter', 'Alternative', 'Indie', 'House', 'Soundtrack', 'Hip-Hop/Rap', 'R & B', 'Dance', 'Electronic', 'Holiday', 'Worship', 'Classical', 'Video Game', 'Reggae', 'Experimental', 'Jazz', 'Blues', 'Soul', 'K-pop', "Children's", 'Vocal', 'J-Pop', 'Techno', 'World', 'Alt-Country', 'Disney', 'Folk', 'Punk', 'Country', 'Latin']

In [6]:
len(genres)

33

In [7]:
# Genres whose songs are used to build the training data (22 labels).
select_genres = [
    'Metal', 'Rock', 'Pop', 'Singer-Songwriter',
    'Alternative', 'Indie', 'House', 'Dance',
    'Electronic', 'Video Game', 'Reggae', 'Experimental',
    'Jazz', 'Blues', 'Soul', 'K-pop',
    'J-Pop', 'Techno', 'Alt-Country', 'Folk',
    'Punk', 'Country',
]

In [8]:
len(select_genres)

22

# Load the data

In [9]:
def _section_chords(xml_path):
    """Return the zero-based scale degrees of all non-rest chords in one section file.

    Parses `xml_path` with minidom, reads the text of the first <sd> child of
    every <chord> element, skips entries whose value is 'rest' and converts the
    remaining values to `int(value) - 1`.

    Raises:
        ExpatError: if the file is not well-formed XML (handled by the caller).
    """
    degrees = []
    for chord in parse(xml_path).getElementsByTagName("chord"):
        # Look the value up once instead of once for the test and once for the cast.
        sd = chord.getElementsByTagName("sd")[0].childNodes[0].nodeValue
        if sd != 'rest':
            degrees.append(int(sd) - 1)
    return degrees


# Build the training samples: for every song tagged with at least one selected
# genre, concatenate the chords of all its sections and slide a window of
# `input_len` chords over them; the chord that follows each window is its target.
path = r'.'
sequences = list()
next_chord = list()
count_songs = 0
skipped_sections = 0  # section files that could not be parsed as XML

for sub_path in glob.iglob(path + '/hooktheory_dataset/datasets/xml/**/**/**/'):

    with open(sub_path + '/song_info.json', 'r') as load_f:
        load_dict = json.load(load_f)

    # A song is used (and counted) once, no matter how many of its genres match.
    if not any(genre in select_genres for genre in load_dict['genres']):
        continue
    count_songs += 1

    chords = []
    for section in load_dict['section']:
        try:
            chords.extend(_section_chords(sub_path + '/' + section + '.xml'))
        except ExpatError:
            # Best effort: malformed section files are skipped, but counted so
            # the amount of dropped data can be inspected afterwards.
            skipped_sections += 1

    for i in range(input_len, len(chords) - pred_len + 1):
        sequences.append(chords[i - input_len : i])
        next_chord.append(chords[i])

In [10]:
# Short aliases for the input windows (a) and their target chords (b).
a, b = sequences, next_chord

In [11]:
len(sequences)

111478

In [12]:
count_songs

5720

In [13]:
# Turn windows and targets into NumPy arrays; X and y initially refer to the
# very same array objects as a and b.
a, b = np.array(a), np.array(b)
X = a
y = b

## Use one-hot encoding to convert the numerals into vectors

In [14]:
# One-hot encode inputs and targets.
# The encoding is derived from the integer arrays `a` and `b` (not from X and y
# themselves), so re-running this cell does not one-hot encode already encoded
# data. `to_categorical` accepts an n-dimensional integer array and appends the
# class axis, so the whole window array is encoded in one call instead of one
# call per row.
X = to_categorical(a, num_classes=vocab_size)
y = to_categorical(b, num_classes=vocab_size)

## Split the dataset into training and test sets

In [15]:
# Chronological 70/30 split: the first 70% of the samples are used for
# training, the remainder for testing (no shuffling).
train_size = int(len(a) * 0.70)
test_size = len(a) - train_size
train_X, train_y = X[:train_size], y[:train_size]
test_X, test_y = X[train_size:], y[train_size:]

## Define the LSTM network

In [16]:
# Two stacked 128-unit LSTM layers, each followed by 20% dropout, and a softmax
# layer with one output per chord class.
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM receives one
# vector per time step.
model.add(LSTM(128, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(train_y.shape[1], activation='softmax'))
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 8, 128)            69632     
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 903       
Total params: 202,119
Trainable params: 202,119
Non-trainable params: 0
_________________________________________________________________
None


## Fit the LSTM network

In [17]:
# Train the "8 for 1" model: 8 input chords -> 1 predicted chord.
start = time.time()

# Adam with an initial learning rate of 0.01.
opt = optimizers.adam(lr=0.01)
# Halve the learning rate whenever val_acc fails to improve for one epoch.
rlrop = ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=1)
# Stop once val_acc has not improved by at least 0.0001 for three epochs.
# NOTE(review): no weight restoring is requested, so presumably the weights of
# the last epoch (not the best one) are kept - confirm for this Keras version.
estop = EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=3)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# NOTE(review): the test split doubles as validation data and drives both
# callbacks, so val_acc is not an independent estimate of test accuracy.
model.fit(train_X, train_y, batch_size=64, epochs=100, verbose=2, callbacks=[rlrop, estop], validation_data = (test_X, test_y))

end = time.time()
# Wall-clock training time in whole seconds.
print('Time elapsed: %d'%(end - start))

Train on 78034 samples, validate on 33444 samples
Epoch 1/100
 - 89s - loss: 1.5770 - acc: 0.4059 - val_loss: 1.4888 - val_acc: 0.4627
Epoch 2/100
 - 85s - loss: 1.4212 - acc: 0.4957 - val_loss: 1.4547 - val_acc: 0.4818
Epoch 3/100
 - 90s - loss: 1.3679 - acc: 0.5187 - val_loss: 1.4424 - val_acc: 0.4922
Epoch 4/100
 - 90s - loss: 1.3329 - acc: 0.5339 - val_loss: 1.4355 - val_acc: 0.4923
Epoch 5/100
 - 91s - loss: 1.3036 - acc: 0.5460 - val_loss: 1.4380 - val_acc: 0.4923
Epoch 6/100
 - 89s - loss: 1.2309 - acc: 0.5705 - val_loss: 1.4389 - val_acc: 0.5006
Epoch 7/100
 - 88s - loss: 1.1930 - acc: 0.5858 - val_loss: 1.4497 - val_acc: 0.4970
Epoch 8/100
 - 86s - loss: 1.1349 - acc: 0.6043 - val_loss: 1.4725 - val_acc: 0.4969
Epoch 9/100
 - 88s - loss: 1.0894 - acc: 0.6218 - val_loss: 1.4947 - val_acc: 0.4947
Time elapsed: 796


## Define a function that selects the next chord by sampling from the predicted distribution

In [18]:
def sample(a):
    """Draw one class index at random according to the probabilities in `a`.

    Args:
        a: 1-D sequence of non-negative class probabilities (e.g. the softmax
            output of the network). It does not have to sum to exactly 1; it
            is renormalised before sampling.

    Returns:
        The index of the sampled class (NumPy integer).
    """
    # Softmax outputs are float32 and may sum to slightly more than 1 once cast
    # to float64, which makes np.random.multinomial raise
    # "sum(pvals[:-1]) > 1.0". Cast first and renormalise in double precision.
    probs = np.asarray(a, dtype=np.float64)
    probs = probs / probs.sum()
    return np.argmax(np.random.multinomial(1, probs, 1))

## Define a function to transform the numerals into symbols

In [19]:
# 1-based lookup from chromatic index to note name (1 = 'C' ... 12 = 'B').
# The spellings, including '#F' and 'bB', are reproduced exactly because they
# appear verbatim in the generated chord symbols.
key_dic = dict(enumerate(
    ['C', 'Db', 'D', 'Eb', 'E', 'F', '#F', 'G', 'Ab', 'A', 'bB', 'B'],
    start=1,
))

In [20]:
def transform(output, tonic_idx=None, mode=None):
    """Replace the scale degrees in `output` by chord symbols, in place.

    Args:
        output: mutable sequence of scale degrees 1..7. Every element is
            overwritten with the corresponding chord symbol.
        tonic_idx: index of the tonic in `key_dic` (1..12). When omitted, the
            module-level `key_idx` is used, as in the original call form
            `transform(o)`.
        mode: 'major' selects the major-scale chords; any other value selects
            the (natural) minor-scale chords. When omitted, the module-level
            `scale` is used.

    Returns:
        The same `output` object, now holding strings such as 'C', 'Am', 'Bdim'.

    Raises:
        ValueError: if `output` contains a value that is not a degree 1..7.
            Nothing is modified in that case.
    """
    # degree -> (semitones above the tonic, chord-quality suffix)
    major_degrees = {1: (0, ''), 2: (2, 'm'), 3: (4, 'm'), 4: (5, ''),
                     5: (7, ''), 6: (9, 'm'), 7: (11, 'dim')}
    minor_degrees = {1: (0, 'm'), 2: (2, 'dim'), 3: (3, ''), 4: (5, 'm'),
                     5: (7, 'm'), 6: (8, ''), 7: (10, '')}

    # Fall back to the notebook-level settings when no explicit key is given.
    tonic = key_idx if tonic_idx is None else tonic_idx
    chosen_mode = scale if mode is None else mode
    table = major_degrees if chosen_mode == 'major' else minor_degrees

    # Validate before touching the list so a bad value never leaves it half
    # converted (previously any unknown value was treated as degree 7).
    invalid = [degree for degree in output if degree not in table]
    if invalid:
        raise ValueError('scale degrees must be in 1..7, got %r' % (invalid,))

    for n in range(len(output)):
        offset, suffix = table[output[n]]
        # key_dic is 1-based: shift to 0-based, wrap around the octave, shift back.
        output[n] = key_dic[(tonic - 1 + offset) % 12 + 1] + suffix

    return output

## Prediction

In [21]:
# Generate four progressions of `output_len` chords each. Every progression
# starts from a random window of `input_len` zero-based chord indices; the
# sampled chord is appended to the window and the oldest chord is dropped.
# The former variable names are avoided on purpose: `input` shadowed the
# builtin and `next_chord` overwrote the list of training targets.
output_len = 12
outputs = list()

for i in range(4):

    window = list(np.random.randint(low=0, high=vocab_size, size=input_len))
    output = list()

    for j in range(output_len):
        # Batch of one window, one-hot encoded as the network expects.
        encoded = to_categorical([window], num_classes=vocab_size)
        y_pred = model.predict(encoded, verbose=0)[0]
        chord_idx = sample(y_pred)
        # Stored as 1-based scale degree; the window keeps 0-based indices.
        output.append(chord_idx + 1)
        window = window[1:] + [chord_idx]

    outputs.append(output)

    print(output)

[4, 4, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2]
[3, 6, 5, 4, 3, 3, 2, 4, 5, 1, 5, 6]
[5, 2, 6, 6, 3, 5, 4, 6, 5, 5, 4, 5]
[6, 6, 4, 4, 4, 4, 4, 5, 5, 5, 5, 1]


In [22]:
# Render the first generated progression in E major (key_dic[5]).
key_idx, scale = 5, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs[0]))
print(o)

['A', 'A', '#Fm', '#Fm', 'E', 'E', '#Fm', '#Fm', 'E', 'E', '#Fm', '#Fm']


In [23]:
# Render the second generated progression in C major (key_dic[1]).
key_idx, scale = 1, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs[1]))
print(o)

['Em', 'Am', 'G', 'F', 'Em', 'Em', 'Dm', 'F', 'G', 'C', 'G', 'Am']


In [24]:
# Render the third generated progression in B major (key_dic[12]).
key_idx, scale = 12, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs[2]))
print(o)

['#F', 'Dbm', 'Abm', 'Abm', 'Ebm', '#F', 'E', 'Abm', '#F', '#F', 'E', '#F']


In [25]:
# Render the fourth generated progression in C major (key_dic[1]).
key_idx, scale = 1, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs[3]))
print(o)

['Am', 'Am', 'F', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'C']


## Encourage diversity in the predictions

In [26]:
def sample(a, temperature=1.2):
    """Draw one class index from `a` after temperature re-scaling.

    The probabilities are raised to the power 1/temperature and renormalised,
    so a temperature above 1 flattens the distribution (more varied samples)
    and a temperature below 1 sharpens it.

    Args:
        a: 1-D sequence of non-negative class probabilities.
        temperature: positive scaling factor; defaults to the value 1.2 that
            was previously hard-coded.

    Returns:
        The index of the sampled class (NumPy integer).

    Raises:
        ValueError: if `temperature` is not positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    # Work in float64: float32 probabilities can sum to slightly more than 1
    # after the cast inside np.random.multinomial, which then raises.
    probs = np.asarray(a, dtype=np.float64)
    # log(0) = -inf is the intended result for impossible classes; only the
    # accompanying warning is suppressed.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature
    # Subtracting the maximum leaves the normalised result unchanged and avoids
    # overflow in exp().
    weights = np.exp(logits - np.max(logits))
    probs = weights / weights.sum()
    return np.argmax(np.random.multinomial(1, probs, 1))

In [27]:
# Generate four more progressions of `output_len` chords each, using whichever
# `sample` function is currently defined. Every progression starts from a
# random window of `input_len` zero-based chord indices; the sampled chord is
# appended to the window and the oldest chord is dropped.
# The former variable names are avoided on purpose: `input` shadowed the
# builtin and `next_chord` overwrote the list of training targets.
output_len = 12
outputs2 = list()

for i in range(4):

    window = list(np.random.randint(low=0, high=vocab_size, size=input_len))
    output = list()

    for j in range(output_len):
        # Batch of one window, one-hot encoded as the network expects.
        encoded = to_categorical([window], num_classes=vocab_size)
        y_pred = model.predict(encoded, verbose=0)[0]
        chord_idx = sample(y_pred)
        # Stored as 1-based scale degree; the window keeps 0-based indices.
        output.append(chord_idx + 1)
        window = window[1:] + [chord_idx]

    outputs2.append(output)

    print(output)

[4, 3, 7, 4, 3, 5, 6, 3, 5, 4, 3, 6]
[1, 6, 7, 7, 1, 7, 6, 1, 7, 7, 6, 1]
[1, 1, 1, 5, 5, 5, 4, 4, 1, 5, 6, 6]
[1, 3, 1, 3, 2, 5, 6, 1, 3, 2, 7, 6]


In [28]:
# Render the first progression of the second batch in A minor (key_dic[10]).
key_idx, scale = 10, 'minor'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs2[0]))
print(o)

['Dm', 'C', 'G', 'Dm', 'C', 'Em', 'F', 'C', 'Em', 'Dm', 'C', 'F']


In [29]:
# Render the second progression of the second batch in C minor (key_dic[1]).
key_idx, scale = 1, 'minor'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs2[1]))
print(o)

['Cm', 'Ab', 'bB', 'bB', 'Cm', 'bB', 'Ab', 'Cm', 'bB', 'bB', 'Ab', 'Cm']


In [30]:
# Render the third progression of the second batch in C major (key_dic[1]).
key_idx, scale = 1, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs2[2]))
print(o)

['C', 'C', 'C', 'G', 'G', 'G', 'F', 'F', 'C', 'G', 'Am', 'Am']


In [31]:
# Render the fourth progression of the second batch in A major (key_dic[10]).
key_idx, scale = 10, 'major'
key = key_dic[key_idx]

# transform() rewrites the copy in place and hands the same list back.
o = transform(list(outputs2[3]))
print(o)

['A', 'Dbm', 'A', 'Dbm', 'Bm', 'E', '#Fm', 'A', 'Dbm', 'Bm', 'Abdim', '#Fm']
