2nd attempt at generating nice music using a neural network. I will be basing this on the following tutorial:
https://www.datacamp.com/community/tutorials/using-tensorflow-to-compose-music

In [27]:
# First I need to create dataset, this will be done via webscraping

from urllib.request import urlopen, urlretrieve
from bs4 import BeautifulSoup
import time

# Directory to save the Midi files
save_dir = 'piano/'

# Url is plsit into two so we can go through the pages of the search results
url0 = 'https://www.mutopiaproject.org/cgibin/make-table.cgi?startat='
url1 = '&searchingfor=&Composer=&Instrument=Piano&Style=Jazz&collection=&id=&solo=1&recent=&timelength=1&timeunit=week&lilyversion=&preview='

# Init values
song_number = 0
link_count = 10

file_name = 0

# main loop
while link_count > 0:
    #finds the correct page of search results
    url = url0 + str(song_number) + url1
    html = urlopen(url)
    soup = BeautifulSoup(html.read())
    # Finds all the links on the page
    links = soup.find_all('a')
    link_count = 0

    for link in links:
        href = link['href']
        # Find all links with a .mid in them
        if href.find('.mid') >= 0:
            link_count = link_count + 1
            #Download that link
            urlretrieve(href, 'piano/'+str(file_name)+'.mid' )
            file_name += 1

    #+10 since there are 10 results on each page
    song_number += 10
    # Small wait to be nice to the website
    time.sleep(10.0)

In [31]:
# Next we have to extract the data from the Midi files and preprocess it

import os
from music21 import converter, pitch, interval, instrument, note, chord
import numpy as np
import tensorflow.keras.utils as np_utils

save_dir = 'piano/'

song_list = os.listdir(save_dir)

original_scores = []

# Adds the parsed songs to a list
for song in song_list:
    score = converter.parse(save_dir+song)
    original_scores.append(score)

# Remove polyphonic music (multiple instruments)
# This function checks for monophonic music
# If we don't do this, notes from multiple instruments will be combined into chords
def monophonic(stream):
    try:
        length = len(instrument.partitionByInstrument(stream).parts)
    except:
        length = 0
    return length == 1

# Loops through songs, checks they are monophonic then chordifies and adds to list
original_scores_chordified = []
for song in original_scores:
    if monophonic(song):
        original_scores_chordified.append(song.chordify())

original_scores = original_scores_chordified

# Now we need to extract the notes, chords and durations from the songs
original_chords = [[] for _ in original_scores] #empty list of lists
original_durations = [[] for _ in original_scores]
original_keys = []

for i, song in enumerate(original_scores):
    # Save the key of the song
    original_keys.append(str(song.analyze('key')))
    # Loop through the notes and chords
    for element in song:
        # if note
        if isinstance(element, note.Note):
            # add note
            original_chords[i].append(element.pitch)
            original_durations[i].append(element.duration.quarterLength)
        # if chord
        elif isinstance(element, chord.Chord):
            # add all notes making up chord
            original_chords[i].append('.'.join(str(n) for n in element.pitches))
            original_durations[i].append(element.duration.quarterLength)

    print(str(i))

# I am going to keep all the key signatures for now unlike tutorial

# Identify unique notes and chords and create dictionaries to convert to ints
unique_chords = np.unique([i for s in original_chords for i in s])
chord_to_int = dict(zip(unique_chords, list(range(0, len(unique_chords)))))

# Map durations to ints too
unique_dur = np.unique([i for s in original_durations for i in s])
dur_to_int = dict(zip(unique_dur, list(range(0, len(unique_dur)))))

print(len(unique_chords))
print(len(unique_dur))

# We also need dictionaries to convert the other way
int_to_chord = {i: c for c, i in chord_to_int.items()}
int_to_dur = {i: c for c, i in dur_to_int.items()}

# Lastly we can make our training sequences and target notes
seq_len = 32

train_chords = []
train_dur = []

target_chords = []
target_dur = []

# loop through the chords
for s in range(len(original_chords)):
    # create a list of ints from the chord list
    chord_list = [chord_to_int[c] for c in original_chords[s]]
    dur_list = [dur_to_int[d] for d in original_durations[s]]

    # make sequences 32 in length and add to the training lists
    for i in range(len(chord_list) - seq_len):
        train_chords.append(chord_list[i:i+seq_len])
        train_dur.append(dur_list[i:i+seq_len])

        target_chords.append(chord_list[i+1])
        target_dur.append(dur_list[i+1])

# Reshape to fit LSTM
input_chords = np.reshape(np.array(train_chords), (len(train_chords), seq_len,1))
input_dur = np.reshape(np.array(train_dur), (len(train_dur), seq_len, 1))
# Normalise these
input_chords = input_chords / float(len(unique_chords))
input_dur = input_dur / float(len(unique_dur))
# Make target notes categorical
target_chords = np_utils.to_categorical(target_chords)
target_dur = np_utils.to_categorical(target_dur)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
3248
16


In [25]:
# Set up our LSTM model
import tensorflow as tf

# Change lists to np arrays and get dims
train_chords = np.array(train_chords)
n_samples = train_chords.shape[0]
n_chords = train_chords.shape[1]

train_dur = np.array(train_dur)
n_dur = train_dur.shape[1]
n_samples_dur = train_dur.shape[0]

# Set input dim for NN
input_dim = n_chords * seq_len
embed_dim = 64

# We want 2 inputs and outputs
chord_input = tf.keras.layers.Input(shape = (None,))
dur_input = tf.keras.layers.Input(shape = (None,))

# Embed layers (lookup tables goes from sparse -> dense)
chord_embedding = tf.keras.layers.Embedding(n_samples, embed_dim, input_length = seq_len)(chord_input)
dur_embedding = tf.keras.layers.Embedding(n_samples_dur, embed_dim, input_length = seq_len)(dur_input)

# Merge layer that concatenates 2 embed layers before going into LSTM layers
merge_layer = tf.keras.layers.Concatenate(axis=1)([chord_embedding, dur_embedding])

# LSTM layer
lstm_layer = tf.keras.layers.LSTM(512, return_sequences=True)(merge_layer)

# Dense layer
dense_layer = tf.keras.layers.Dense(256)(lstm_layer)

# Output Layers
chord_output = tf.keras.layers.Dense(n_chords, activation = 'softmax')(dense_layer)
dur_output = tf.keras.layers.Dense(n_dur, activation = 'softmax')(dense_layer)

# Overall model
lstm = tf.keras.Model(inputs = [chord_input, dur_input], outputs = [chord_output, dur_output])


In [29]:
# Compile the model
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')


# train the model
lstm.fit([train_chords, train_dur], [np.array(target_chords), np.array(target_dur)], epochs=500, batch_size=64)

Epoch 1/500


InvalidArgumentError:  assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [64 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [64 64]
	 [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at tmp/ipykernel_4231/3803088062.py:6) ]] [Op:__inference_train_function_26317]

Function call stack:
train_function


In [28]:
lstm.summary()

Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_20 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
input_21 (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_18 (Embedding)        (None, None, 64)     862912      input_20[0][0]                   
__________________________________________________________________________________________________
embedding_19 (Embedding)        (None, None, 64)     862912      input_21[0][0]                   
____________________________________________________________________________________________