In [1]:
import os
import numpy as np
from collections import Counter
from IPython.utils.capture import capture_output
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import ModelCheckpoint
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

from midi_util import read_midi, produce_song

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
path = os.getcwd() + "/data/"
print("Data path: ", path)

midi_file_names = [i for i in os.listdir(path) if i.endswith(".mid")]
print("# of Files Found: ", len(midi_file_names))

#hide output, gives many long annoying warnings
with capture_output():
    note_data_by_file = np.array([read_midi(path+i) for i in midi_file_names], dtype=object)

#list comp to flatten
flattened_note_data = [element for note_ in note_data_by_file for element in note_]
print("# of Notes Total: ", len(flattened_note_data))

all_unique_notes = list(set(flattened_note_data))
print("# of Unique Notes: ", len(all_unique_notes))

Data path:  /Users/danielrjohnson/Documents/OneDrive/Programming/AIO/Music-Generation/data/
# of Files Found:  35
# of Notes Total:  29748
# of Unique Notes:  993


In [3]:
note_counts = dict(Counter(flattened_note_data))

MIN_COUNT = 25
frequent_notes = [note_ for note_, count in note_counts.items() if count >= MIN_COUNT]
print("# of Notes Occurring At Least", MIN_COUNT, "Times: ", len(frequent_notes))

# of Notes Occurring At Least 25 Times:  216


In [4]:
freq_note_data = [list(filter(lambda x: x in frequent_notes, midi_data)) for midi_data in note_data_by_file]
freq_note_data = np.array(freq_note_data, dtype=object)

In [5]:
n_timesteps = 32
X, y = [], []

for notes_of_file in freq_note_data:
    for i in range(0, len(notes_of_file) - n_timesteps):
        # (n_timesteps) length note sequence
        X.append(notes_of_file[i:i + n_timesteps])
        # the note following that sequence
        y.append(notes_of_file[i + n_timesteps])

X = np.array(X)
y = np.array(y)

# X = np.array([notes_of_file[i:i + n_timesteps] for i in range(0, len(notes_of_file) - n_timesteps)] )

In [6]:
unique_x = list(set(X.ravel()))
x_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_x))
x_seq = np.array([[x_note_to_int[note] for note in row] for row in X])

unique_y = list(set(y))
y_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_y)) 
y_seq = np.array([y_note_to_int[note] for note in y])

In [7]:
x_tr, x_val, y_tr, y_val = train_test_split(x_seq, y_seq, test_size=0.2, random_state=0)

In [8]:
inputs = Input((n_timesteps, 1))
x = LSTM(128)(inputs)
x = Dense(128, activation="relu")(x)
output = Dense(128, activation="softmax")(x)

model = tf.keras.Model(inputs, output)

model.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
model.summary()

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 1)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               66560     
_________________________________________________________________
dense (Dense)                (None, 128)               16512     
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
Total params: 99,584
Trainable params: 99,584
Non-trainable params: 0
_________________________________________________________________


In [9]:
mc = ModelCheckpoint('models/best_model.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)
history = model.fit(x=x_tr, y=y_tr,
                    batch_size=128, epochs=10, 
                    validation_data=(x_val, y_val),
                    verbose=1, callbacks=[mc]
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 2.64507, saving model to models/best_model.h5
Epoch 2/10

Epoch 00002: val_loss did not improve from 2.64507
Epoch 3/10

Epoch 00003: val_loss did not improve from 2.64507
Epoch 4/10

Epoch 00004: val_loss did not improve from 2.64507
Epoch 5/10

Epoch 00005: val_loss did not improve from 2.64507
Epoch 6/10

Epoch 00006: val_loss did not improve from 2.64507
Epoch 7/10

Epoch 00007: val_loss did not improve from 2.64507
Epoch 8/10

Epoch 00008: val_loss did not improve from 2.64507
Epoch 9/10

Epoch 00009: val_loss did not improve from 2.64507
Epoch 10/10

Epoch 00010: val_loss did not improve from 2.64507


In [10]:
produce_song(x_val, n_timesteps, x_note_to_int)