In [1]:
import os
import numpy as np
from collections import Counter
from IPython.utils.capture import capture_output
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import ModelCheckpoint
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

from midi_util import read_midi, produce_song

Init Plugin
Init Graph Optimizer
Init Kernel


In [2]:
path = os.getcwd() + "/data/"
print("Data path: ", path)

midi_file_names = [i for i in os.listdir(path) if i.endswith(".mid")]
print("# of Files Found: ", len(midi_file_names))

notes = []
durations = []
#hide output, gives many long annoying warnings
with capture_output():
    note_data_by_file = []
    duration_data_by_file = []

    for fn in midi_file_names:
        notes, durations = read_midi(path + fn)
        note_data_by_file.append(notes)
        duration_data_by_file.append(durations)

    note_data_by_file = np.array(note_data_by_file)
    duration_data_by_file = np.array(duration_data_by_file)

#print(note_data_by_file)
print(note_data_by_file.shape)
#print(duration_data_by_file)
print(duration_data_by_file.shape)

Data path:  /Users/danielrjohnson/Documents/OneDrive/Programming/AIO/Music-Generation/data/
# of Files Found:  35
(35,)
(35,)


In [3]:
#list comp to flatten
flattened_note_data = [element for note_ in note_data_by_file for element in note_]
print("# of Notes Total: ", len(flattened_note_data))

all_unique_notes = list(set(flattened_note_data))
print("# of Unique Notes: ", len(all_unique_notes))

# of Notes Total:  29748
# of Unique Notes:  993


In [4]:
note_counts = dict(Counter(flattened_note_data))

MIN_COUNT = 25
frequent_notes = [note_ for note_, count in note_counts.items() if count >= MIN_COUNT]
print("# of Notes Occurring At Least", MIN_COUNT, "Times: ", len(frequent_notes))

# of Notes Occurring At Least 25 Times:  216


In [5]:
freq_note_data = [list(filter(lambda x: x in frequent_notes, midi_data)) for midi_data in note_data_by_file]
freq_note_data = np.array(freq_note_data, dtype=object)

In [6]:
n_timesteps = 32
X, y_note, y_duration = [], [], []

for notes_of_file, durations_of_file in zip(note_data_by_file, duration_data_by_file): #freq_note_data:
    for i in range(0, len(notes_of_file) - n_timesteps):
        # (n_timesteps) length note sequence
        X.append([notes_of_file[i:i + n_timesteps], durations_of_file[i:i + n_timesteps]])
        # the note following that sequence
        y_note.append(notes_of_file[i + n_timesteps])
        # the duration of the note following that sequence
        y_duration.append(durations_of_file[i + n_timesteps])


X = np.array(X)
y_note = np.array(y_note)
y_duration = np.array(y_duration)

In [7]:
unique_x_note = list(set(X[:, 0].ravel()))
x_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_x_note))
x_seq_note = np.array([[x_note_to_int[note] for note in row] for row in X[:, 0]])

unique_x_dur = list(set(X[:, 1].ravel()))
x_dur_to_int = dict((duration, number) for number, duration in enumerate(unique_x_dur))
x_seq_dur = np.array([[x_dur_to_int[duration] for duration in row] for row in X[:, 1]])

# ([Nx32x1], [Nx32x1]) -> ([Nx32x2])
x_seq_combined = np.array(
    [ [ [note, duration] for note, duration in zip(row_note, row_dur) ]
        for row_note, row_dur in zip(x_seq_note, x_seq_dur) ]
)
print("combined note and duration array shape:", x_seq_combined.shape)

unique_y_note = list(set(y_note))
y_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_y_note)) 
y_seq_note = np.array([y_note_to_int[note] for note in y_note])

unique_y_duration = list(set(y_duration))
y_dur_to_int = dict((duration_, number) for number, duration_ in enumerate(unique_y_duration))
y_seq_dur = np.array([y_dur_to_int[duration] for duration in y_duration])

combined note and duration array shape: (28915, 32, 2)


In [8]:
x_tr_note, x_val_note, y_tr_note, y_val_note = train_test_split(x_seq_combined, y_seq_note, test_size=0.2, random_state=0)
x_tr_dur, x_val_dur, y_tr_dur, y_val_dur = train_test_split(x_seq_combined, y_seq_dur, test_size=0.2, random_state=0)

In [9]:
inputs = Input((n_timesteps, 2))
x = LSTM(128)(inputs)
x = Dense(128, activation="relu")(x)
output = Dense(128, activation="softmax")(x)

model_note = tf.keras.Model(inputs, output)

model_note.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
model_note.summary()

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 2)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               67072     
_________________________________________________________________
dense (Dense)                (None, 128)               16512     
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
Total params: 100,096
Trainable params: 100,096
Non-trainable params: 0
_________________________________________________________________


In [10]:
mc = ModelCheckpoint('models/best_model_note.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)
history = model_note.fit(x=x_tr_note, y=y_tr_note,
                    batch_size=128, epochs=10, 
                    validation_data=(x_val_note, y_val_note),
                    verbose=1, callbacks=[mc]
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.51282, saving model to models/best_model_note.h5
Epoch 2/10

Epoch 00002: val_loss improved from 0.51282 to 0.48428, saving model to models/best_model_note.h5
Epoch 3/10

Epoch 00003: val_loss improved from 0.48428 to 0.47491, saving model to models/best_model_note.h5
Epoch 4/10

Epoch 00004: val_loss improved from 0.47491 to 0.47032, saving model to models/best_model_note.h5
Epoch 5/10

Epoch 00005: val_loss improved from 0.47032 to 0.46853, saving model to models/best_model_note.h5
Epoch 6/10

Epoch 00006: val_loss improved from 0.46853 to 0.46419, saving model to models/best_model_note.h5
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.46419
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.46419
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.46419
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.46419


In [11]:
inputs = Input((n_timesteps, 2))
x = LSTM(128)(inputs)
x = Dense(128, activation="relu")(x)
output = Dense(128, activation="softmax")(x)

model_dur = tf.keras.Model(inputs, output)

model_dur.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001))
model_dur.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 32, 2)]           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               67072     
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 128)               16512     
Total params: 100,096
Trainable params: 100,096
Non-trainable params: 0
_________________________________________________________________


In [12]:
mc = ModelCheckpoint('models/best_model_dur.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)
history = model_dur.fit(x=x_tr_dur, y=y_tr_dur,
                    batch_size=128, epochs=10, 
                    validation_data=(x_val_dur, y_val_dur),
                    verbose=1, callbacks=[mc]
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.83955, saving model to models/best_model_dur.h5
Epoch 2/10

Epoch 00002: val_loss improved from 1.83955 to 1.65424, saving model to models/best_model_dur.h5
Epoch 3/10

Epoch 00003: val_loss improved from 1.65424 to 1.64317, saving model to models/best_model_dur.h5
Epoch 4/10

Epoch 00004: val_loss improved from 1.64317 to 1.63941, saving model to models/best_model_dur.h5
Epoch 5/10

Epoch 00005: val_loss improved from 1.63941 to 1.63602, saving model to models/best_model_dur.h5
Epoch 6/10

Epoch 00006: val_loss improved from 1.63602 to 1.63499, saving model to models/best_model_dur.h5
Epoch 7/10

Epoch 00007: val_loss improved from 1.63499 to 1.63300, saving model to models/best_model_dur.h5
Epoch 8/10

Epoch 00008: val_loss did not improve from 1.63300
Epoch 9/10

Epoch 00009: val_loss improved from 1.63300 to 1.63261, saving model to models/best_model_dur.h5
Epoch 10/10

Epoch 00010: val_loss did not improve from 1.63261


In [15]:
# initial_note_sequence = x_val_note[np.random.randint(0, len(x_val_note))]
# initial_duration_sequence = x_val_dur[np.random.randint(0, len(x_val_dur))]
initial_sequence = x_val_note[np.random.randint(0, len(x_val_note))]
x_int_to_note = {v: k for k, v in x_note_to_int.items()}
x_int_to_dur = {v: k for k, v in x_dur_to_int.items()}

produce_song(initial_sequence, x_int_to_note, x_int_to_dur, 
            n_notes=20, midi_file_path="songs/song.mid")