In [1]:
import copy
import os

import music21
import music21.instrument
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
# read from melodyData.txt
with open('generated/melodyDataShort.txt', 'r') as f:
    parts = eval(f.read())

# one-hot encoding
# encodings is like a dictionary 
encodings = {}
encodingIndex = 0
for part in parts:
    for note in part:
        if note not in encodings:
            encodings[note] = encodingIndex
            encodingIndex += 1
print(len(encodings), "encodings")

decodings = {}
for k, v in encodings.items():
    decodings[v] = k        

# the actual data that is encoded using the created dictionary
# encode everything in indices first
data_encoded = []
for part in parts:
    data_encoded.append([encodings[note] for note in part])


227 encodings


In [3]:
# Sanity check of the integer-encoded corpus.
# NOTE(review): this prints every encoded part in full; a slice or a length
# summary would keep the saved notebook output short.
print(data_encoded)

[[0, 0, 0, 1, 2, 3, 4, 4, 5, 2, 3, 4, 4, 5, 2, 3, 4, 4, 4, 6, 7, 7, 8, 9, 10, 11, 12, 13, 6, 6, 10, 9, 7, 7, 9, 14, 6, 12, 14, 12, 13, 15, 2, 3, 4, 4, 5, 2, 3, 4, 4, 5, 2, 3, 4, 4, 4, 6, 7, 9, 10, 9, 14, 12, 14, 5, 2, 3, 4, 4, 4, 6, 7, 7, 8, 16, 11, 12, 13, 6, 6, 10, 9, 7, 7, 9, 14, 6, 12, 14, 17, 9, 18, 9, 14, 16, 16, 8, 19, 10, 16, 8, 8, 20, 21, 20, 22, 23, 10, 24, 24, 9, 10, 24, 24, 9, 10, 24, 24, 25, 20, 25, 26, 23, 27, 28, 28, 28, 26, 27, 23, 29, 27, 23, 27, 30, 22, 23, 31, 8, 8, 23, 10, 16, 16, 10, 9, 7, 7, 9, 14, 32, 14, 12, 4, 7, 16, 33, 10, 9, 8, 8, 24, 18, 9, 14, 16, 16, 10, 9, 7, 7, 9, 14, 34, 17, 14, 12, 35, 0, 0, 0, 1, 2, 3, 4, 4, 5, 2, 3, 4, 4, 5, 2, 3, 4, 4, 4, 6, 7, 7, 8, 9, 10, 11, 12, 13, 6, 6, 10, 9, 7, 7, 9, 14, 6, 12, 14, 12, 13, 15, 2, 3, 4, 4, 5, 2, 3, 4, 4, 5, 2, 3, 4, 4, 4, 6, 7, 9, 10, 9, 14, 12, 14, 5, 2, 3, 4, 4, 4, 6, 7, 7, 8, 16, 11, 12, 13, 6, 6, 10, 9, 7, 7, 9, 14, 6, 12, 14, 17, 9, 18, 9, 14, 16, 16, 8, 19, 10, 16, 8, 8, 20, 21, 20, 22, 23, 10, 24, 24, 

In [4]:
#number of notes per sequence to train the model on
sequence_length = 10

X = []
Y = []

# given data_encoded, generate training data by looping
for i in range(len(data_encoded)):
    
    for j in range(len(data_encoded[i]) - sequence_length):
        # from j to j + sequence_length, append to X
        X.append(data_encoded[i][j:j + sequence_length])
        # append the next note to Y for prediction purposes
        Y.append(data_encoded[i][j + sequence_length])



# one-hot encode cache
encodings_onehot = {}

# creating an array of values where 1 is mapped to the corresponding dictionary value in encodings
# k is note tuple, v is encoding index
for k, v in encodings.items():
    #create a new array of zeros with length of encodings
    onehot = np.zeros(len(encodings))
    #set the index of the note to 1
    onehot[v] = 1
    #append to encodings_onehot
    encodings_onehot[v] = onehot

# one-hot encode X
# for each sequence in X, we will convert it to a one-hot encoding
X_onehot = []
for seq in X:
    X_onehot.append(np.array([ encodings_onehot[note] for note in seq]))
X = X_onehot

# split this into training and testing sets 
# we will intentionally overfit, GET RID OF THIS LATER
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

In [5]:
# Dense baseline: flatten the one-hot window and classify the next note.
# benchmark: dense is 80% accuracy

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable across kernel restarts.
MODEL_SEED = 42
tf.keras.utils.set_random_seed(MODEL_SEED)

model = tf.keras.models.Sequential([
    # One sample is a window of `sequence_length` one-hot note vectors.
    tf.keras.Input(shape=(sequence_length, len(encodings))),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    # tf.keras.layers.Dropout(.4),
    tf.keras.layers.Dense(128, activation='relu'),
    # One output probability per known note.
    tf.keras.layers.Dense(len(encodings), activation='softmax')
])

# Targets in Y_train are integer encoding indices (not one-hot vectors), hence
# the sparse variant of categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(np.array(X_train), np.array(Y_train), epochs=5)


Epoch 1/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.1149 - loss: 4.9517
Epoch 2/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.3332 - loss: 3.1212
Epoch 3/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7265 - loss: 1.5833
Epoch 4/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8787 - loss: 0.7176
Epoch 5/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9383 - loss: 0.3657


<keras.src.callbacks.history.History at 0x1bc90da65c0>

In [8]:
# Score the trained network on the held-out windows and report both metrics.
test_inputs = np.array(X_test)
test_targets = np.array(Y_test)
loss, accuracy = model.evaluate(test_inputs, test_targets, verbose=1)
print(f'Loss: {loss}, Accuracy: {accuracy}')

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8321 - loss: 1.0142
Loss: 1.0959199666976929, Accuracy: 0.8243243098258972


In [9]:
# Autoregressive generation: start from one training window and repeatedly
# append the model's most likely next note.
# (`copy` is imported in the notebook's top import cell.)
num_generated = 10  # how many new notes to append after the seed window

# Work on a private copy so the training sample itself is never touched.
seq = copy.deepcopy(X_train[1])
total_seq = np.array(seq)
for _ in range(num_generated):

    # predict() returns one row of note probabilities per input window; the
    # batch here holds exactly one window.
    probs = model.predict(np.array([seq]))

    # Greedy choice: index of the note with the highest probability.
    pred = int(np.argmax(probs[0]))

    # Append the one-hot encoding of the predicted note to the running melody.
    one_hot = np.zeros((len(encodings)))
    one_hot[pred] = 1
    total_seq = np.append(total_seq, [one_hot], axis=0)

    # The next model input is the most recent `sequence_length` notes; using
    # the shared constant keeps the window in step with the model's input
    # shape if the window length is ever changed.
    seq = total_seq[-sequence_length:]

# Pitch-class names used to print notes as letters.
mapping = {0: "C", 1: "C#", 2: "D", 3: "D#", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "A#", 11: "B"}

# Decode every one-hot row back to its (pitch, duration) note and print it.
# NOTE(review): the lookup uses (pitch - 1) % 12 with pitch 0 printed as a
# rest, which presumes stored pitches are shifted up by one. The original
# remark "60 = C" would instead imply pitch % 12 -- confirm against the code
# that produced melodyDataShort.txt.
for onehot_note in total_seq:
    pitch, dur = decodings[int(np.argmax(onehot_note))]
    print(mapping[(pitch - 1) % 12] if pitch != 0 else "Rest", dur)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
F 1.00
A 1.00
F 0.75
A 0.50
Rest 0.25
A# 0.50
C 1.00
G 0.75
Rest 0.25
C 1.00
G 0.25
Rest 0.25
F# 0.25
Rest 0.25
F 1.00
B 0.25
C 0.25
B 0.25
Rest 0.25
B 0.25
