In [1]:
import os 
import music21
import music21.instrument
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

2024-10-14 18:30:25.349045: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
# read from melodyData.txt
with open('out/melodyData.txt', 'r') as f:
    parts = eval(f.read())[:30]



In [12]:
# one-hot encoding
# encodings is like a dictionary 

no_parts = 100

encodings = {}
encodingIndex = 0
for part in parts[:no_parts]:
    for note in part:
        if note not in encodings:
            encodings[note] = encodingIndex
            encodingIndex += 1

decodings = {}
for k, v in encodings.items():
    decodings[v] = k        

data_encoded = []
for part in parts[:no_parts]:
    data_encoded.append([encodings[note] for note in part])

In [25]:
# number of notes per sequence to train the model on
sequence_length = 10

X = []
Y = []

# given data_encoded, generate training data by looping
for i in range(len(data_encoded)):
    
    for j in range(len(data_encoded[i]) - sequence_length):
        # from j to j + sequence_length, append to X
        X.append(data_encoded[i][j:j + sequence_length])
        # append the next note to Y for prediction purposes
        Y.append(data_encoded[i][j + sequence_length])

print (X, Y)

# one-hot encode cache
encodings_onehot = {}

# creating an array of values where 1 is mapped to the corresponding dictionary value in encodings
# k is note tuple, v is encoding index
for k, v in encodings.items():
    #create a new array of zeros with length of encodings
    onehot = np.zeros(len(encodings))
    #set the index of the note to 1
    onehot[v] = 1
    #append to encodings_onehot
    encodings_onehot[v] = onehot

# one-hot encode X
# for each sequence in X, we will convert it to a one-hot encoding
X_onehot = []
for seq in X:
    X_onehot.append(np.array([ encodings_onehot[note] for note in seq]))
X = X_onehot

X_train = X[:int(len(X) * 0.7)] 
X_test = X[int(len(X) * 0.8):]
Y_train = Y[:int(len(Y) * 0.7)]
Y_test = Y[int(len(Y) * 0.8):]

X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)


[[0, 0, 0, 1, 2, 3, 4, 4, 5, 2], [0, 0, 1, 2, 3, 4, 4, 5, 2, 3], [0, 1, 2, 3, 4, 4, 5, 2, 3, 4], [1, 2, 3, 4, 4, 5, 2, 3, 4, 4], [2, 3, 4, 4, 5, 2, 3, 4, 4, 5], [3, 4, 4, 5, 2, 3, 4, 4, 5, 2], [4, 4, 5, 2, 3, 4, 4, 5, 2, 3], [4, 5, 2, 3, 4, 4, 5, 2, 3, 4], [5, 2, 3, 4, 4, 5, 2, 3, 4, 4], [2, 3, 4, 4, 5, 2, 3, 4, 4, 4], [3, 4, 4, 5, 2, 3, 4, 4, 4, 6], [4, 4, 5, 2, 3, 4, 4, 4, 6, 7], [4, 5, 2, 3, 4, 4, 4, 6, 7, 7], [5, 2, 3, 4, 4, 4, 6, 7, 7, 8], [2, 3, 4, 4, 4, 6, 7, 7, 8, 9], [3, 4, 4, 4, 6, 7, 7, 8, 9, 10], [4, 4, 4, 6, 7, 7, 8, 9, 10, 11], [4, 4, 6, 7, 7, 8, 9, 10, 11, 12], [4, 6, 7, 7, 8, 9, 10, 11, 12, 13], [6, 7, 7, 8, 9, 10, 11, 12, 13, 6], [7, 7, 8, 9, 10, 11, 12, 13, 6, 6], [7, 8, 9, 10, 11, 12, 13, 6, 6, 10], [8, 9, 10, 11, 12, 13, 6, 6, 10, 9], [9, 10, 11, 12, 13, 6, 6, 10, 9, 7], [10, 11, 12, 13, 6, 6, 10, 9, 7, 7], [11, 12, 13, 6, 6, 10, 9, 7, 7, 9], [12, 13, 6, 6, 10, 9, 7, 7, 9, 14], [13, 6, 6, 10, 9, 7, 7, 9, 14, 6], [6, 6, 10, 9, 7, 7, 9, 14, 6, 12], [6, 10, 9, 7, 7, 9,

In [20]:
# TODO: make the model itself
# benchmark: dense is 80% accuracy

model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(sequence_length, len(encodings))),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    # tf.keras.layers.Dropout(.4),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(encodings), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X_train, Y_train, epochs=20)

Epoch 1/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.0495 - loss: 4.6857
Epoch 2/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1857 - loss: 3.9605
Epoch 3/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3669 - loss: 2.9769
Epoch 4/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6955 - loss: 2.0120
Epoch 5/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8054 - loss: 1.2556
Epoch 6/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9094 - loss: 0.6639
Epoch 7/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9402 - loss: 0.4106
Epoch 8/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9672 - loss: 0.2540
Epoch 9/20
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x14b61d820>

In [22]:
loss, accuracy = model.evaluate(np.array(X_test), np.array(Y_test), verbose=1)
print(f'Loss: {loss}, Accuracy: {accuracy}')

print()

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0641 - loss: 6.3829 
Loss: 6.259538173675537, Accuracy: 0.07264957576990128



In [24]:
import copy
# given a sequence, generate the next note (up to ten times)
seq = copy.deepcopy(X_train[1])
total_seq = seq[:]
for i in range(10):

    #2d array that predicts the probability of the next note
    pred = model.predict(np.array([seq]))

    #this line takes the prediction with the highest probability
    pred = np.argmax(pred)

    # append to sequence the one-hot encoding of pred
    one_hot = np.zeros((len(encodings)))
    one_hot[pred] = 1
    total_seq = np.append(total_seq, [one_hot], axis=0)
    # set sequence to be the last ten values of total_seq
    seq = total_seq[-10:]

mapping = {0: "C", 1: "C#", 2: "D", 3: "D#", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "A#", 11: "B"}
    
for idx, note in enumerate(total_seq):
    if idx == 10:
        print ("New part ")
    pitch, dur = decodings[np.argmax(note)]
    print (mapping[(pitch - 1) % 12] if pitch != 0 else "Rest", dur)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Rest 3.00
Rest 3.00
Rest 2.00
C 1.00
D 1.00
F 1.00
F 1.00
F 2.00
C 1.00
D 1.00
New part 
F 1.00
F 1.00
F 2.00
C 1.00
D 1.00
F 1.00
F 1.00
F 1.00
G 1.00
A 1.00
