In [9]:
import os 
import music21
import music21.instrument
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [10]:
# Load the serialized melody parts from out/melodyData.txt.
# NOTE(review): eval() executes whatever expression the file contains, so only run
# this against a melodyData.txt you trust. If the file holds nothing but plain
# literals, ast.literal_eval would be a safer replacement — confirm the format first.
with open('out/melodyData.txt', 'r') as f:
    raw_melody_text = f.read()
parts = eval(raw_melody_text)



In [11]:
# Integer-encode the melody vocabulary.
# `encodings` maps each distinct note to an integer id, assigned in order of first
# appearance; `decodings` is the inverse lookup (id -> note).

no_parts = 100
selected_parts = parts[:no_parts]

encodings = {}
for melody in selected_parts:
    for symbol in melody:
        # setdefault only inserts unseen notes, so ids stay dense: 0, 1, 2, ...
        encodings.setdefault(symbol, len(encodings))
# next free id == number of distinct notes seen
encodingIndex = len(encodings)

decodings = {index: symbol for symbol, index in encodings.items()}

# every selected part rewritten as a list of integer ids
data_encoded = [[encodings[symbol] for symbol in melody] for melody in selected_parts]

In [None]:
# number of notes per sequence to train the model on
sequence_length = 10

# Sliding-window samples: each X entry is `sequence_length` consecutive encoded
# notes from one part, and the matching Y entry is the note that follows them.
X = []
Y = []
for encoded_part in data_encoded:
    # parts no longer than sequence_length give an empty range and add no samples
    for start in range(len(encoded_part) - sequence_length):
        X.append(encoded_part[start:start + sequence_length])
        Y.append(encoded_part[start + sequence_length])

# Report sizes only: printing X and Y themselves dumps every window into the output.
print(f"{len(X)} training windows of {sequence_length} notes each")

# one-hot encode cache: encoding index -> vector of len(encodings) zeros with a
# single 1 at that index
encodings_onehot = {}
for v in encodings.values():
    onehot = np.zeros(len(encodings))
    onehot[v] = 1
    encodings_onehot[v] = onehot

# one-hot encode X: each window becomes a (sequence_length, len(encodings)) array
X_onehot = [np.array([encodings_onehot[note] for note in seq]) for seq in X]
X = X_onehot

# Ordered split (no shuffling): the first 70% of samples train the model and the
# last 20% test it. The 10% in between is exposed as X_val / Y_val (e.g. for
# `validation_data` in model.fit) instead of being discarded without a name.
# X and Y always have the same length, so one pair of indices serves both.
train_end = int(len(X) * 0.7)
test_start = int(len(X) * 0.8)

X_train = np.array(X[:train_end])
X_val = np.array(X[train_end:test_start])
X_test = np.array(X[test_start:])
Y_train = np.array(Y[:train_end])
Y_val = np.array(Y[train_end:test_start])
Y_test = np.array(Y[test_start:])


In [None]:
# Dense baseline for next-note prediction.
# benchmark: dense is 80% accuracy

num_classes = len(encodings)

# The one-hot window is flattened and passed through two ReLU layers; the softmax
# output has one unit per entry in `encodings`.
layer_stack = [
    tf.keras.Input(shape=(sequence_length, num_classes)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    # tf.keras.layers.Dropout(.4),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
]
model = tf.keras.models.Sequential(layer_stack)

# sparse loss: the targets are integer class ids rather than one-hot rows
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(X_train, Y_train, epochs=20)

In [None]:
# Score the trained model on the test split and report loss and accuracy.
test_inputs = np.array(X_test)
test_targets = np.array(Y_test)
loss, accuracy = model.evaluate(test_inputs, test_targets, verbose=1)
# end='\n\n' leaves one blank line after the metrics
print(f'Loss: {loss}, Accuracy: {accuracy}', end='\n\n')

In [None]:
import copy

# Greedy continuation: starting from one training window, repeatedly predict the
# most likely next note, append it, and slide the model's input window forward.
num_generated = 10

seq = copy.deepcopy(X_train[1])
total_seq = seq[:]
for _ in range(num_generated):
    # 2d array of next-note probabilities for the single window in the batch;
    # verbose=0 keeps Keras from printing a progress bar on every step
    pred = model.predict(np.array([seq]), verbose=0)

    # index of the most probable note
    pred = np.argmax(pred)

    # append to the sequence the one-hot encoding of pred
    one_hot = np.zeros(len(encodings))
    one_hot[pred] = 1
    total_seq = np.append(total_seq, [one_hot], axis=0)
    # the model input is always sequence_length notes long, so keep exactly that
    # many trailing notes (a literal 10 would break if sequence_length changes)
    seq = total_seq[-sequence_length:]

# pitch-class names, indexed by (pitch - 1) % 12 so that pitch 1 prints as "C"
mapping = {0: "C", 1: "C#", 2: "D", 3: "D#", 4: "E", 5: "F", 6: "F#", 7: "G", 8: "G#", 9: "A", 10: "A#", 11: "B"}

for idx, note in enumerate(total_seq):
    # the first sequence_length entries are the seed window; the rest were generated
    if idx == sequence_length:
        print("New part ")
    pitch, dur = decodings[np.argmax(note)]
    # a pitch of 0 is printed as a rest
    print(mapping[(pitch - 1) % 12] if pitch != 0 else "Rest", dur)


In [53]:
import tinysoundfont
import time

In [62]:

# Create the synthesizer, load the soundfont file, select a program, and start it.
synth = tinysoundfont.Synth()
# NOTE(review): the second argument (-5) is presumably a gain adjustment —
# confirm against the tinysoundfont documentation.
sfid = synth.sfload("Live_Party_SoundFont__Techno_.sf2", -5)
# NOTE(review): the first argument here is 2, while the notes played later are
# sent with first argument 0. If those are channel numbers, the program chosen
# here may not be the one the notes use — confirm the intended channel/arguments.
synth.program_change(2, sfid, 1)
synth.start()

def play_tone(midi_pitch, duration):
    """Sound one note for `duration` seconds, blocking until it has been released.

    Sends ``synth.noteon(0, midi_pitch, 127)``, sleeps for `duration` seconds, then
    sends the matching ``synth.noteoff(0, midi_pitch)``.

    Args:
        midi_pitch: pitch value passed through to the synth's noteon/noteoff.
        duration: how long to hold the note, in seconds.
    """
    synth.noteon(0, midi_pitch, 127)
    try:
        time.sleep(duration)
    finally:
        # always release the note, even if the sleep is interrupted (e.g. by a
        # kernel interrupt), so it is not left sounding
        synth.noteoff(0, midi_pitch)

def wait(duration: float) -> None:
    """Block for `duration` seconds; playback uses this to hold a rest."""
    time.sleep(duration)
    return None

bpm = 92
# seconds per beat at the chosen tempo
time_multiplier = 60 / bpm

# Play the sequence note by note. The try/finally guarantees the synth is stopped
# even when playback raises or the cell is interrupted part-way through.
try:
    for note in total_seq:
        pitch, dur = decodings[np.argmax(note)]
        # presumably `dur` is a length in beats; scale it to seconds — TODO confirm
        adjusted_dur = float(dur) * time_multiplier
        if pitch <= 0:
            # non-positive pitch: stay silent for the note's duration
            wait(adjusted_dur)
        else:
            # shift the encoded pitch by 47 to get the value sent to the synth
            play_tone(pitch + 47, adjusted_dur)
finally:
    synth.stop()