In [1]:
## Sequence Data
# Note names of the melody, one entry per note (42 notes in total).
scales = (
    'C4 C4 G4 G4 A4 A4 G4 F4 F4 E4 E4 D4 D4 C4 '
    'G4 G4 F4 F4 E4 E4 D4 G4 G4 F4 F4 E4 E4 D4 '
    'C4 C4 G4 G4 A4 A4 G4 F4 F4 E4 E4 D4 D4 C4'
).split()
# Every 7-note phrase has the same rhythm: six short notes (4) and one long note (8).
durations = [4, 4, 4, 4, 4, 4, 8] * 6

In [2]:
## Make an audio file to play
!pip install pretty_midi
import numpy as np
import pretty_midi as pm
import soundfile as sf
from IPython.display import Audio

def midi(predicted_notes):
  """Render (pitch, duration) pairs as piano audio, save it and return a player.

  The notes are laid end to end on an 'Acoustic Grand Piano' track,
  synthesized to a waveform and written to 'output_audio.wav'.

  Args:
    predicted_notes: iterable of (pitch, duration) pairs. `pitch` is used as
      the pitch of a pretty_midi.Note; `duration` is a multiplier of
      0.125 seconds (so 4 lasts 0.5 s and 8 lasts 1.0 s).

  Returns:
    An IPython.display.Audio object for the synthesized waveform. Make the
    call the last expression of a cell (or pass the result to display()) to
    get an inline player; the original version built this object and threw
    it away, so nothing was ever shown.
  """
  sample_rate = 44100
  seconds_per_unit = 0.5 / 4.

  midi_data = pm.PrettyMIDI()
  piano_program = pm.instrument_name_to_program('Acoustic Grand Piano')
  piano = pm.Instrument(program=piano_program)

  current_time = 0
  for scale, duration in predicted_notes:
    note_duration = seconds_per_unit * duration
    # int() keeps numpy integer pitches from leaking into the Note object.
    note = pm.Note(velocity=100, pitch=int(scale),
                   start=current_time, end=current_time + note_duration)
    piano.notes.append(note)
    current_time += note_duration
  midi_data.instruments.append(piano)

  # Synthesize at the same rate that is used for playback and for the file.
  audio_data = midi_data.synthesize(fs=sample_rate)
  sf.write('output_audio.wav', audio_data, sample_rate)
  return Audio(audio_data, rate=sample_rate)

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.2-py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Collecting packaging~=23.1 (from mido>=1.1.16->pretty_midi)
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592289 sha256=45689efc4c7e1d3b0caab5d374db37eb249c5122eff989d9564f62cf2d447410
  Stored in direc

In [3]:
## Data preparation
def one_hot_encode_notes(notes):
  """One-hot encode (pitch, duration) pairs into 15-wide integer rows.

  Columns 0-12 one-hot the pitch as an offset from 60 (so pitches 60..72 are
  representable). Columns 13-14 one-hot the duration: column 13 when the
  duration equals 4, column 14 for any other duration.

  Args:
    notes: sized sequence of (pitch, duration) pairs.

  Returns:
    Integer numpy array of shape (len(notes), 15) with exactly two ones per row.

  Raises:
    ValueError: if a pitch falls outside 60..72. Without this check an
      out-of-range pitch would silently land on the wrong column (e.g. 59
      wraps around to index -1 and 73 hits index 13, both duration columns).
  """
  num_scales = 13
  num_durations = 2
  base_pitch = 60
  total_features = num_scales + num_durations

  one_hot_encoded = np.zeros((len(notes), total_features), dtype=int)

  for i, (scale_num, duration) in enumerate(notes):
    scale_index = scale_num - base_pitch
    if not 0 <= scale_index < num_scales:
      raise ValueError(
          'pitch %r at position %d is outside the supported range %d..%d'
          % (scale_num, i, base_pitch, base_pitch + num_scales - 1))
    # Duration columns sit directly after the pitch columns.
    duration_index = num_scales if duration == 4 else num_scales + 1

    one_hot_encoded[i, scale_index] = 1
    one_hot_encoded[i, duration_index] = 1
  return one_hot_encoded

def prepare_sequence(encoded_notes, sequence_length=4):
  """Slice a sequence into sliding windows and the element that follows each.

  Args:
    encoded_notes: sliceable sequence (list or numpy array) of encoded notes.
    sequence_length: number of consecutive elements in each input window.

  Returns:
    Tuple (X, y) of numpy arrays: X[k] is encoded_notes[k:k + sequence_length]
    and y[k] is encoded_notes[k + sequence_length]. Both are empty when the
    input has no more than sequence_length elements.
  """
  window_starts = range(len(encoded_notes) - sequence_length)
  windows = [encoded_notes[start:start + sequence_length] for start in window_starts]
  followers = [encoded_notes[start + sequence_length] for start in window_starts]
  return np.array(windows), np.array(followers)


# Convert every note name to its number and pair it with its duration.
note_numbers = [pm.note_name_to_number(name) for name in scales]
notes = list(zip(note_numbers, durations))
encoded_notes = one_hot_encode_notes(notes)

# Cut the encoded melody into (4-note window, following note) training pairs.
sequence_length = 4
X, y = prepare_sequence(encoded_notes, sequence_length)

In [5]:
## Model Design: LSTM
### Make the model stateful
from keras.layers import Input, LSTM, Dense
from keras.models import Model

# Each input step is a 15-wide vector: 13 pitch columns followed by 2 duration columns.
num_scale_units = 13
num_duration_units = 2
num_features = num_scale_units + num_duration_units
sequence_length = 4

# batch_shape pins the batch size to 1; stateful=True makes the LSTM keep its
# state from one batch to the next until model.reset_states() is called.
inputs = Input(batch_shape=(1, sequence_length, num_features))
lstm_out = LSTM(units=64, return_sequences=False, stateful=True)(inputs)

# Two softmax heads share the LSTM output: one picks the pitch, one the duration.
scale_output = Dense(
    num_scale_units, activation='softmax', name='scale_output')(lstm_out)
duration_output = Dense(
    num_duration_units, activation='softmax', name='duration_output')(lstm_out)

model = Model(inputs=inputs, outputs=[scale_output, duration_output])
model.compile(
    optimizer='Adam',
    loss=dict(scale_output='categorical_crossentropy',
              duration_output='categorical_crossentropy'),
    metrics=dict(scale_output='accuracy', duration_output='accuracy'),
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(1, 4, 15)]                 0         []                            
                                                                                                  
 lstm_1 (LSTM)               (1, 64)                      20480     ['input_2[0][0]']             
                                                                                                  
 scale_output (Dense)        (1, 13)                      845       ['lstm_1[0][0]']              
                                                                                                  
 duration_output (Dense)     (1, 2)                       130       ['lstm_1[0][0]']              
                                                                                              

In [10]:
## Model training
### Train one epoch at a time so the LSTM state can be reset at the start of
### every epoch; within an epoch the state is carried from window to window.
num_epochs = 2000
log_every = 100
# Split each 15-wide target row into the pitch part and the duration part,
# matching the model's two outputs.
train_targets = [y[:, 0:num_scale_units], y[:, num_scale_units:num_features]]

for epoch_idx in range(num_epochs):
  model.reset_states()
  log_this_epoch = epoch_idx % log_every == 0
  if log_this_epoch:
    print('epochs : ' + str(epoch_idx))
  # shuffle=False is required for a stateful LSTM: the carried state is only
  # meaningful when the windows are presented in their original order.
  model.fit(X, train_targets,
            epochs=1, batch_size=1, verbose=2 if log_this_epoch else 0,
            validation_split=0.2, shuffle=False)

epochs : 0
30/30 - 0s - loss: 0.1153 - scale_output_loss: 0.0453 - duration_output_loss: 0.0700 - scale_output_accuracy: 0.9667 - duration_output_accuracy: 0.9667 - val_loss: 0.1305 - val_scale_output_loss: 5.4389e-06 - val_duration_output_loss: 0.1305 - val_scale_output_accuracy: 1.0000 - val_duration_output_accuracy: 0.8750 - 280ms/epoch - 9ms/step
epochs : 50
30/30 - 0s - loss: 0.1091 - scale_output_loss: 0.0440 - duration_output_loss: 0.0651 - scale_output_accuracy: 0.9667 - duration_output_accuracy: 0.9667 - val_loss: 0.1376 - val_scale_output_loss: 9.9538e-06 - val_duration_output_loss: 0.1376 - val_scale_output_accuracy: 1.0000 - val_duration_output_accuracy: 0.8750 - 233ms/epoch - 8ms/step
epochs : 100
30/30 - 0s - loss: 0.1154 - scale_output_loss: 0.0473 - duration_output_loss: 0.0681 - scale_output_accuracy: 0.9667 - duration_output_accuracy: 0.9667 - val_loss: 0.1339 - val_scale_output_loss: 5.4389e-06 - val_duration_output_loss: 0.1339 - val_scale_output_accuracy: 1.0000 - 

In [11]:
## Model prediction test
### with only first four notes
prediction_length = y.shape[0]

# Seed the generator with the first training window (shape (1, 4, 15)).
given_notes = X[0:1]
input_sequence = given_notes
predicted_notes = []

# Start from a clean LSTM state, exactly as every training epoch does, instead
# of whatever state the last fit() call happened to leave behind.
model.reset_states()

for i in range(prediction_length):
  scale_pred, duration_pred = model.predict(input_sequence)
  # Both predictions have batch size 1; pull out plain Python ints.
  next_scale = int(np.argmax(scale_pred, axis=-1)[0])
  next_duration = 4 if int(np.argmax(duration_pred, axis=-1)[0]) == 0 else 8
  predicted_notes.append((next_scale + 60, next_duration))

  # Re-encode the predicted note the same way the training data is encoded:
  # pitch column first, then one of the last two columns for the duration.
  next_note_encoded = np.zeros((1, 1, given_notes.shape[2]))
  next_note_encoded[0, 0, next_scale] = 1
  next_note_encoded[0, 0, -2 if next_duration == 4 else -1] = 1

  # Slide the window: drop the oldest note and append the predicted one.
  input_sequence = np.concatenate((input_sequence[:, 1:, :], next_note_encoded), axis=1)

print(predicted_notes)
midi(predicted_notes)

[(69, 4), (69, 4), (67, 8), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4)]


In [16]:
# Reference melody after the four seed notes, followed by the generated notes.
print(notes[4:], predicted_notes, sep='\n')

[(69, 4), (69, 4), (67, 8), (65, 4), (65, 4), (64, 4), (64, 4), (62, 4), (62, 4), (60, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (60, 4), (60, 4), (67, 4), (67, 4), (69, 4), (69, 4), (67, 8), (65, 4), (65, 4), (64, 4), (64, 4), (62, 4), (62, 4), (60, 8)]
[(69, 4), (69, 4), (67, 8), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4), (65, 4), (65, 4), (64, 4), (64, 4), (62, 8), (67, 4), (67, 4)]
