In [None]:
%pip install music21
%pip install keras
%pip install tensorflow
%pip install h5py
%pip install matplotlib
%pip install numpy
%pip install pandas
%pip install tqdm
%pip install keras-metrics




In [17]:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord
import keras
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.callbacks import ModelCheckpoint

In [None]:
%unzip jazz_and_stuff.zip

Archive:  jazz_and_stuff.zip
   creating: jazz_and_stuff/
  inflating: jazz_and_stuff/ACrush.mid  
  inflating: jazz_and_stuff/Adventage.mid  
  inflating: jazz_and_stuff/Aintchagottired.mid  
  inflating: jazz_and_stuff/ALittleMarkMusic.mid  
  inflating: jazz_and_stuff/Alphonso.mid  
  inflating: jazz_and_stuff/AtLast.mid  
  inflating: jazz_and_stuff/AutumnLeaves(LesFeuillesMortes).mid  
  inflating: jazz_and_stuff/BackhomeagaininIndiana.mid  
  inflating: jazz_and_stuff/BeautifulGirl.mid  
  inflating: jazz_and_stuff/BibonoAozora.mid  
  inflating: jazz_and_stuff/Bluesette.mid  
  inflating: jazz_and_stuff/BothSidesNow.mid  
  inflating: jazz_and_stuff/caitsith.mid  
  inflating: jazz_and_stuff/Charade.mid  
  inflating: jazz_and_stuff/ChickenFeed.mid  
  inflating: jazz_and_stuff/Cids.mid  
  inflating: jazz_and_stuff/cosmo.mid  
  inflating: jazz_and_stuff/costadsol.mid  
  inflating: jazz_and_stuff/dayafter.mid  
  inflating: jazz_and_stuff/decisive.mid  
  inflating: jazz_and_s

In [None]:
import os

def get_notes(directory = "jazz_and_stuff", train = True):
    """ Get all the notes, chords, and rests from the midi files """
    tokens = []

    for file in glob.glob(directory+"/*.mid"):
        midi = converter.parse(file)

        notes_to_parse = None

        try:
            parse_instrums = instrument.partitionByInstrument(midi)
            notes_to_parse = parse_instrums.parts[0].recurse()
        except:
            notes_to_parse = midi.flat.notes

        note_offsets = []
        for element in notes_to_parse:
            if isinstance(element, note.Note) or isinstance(element, chord.Chord):
                note_offsets.append(element.offset)

        note_offsets.sort()

        prev_offset = -1
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                if prev_offset >= 0 and element.offset - prev_offset > 0.5:
                    rest_duration = round((element.offset - prev_offset) * 2) / 2
                    tokens.append(f"REST_{rest_duration}") # added rests for tokens
                    # this expands vocab, old weights cannot be loaded

                tokens.append(str(element.pitch))
                prev_offset = element.offset

            elif isinstance(element, chord.Chord):
                if prev_offset >= 0 and element.offset - prev_offset > 0.5:
                    rest_duration = round((element.offset - prev_offset) * 2) / 2
                    tokens.append(f"REST_{rest_duration}")

                tokens.append('.'.join(str(n) for n in element.normalOrder))
                prev_offset = element.offset

    file_path = './data/tokens'
    dir_path = os.path.dirname(file_path)

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    with open('./data/tokens', 'wb') as filepath:
        pickle.dump(tokens, filepath)
    return tokens

In [35]:
def prepare_sequences_train(notes, n_vocab):
    sequence_length = 20

    pitches = sorted(set(item for item in notes))
    note_to_int = dict((note, number) for number, note in enumerate(pitches))

    net_in = []
    net_out = []

    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        net_in.append([note_to_int[char] for char in sequence_in])
        net_out.append(note_to_int[sequence_out])

    n_patterns = len(net_in)

    net_in = numpy.reshape(net_in, (n_patterns, sequence_length, 1))
    net_in = net_in / float(n_vocab)

    # Categorical encoding of output
    net_out = tf.keras.utils.to_categorical(net_out, num_classes=n_vocab)
    print(net_in)
    print("\n", net_out)
    return (net_in, net_out)


In [None]:
from keras.optimizers import Adam

def layer_model(network_input, n_vocab):
    model = Sequential()
    model.add(LSTM(
        512,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0.5, #using .5 dropout to avoid overfitting, change as hyperparam
        return_sequences=True
    ))
    model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
    model.add(LSTM(512))
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(256))
    # model.add(Activation('relu')) # this relu does not help much
    model.add(BatchNorm())
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('relu'))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer= keras.optimizers.Adam())#try replacing with adam #replaced with adam

    return model

In [52]:

def train(model, network_input, network_output, finetune = False):
    filepath = "weights_checkpoint.keras"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )
    callbacks_list = [checkpoint]
    if finetune:
      model.load_weights('weights_checkpoint.keras')

    model.fit(network_input, network_output, epochs=200, batch_size=128, callbacks=callbacks_list)



def train_network():
    notes = get_notes()

    n_vocab = len(set(notes))

    network_input, network_output = prepare_sequences_train(notes, n_vocab)

    model = layer_model(network_input, n_vocab)

    train(model, network_input, network_output, False)



train_network()

[[[0.56917688]
  [0.30297723]
  [0.8704028 ]
  ...
  [0.75131349]
  [0.8704028 ]
  [0.73029772]]

 [[0.30297723]
  [0.8704028 ]
  [0.7915937 ]
  ...
  [0.8704028 ]
  [0.73029772]
  [0.29246935]]

 [[0.8704028 ]
  [0.7915937 ]
  [0.42732049]
  ...
  [0.73029772]
  [0.29246935]
  [0.73029772]]

 ...

 [[0.78809107]
  [0.77758319]
  [0.75131349]
  ...
  [0.76532399]
  [0.73204904]
  [0.81436077]]

 [[0.77758319]
  [0.75131349]
  [0.85989492]
  ...
  [0.73204904]
  [0.81436077]
  [0.73204904]]

 [[0.75131349]
  [0.85989492]
  [0.78809107]
  ...
  [0.81436077]
  [0.73204904]
  [0.76532399]]]

 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Epoch 1/250
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 89ms/step - loss: 5.8242
Epoch 2/250
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 89ms/step - loss: 4.7991
Epoch 3/250
[1m423/423[0m [32m━━━━━━━

In [53]:
# +--------------------------------------------------------------------------------------------+
# |                      inference                                                             |
# +--------------------------------------------------------------------------------------------+

In [None]:
from keras.optimizers import Adam

def inference_network(network_input, n_vocab):
  """ Create the structure of the neural network """
  model = Sequential()
  model.add(LSTM(
      512,
      input_shape=(network_input.shape[1], network_input.shape[2]),
      recurrent_dropout=0.3,
      return_sequences=True
  ))
  model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
  model.add(LSTM(512))
  model.add(BatchNorm())
  model.add(Dropout(0.3))
  model.add(Dense(256))
  model.add(Activation('relu'))
  model.add(BatchNorm())
  model.add(Dropout(0.3))
  model.add(Dense(n_vocab))
  model.add(Activation('softmax'))
  model.compile(loss='categorical_crossentropy', optimizer= keras.optimizers.Adam()) # note still try to use adam here

  model.load_weights('weights_checkpoint.keras')

  return model


In [54]:
def ready_seqs(notes, pitches, lenvocab):
    note_to_int = dict((note, number) for number, note in enumerate(pitches))

    sequence_length = 100
    network_input = []
    output = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    normalized_input = normalized_input / float(lenvocab)

    return (network_input, normalized_input)

In [57]:

def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    start = numpy.random.randint(0, len(network_input)-1)

    retoken = dict((number, note) for number, note in enumerate(pitchnames))

    pattern = network_input[start]
    prediction_output = []

    for note_index in range(500):
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)
        #this normalizes the top 10 indexes by probablity to 1, and then probabalistically picks them at their respective rates.
        top_idx = numpy.argsort(prediction[0])[-5:] #change this to 10
        top_probabilities = prediction[0][top_idx]
        prob_norm = top_probabilities / numpy.sum(top_probabilities)
        selected_idx = numpy.random.choice(top_idx, p=prob_norm)
        result = retoken[selected_idx]
        prediction_output.append(result)

        pattern.append(selected_idx)
        pattern = pattern[1:len(pattern)]

    return prediction_output


In [58]:

from music21 import stream, note, chord, instrument

!cp weights_checkpoint.keras weights.keras
def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    for pattern in prediction_output:
        # Handle rests
        if pattern.startswith('REST_'):
            try:
                rest_duration = float(pattern.split('_')[1])
                offset += rest_duration
                continue
            except:
                offset += 0.5 #default interval diff
                continue

        elif ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='generated.mid')

In [55]:

def generate():
    """ Generate a piano midi file """
    with open('data/tokens', 'rb') as filepath:
        notes = pickle.load(filepath)

    pitches = sorted(set(item for item in notes))
    lenvocab = len(set(notes))

    network_input, normalized_input = ready_seqs(notes, pitches, lenvocab)
    model = inference_network(normalized_input, lenvocab)
    prediction_output = generate_notes(model, network_input, pitches, lenvocab)
    create_midi(prediction_output)


In [None]:
generate()


#commenting for training on the scc
# from google.colab import files
# files.download('weights_checkpoint.keras')
# files.download('generated.mid')

  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from keras.models import load_model
from keras.utils import to_categorical
import numpy as np

# notes_eval = get_notes(directory="test_data")
# n_vocab_eval = len(set(notes_eval))
# network_input_eval, network_output_eval = prepare_sequences_train(notes_eval, n_vocab_eval)



def evaluate_model(model, X_test, y_test):
    """
    Evaluate the model on a test dataset.
    """
    loss = model.evaluate(X_test, y_test, verbose=1)
    print(f"Test Loss: {loss:.4f}")
    return loss




n_vocab = len(set(notes))

network_input, network_output = prepare_sequences_train(notes, n_vocab)

model = layer_model(network_input, n_vocab)

model.load_weights('weights_checkpoint.keras')

evaluate_model(model, network_input, network_output)

[[[0.56917688]
  [0.30297723]
  [0.8704028 ]
  ...
  [0.75131349]
  [0.8704028 ]
  [0.73029772]]

 [[0.30297723]
  [0.8704028 ]
  [0.7915937 ]
  ...
  [0.8704028 ]
  [0.73029772]
  [0.29246935]]

 [[0.8704028 ]
  [0.7915937 ]
  [0.42732049]
  ...
  [0.73029772]
  [0.29246935]
  [0.73029772]]

 ...

 [[0.78809107]
  [0.77758319]
  [0.75131349]
  ...
  [0.76532399]
  [0.73204904]
  [0.81436077]]

 [[0.77758319]
  [0.75131349]
  [0.85989492]
  ...
  [0.73204904]
  [0.81436077]
  [0.73204904]]

 [[0.75131349]
  [0.85989492]
  [0.78809107]
  ...
  [0.81436077]
  [0.73204904]
  [0.76532399]]]

 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[1m1690/1690[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 22ms/step - loss: 0.0805
Test Loss: 0.0739


0.0739288404583931

In [63]:
from music21 import converter, instrument, note, chord
def read_midi(file_path):
    """Reads a MIDI file and returns a list of note/chord strings including rests."""
    notes = []

    midi = converter.parse(file_path)

    try:
        parts = instrument.partitionByInstrument(midi)
        elements = parts.parts[0].recurse() if parts else midi.flat.notes
    except:
        elements = midi.flat.notes

    prev_offset = -1
    for element in elements:
        if prev_offset >= 0 and element.offset - prev_offset > 0.5:
            rest_duration = round((element.offset - prev_offset) * 2) / 2
            notes.append(f"REST_{rest_duration}")

        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            notes.append('.'.join(str(n) for n in element.normalOrder))

        prev_offset = element.offset

    return notes

file_path = "./generated.mid"
note_sequence = read_midi(file_path)
print(note_sequence[:100])

['REST_4.0', 'D3', 'REST_1.0', '4.7', 'REST_1.0', '4.7', 'G6', 'G6', 'REST_1.5', 'B3', 'REST_1.0', 'C4', 'C4', 'REST_1.5', 'B3', 'REST_1.0', 'D4', 'D4', 'F#6', 'F#5', 'A5', 'A5', 'D6', 'D6', 'D6', 'A5', 'F#5', 'REST_1.0', 'A5', 'REST_1.0', '2.6.9', 'D6', 'REST_1.0', 'A5', 'REST_1.0', '4.7', 'C#3', 'C#3', '4.5.9', 'C#3', 'E3', 'REST_1.0', 'E3', 'C#6', 'E3', 'A5', 'B-5', 'A5', 'C#3', 'E4', 'C4', 'G6', 'G6', 'C#4', '10.11', 'C#3', '4.7.9.0', 'E4', 'G4', 'REST_1.0', 'REST_1.5', '2.6.9', 'E3', 'E5', 'F#2', 'A5', 'A5', 'A5', 'REST_1.0', '4.7', '2.5.7.10', 'REST_1.0', 'B3', '4.7', 'REST_1.0', 'B3', 'B3', 'E3', 'REST_1.0', 'A5', 'A3', '4.7', 'REST_1.0', 'REST_1.5', 'A4', 'A4', '4.7', 'REST_1.0', 'REST_2.0', 'REST_1.0', '4.7', 'REST_1.0', '4.7', '4.7', 'REST_1.0', 'D2', '4.7', 'REST_1.0', '4.7', 'REST_1.0']
