In [1]:
#run with Python 3.11
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import TensorDataset, DataLoader


In [2]:
#Hyperparams
batch_size = 128

## Data Preprocessing


In [3]:
def get_notes(load_existing = True):
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    if load_existing:
        print("loading existing notes")
        with open('data/notes', 'rb') as filepath:
            notes = pickle.load(filepath)
            return notes
    
    notes = []

    for file in glob.glob("midi_songs/*.mid"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes


#assemble training data in a readable format 
def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 100

    # get all pitch names
    pitchnames = sorted(set(item for item in notes))

     # create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    network_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    network_input = network_input / float(n_vocab)

    network_output = F.one_hot(torch.tensor(network_output)) #one hot encoding

    return (network_input, network_output)



In [4]:
notes = get_notes(load_existing=False) # set load_existing to True to skip reading in midi

# get amount of pitch names
n_vocab = len(set(notes))

network_input, network_output = prepare_sequences(notes, n_vocab)

#build the dataset + dataloader
input_tensor = torch.tensor(network_input, dtype=torch.float32)
output_tensor = torch.tensor(network_output, dtype=torch.float32)

dataset = TensorDataset(input_tensor, output_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

Parsing midi_songs\0fithos.mid
Parsing midi_songs\8.mid
Parsing midi_songs\ahead_on_our_way_piano.mid
Parsing midi_songs\AT.mid
Parsing midi_songs\balamb.mid
Parsing midi_songs\bcm.mid
Parsing midi_songs\BlueStone_LastDungeon.mid
Parsing midi_songs\braska.mid
Parsing midi_songs\caitsith.mid
Parsing midi_songs\Cids.mid
Parsing midi_songs\cosmo.mid
Parsing midi_songs\costadsol.mid
Parsing midi_songs\dayafter.mid
Parsing midi_songs\decisive.mid
Parsing midi_songs\dontbeafraid.mid
Parsing midi_songs\DOS.mid
Parsing midi_songs\electric_de_chocobo.mid
Parsing midi_songs\Eternal_Harvest.mid
Parsing midi_songs\EyesOnMePiano.mid
Parsing midi_songs\ff11_awakening_piano.mid
Parsing midi_songs\ff1battp.mid
Parsing midi_songs\FF3_Battle_(Piano).mid
Parsing midi_songs\FF3_Third_Phase_Final_(Piano).mid
Parsing midi_songs\ff4-airship.mid
Parsing midi_songs\Ff4-BattleLust.mid
Parsing midi_songs\ff4-fight1.mid
Parsing midi_songs\ff4-town.mid
Parsing midi_songs\FF4.mid
Parsing midi_songs\ff4pclov.mid
Par

  output_tensor = torch.tensor(network_output, dtype=torch.float32)


## Model Architecture

In [5]:
from models import MusicLSTM

def train(num_epochs, model, train_dataloader, loss_func, optimizer):
    losses = []
    total_steps = len(train_dataloader)

    for epoch in range(num_epochs):
        for batch, (sequence, next_note) in enumerate(train_dataloader):
    
            output = model(sequence)
            loss = loss_func(output, next_note)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (batch+1)%100 == 0:
                print(f"Epoch: {epoch+1}; Batch {batch+1} / {total_steps}; Loss: {loss.item():>4f}")
    
    return losses

## Training

In [6]:
model = MusicLSTM(input_len=network_input.shape[2], hidden_size=256, num_classes=n_vocab, num_layers=2)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.0003)

losses = train(num_epochs=400, model=model, loss_func=loss_function, optimizer=optimizer, train_dataloader=dataloader)


Epoch: 1; Batch 100 / 359; Loss: 4.788625
Epoch: 1; Batch 200 / 359; Loss: 4.526455
Epoch: 1; Batch 300 / 359; Loss: 4.678929
Epoch: 2; Batch 100 / 359; Loss: 4.586399
Epoch: 2; Batch 200 / 359; Loss: 4.725073
Epoch: 2; Batch 300 / 359; Loss: 4.696541
Epoch: 3; Batch 100 / 359; Loss: 4.561501
Epoch: 3; Batch 200 / 359; Loss: 4.687592
Epoch: 3; Batch 300 / 359; Loss: 4.735240
Epoch: 4; Batch 100 / 359; Loss: 4.706417
Epoch: 4; Batch 200 / 359; Loss: 4.801732
Epoch: 4; Batch 300 / 359; Loss: 4.713769
Epoch: 5; Batch 100 / 359; Loss: 4.754982
Epoch: 5; Batch 200 / 359; Loss: 4.700130
Epoch: 5; Batch 300 / 359; Loss: 4.628604
Epoch: 6; Batch 100 / 359; Loss: 4.580406
Epoch: 6; Batch 200 / 359; Loss: 4.611865
Epoch: 6; Batch 300 / 359; Loss: 4.390574
Epoch: 7; Batch 100 / 359; Loss: 4.468637
Epoch: 7; Batch 200 / 359; Loss: 4.413692
Epoch: 7; Batch 300 / 359; Loss: 4.546490
Epoch: 8; Batch 100 / 359; Loss: 4.305113
Epoch: 8; Batch 200 / 359; Loss: 4.416423
Epoch: 8; Batch 300 / 359; Loss: 4

In [9]:
torch.save(model, "models/music_model.pt")

In [8]:

train(num_epochs=225, model=model, loss_func=loss_function, optimizer=optimizer, train_dataloader=dataloader)


Epoch: 1; Batch 100 / 359; Loss: 0.477301
Epoch: 1; Batch 200 / 359; Loss: 0.546556
Epoch: 1; Batch 300 / 359; Loss: 0.518844
Epoch: 2; Batch 100 / 359; Loss: 0.486598
Epoch: 2; Batch 200 / 359; Loss: 0.489876
Epoch: 2; Batch 300 / 359; Loss: 0.508604
Epoch: 3; Batch 100 / 359; Loss: 0.478998
Epoch: 3; Batch 200 / 359; Loss: 0.474869
Epoch: 3; Batch 300 / 359; Loss: 0.470879
Epoch: 4; Batch 100 / 359; Loss: 0.498757
Epoch: 4; Batch 200 / 359; Loss: 0.435737
Epoch: 4; Batch 300 / 359; Loss: 0.361770
Epoch: 5; Batch 100 / 359; Loss: 0.489261
Epoch: 5; Batch 200 / 359; Loss: 0.485608
Epoch: 5; Batch 300 / 359; Loss: 0.373657
Epoch: 6; Batch 100 / 359; Loss: 0.461851
Epoch: 6; Batch 200 / 359; Loss: 0.454362
Epoch: 6; Batch 300 / 359; Loss: 0.591164
Epoch: 7; Batch 100 / 359; Loss: 0.416573
Epoch: 7; Batch 200 / 359; Loss: 0.447249
Epoch: 7; Batch 300 / 359; Loss: 0.397537
Epoch: 8; Batch 100 / 359; Loss: 0.398073
Epoch: 8; Batch 200 / 359; Loss: 0.604189
Epoch: 8; Batch 300 / 359; Loss: 0

KeyboardInterrupt: 

In [None]:

# def create_network(network_input, n_vocab):
#     """ create the structure of the neural network """
#     model = Sequential()
#     model.add(LSTM(
#         512,
#         input_shape=(network_input.shape[1], network_input.shape[2]), #input_shape=(# notes in a sequence = 100, # notes at once = 1)
#         recurrent_dropout=0.3,
#         return_sequences=True
#     ))
#     model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
#     model.add(LSTM(512))
#     model.add(BatchNorm())
#     model.add(Dropout(0.3))
#     model.add(Dense(256))
#     model.add(Activation('relu'))
#     model.add(BatchNorm())
#     model.add(Dropout(0.3))
#     model.add(Dense(n_vocab))
#     model.add(Activation('softmax'))
#     model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

#     return model

# def train(model, network_input, network_output):
#     """ train the neural network """
#     filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
#     checkpoint = ModelCheckpoint(
#         filepath,
#         monitor='loss',
#         verbose=0,
#         save_best_only=True,
#         mode='min'
#     )
#     callbacks_list = [checkpoint]

#     model.fit(network_input, network_output, epochs=200, batch_size=128, callbacks=callbacks_list)

# if __name__ == '__main__':
#     train_network()
