In [1]:
#run with Python 3.11
import glob
import pickle
import numpy as np
from music21 import converter, instrument, note, chord

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import TensorDataset, DataLoader


In [2]:
#Hyperparams
batch_size = 128

## Data Preprocessing


In [3]:
def get_notes(load_existing = True):
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    if load_existing:
        print("loading existing notes")
        with open('data/notes', 'rb') as filepath:
            notes, durations = pickle.load(filepath)
            return notes, durations
    
    notes = []
    durations = []

    for file in glob.glob("midi_songs/*.mid"):
        midi = converter.parse(file)

        print("Parsing %s" % file)

        notes_to_parse = None

        try: # file has instrument parts
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except: # file has notes in a flat structure
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
                durations.append(element.duration.quarterLength)
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
                durations.append(element.duration.quarterLength)

    with open('data/notes', 'wb') as filepath:
        pickle.dump((notes, durations), filepath)

    return notes, durations


#assemble training data in a readable format 
from preprocessing import prepare_sequences


In [4]:
notes, durations = get_notes(load_existing=True) # set load_existing to True to skip reading in midi

# get amount of pitch names
n_vocab = len(set(notes))
d_vocab = len(set(durations))

network_input, network_output = prepare_sequences(notes, durations, n_vocab, d_vocab)

#build the dataset + dataloader
input_tensor = torch.tensor(network_input, dtype=torch.float32)
print(input_tensor.shape)
output_tensor = torch.tensor(network_output, dtype=torch.float32)

dataset = TensorDataset(input_tensor, output_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

loading existing notes
[[[0.45092025]
  [0.91104294]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.23312883]
  [0.45092025]
  [0.23312883]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.04601227]
  [0.91104294]
  [0.45092025]
  [0.04601227]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.76687117]
  [0.45092025]
  [0.76687117]
  [0.76687117]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.91104294]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.9202454 ]
  [0.48773006]
  [0.78527607]
  [0.45092025]
  [0.45092025]
  [0.46319018]
  [0.45092025]
  [0.46319018]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  [0.9202454 ]
  [0.48773006]
  [0.78527607]
  [0.45092025]
  [0.45092025]
  [0.70552147]
  [0.91104294]
  [0.45092025]
  [0.70552147]
  [0.45092025]
  [0.45092025]
  [0.45092025]
  

  output_tensor = torch.tensor(network_output, dtype=torch.float32)


## Model Architecture

In [8]:
from models import MusicLSTM

def train(num_epochs, model, train_dataloader, loss_func, optimizer):
    losses = []
    total_steps = len(train_dataloader)

    for epoch in range(num_epochs):
        for batch, (sequence, next_note) in enumerate(train_dataloader):
            output = model(sequence)
            loss = loss_func(output, next_note)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (batch+1)%100 == 0:
                print(f"Epoch: {epoch+1}; Batch {batch+1} / {total_steps}; Loss: {loss.item():>4f}")
    
    return losses

## Training

In [9]:
#set input len to 1
model = MusicLSTM(input_len=1, hidden_size=256, num_note_classes=n_vocab, num_duration_classes = d_vocab, num_layers=2)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

losses = train(num_epochs=300, model=model, loss_func=loss_function, optimizer=optimizer, train_dataloader=dataloader)


tensor([[[[0.8006],
          [0.9202],
          [0.8804],
          ...,
          [0.8834],
          [0.9049],
          [0.8528]],

         [[0.0291],
          [0.0291],
          [0.0291],
          ...,
          [0.0291],
          [0.0640],
          [0.0407]]],


        [[[0.8558],
          [0.9877],
          [0.9141],
          ...,
          [0.0460],
          [0.5613],
          [0.8190]],

         [[0.0116],
          [0.0116],
          [0.0116],
          ...,
          [0.0291],
          [0.0291],
          [0.0291]]],


        [[[0.1012],
          [0.1012],
          [0.1012],
          ...,
          [0.9387],
          [0.7761],
          [0.9325]],

         [[0.0291],
          [0.0291],
          [0.0640],
          ...,
          [0.1221],
          [0.0291],
          [0.0291]]],


        ...,


        [[[0.8834],
          [0.9755],
          [0.9571],
          ...,
          [0.7822],
          [0.9356],
          [0.8773]],

         [[0.0291],


KeyboardInterrupt: 

In [None]:
torch.save(model, "models/music_model.pt")

In [None]:

train(num_epochs=400, model=model, loss_func=loss_function, optimizer=optimizer, train_dataloader=dataloader)


Epoch: 1; Batch 100 / 359; Loss: 0.756491
Epoch: 1; Batch 200 / 359; Loss: 0.797698
Epoch: 1; Batch 300 / 359; Loss: 0.827648
Epoch: 2; Batch 100 / 359; Loss: 0.883092
Epoch: 2; Batch 200 / 359; Loss: 0.764925
Epoch: 2; Batch 300 / 359; Loss: 0.923588
Epoch: 3; Batch 100 / 359; Loss: 0.919735
Epoch: 3; Batch 200 / 359; Loss: 0.941512
Epoch: 3; Batch 300 / 359; Loss: 1.009081
Epoch: 4; Batch 100 / 359; Loss: 0.944536
Epoch: 4; Batch 200 / 359; Loss: 0.860411
Epoch: 4; Batch 300 / 359; Loss: 0.900028
Epoch: 5; Batch 100 / 359; Loss: 0.889803
Epoch: 5; Batch 200 / 359; Loss: 0.844057
Epoch: 5; Batch 300 / 359; Loss: 0.911582
Epoch: 6; Batch 100 / 359; Loss: 0.875203
Epoch: 6; Batch 200 / 359; Loss: 0.852586
Epoch: 6; Batch 300 / 359; Loss: 0.885752
Epoch: 7; Batch 100 / 359; Loss: 0.773639
Epoch: 7; Batch 200 / 359; Loss: 0.947121
Epoch: 7; Batch 300 / 359; Loss: 0.844506
Epoch: 8; Batch 100 / 359; Loss: 0.924661
Epoch: 8; Batch 200 / 359; Loss: 0.917312
Epoch: 8; Batch 300 / 359; Loss: 0

In [None]:

# def create_network(network_input, n_vocab):
#     """ create the structure of the neural network """
#     model = Sequential()
#     model.add(LSTM(
#         512,
#         input_shape=(network_input.shape[1], network_input.shape[2]), #input_shape=(# notes in a sequence = 100, # notes at once = 1)
#         recurrent_dropout=0.3,
#         return_sequences=True
#     ))
#     model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.3,))
#     model.add(LSTM(512))
#     model.add(BatchNorm())
#     model.add(Dropout(0.3))
#     model.add(Dense(256))
#     model.add(Activation('relu'))
#     model.add(BatchNorm())
#     model.add(Dropout(0.3))
#     model.add(Dense(n_vocab))
#     model.add(Activation('softmax'))
#     model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

#     return model

# def train(model, network_input, network_output):
#     """ train the neural network """
#     filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
#     checkpoint = ModelCheckpoint(
#         filepath,
#         monitor='loss',
#         verbose=0,
#         save_best_only=True,
#         mode='min'
#     )
#     callbacks_list = [checkpoint]

#     model.fit(network_input, network_output, epochs=200, batch_size=128, callbacks=callbacks_list)

# if __name__ == '__main__':
#     train_network()
