In [2]:
import numpy as np

# Load the saved data
X = np.load('notes.npy', allow_pickle=True)

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

class MusicSequenceDataset(Dataset):
    def __init__(self, notes, sequence_length):
        self.notes = notes  # list of lists of dictionaries (list of sequences of notes)
        self.sequence_length = sequence_length  # length of the input sequence

    def __len__(self):
        return len(self.notes) - self.sequence_length  # Total number of sequences available

    def __getitem__(self, idx):
        # Get a sequence of notes (list of dictionaries)
        sequence = self.notes[idx:idx + self.sequence_length]
        target = self.notes[idx + self.sequence_length]

        # The sequence should now be a flat list of dictionaries, create the tensor for X
        X = torch.tensor([[note['start'], note['end'], note['pitch'], note['velocity']] for note in sequence], dtype=torch.float32)
        
        # The target note is the next note's features
        y = torch.tensor([target['start'], target['end'], target['pitch'], target['velocity']], dtype=torch.float32)
        
        return X, y

class MusicLSTMModel(nn.Module):
    def __init__(self, input_size=4, hidden_size=128, num_layers=2, output_size=4):
        super(MusicLSTMModel, self).__init__()
        
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Fully connected layer to map the hidden state to the output
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        # Pass through LSTM layer
        lstm_out, (hn, cn) = self.lstm(x)
        
        # Only take the output of the last time step
        last_lstm_output = lstm_out[:, -1, :]
        
        # Pass the last output through a fully connected layer
        output = self.fc(last_lstm_output)
        
        return output
    
# Instantiate the dataset and DataLoader
sequence_length = 16
dataset = MusicSequenceDataset(X, sequence_length=sequence_length)
print(dataset[5])

(tensor([[ 1.7865,  1.8281, 72.0000, 76.0000],
        [ 1.8031,  2.0000, 67.0000, 56.0000],
        [ 1.9833,  2.0979, 74.0000, 68.0000],
        [ 2.0375,  2.1063, 72.0000, 77.0000],
        [ 2.0979,  2.1823, 74.0000, 51.0000],
        [ 2.1719,  2.2802, 67.0000, 57.0000],
        [ 2.3281,  2.5094, 66.0000, 58.0000],
        [ 2.1490,  2.5198, 72.0000, 60.0000],
        [ 1.9833,  2.5229, 57.0000, 61.0000],
        [ 2.5229,  2.5896, 71.0000, 68.0000],
        [ 2.5906,  2.6740, 72.0000, 47.0000],
        [ 2.5583,  2.7635, 64.0000, 35.0000],
        [ 2.8875,  3.1135, 62.0000, 63.0000],
        [ 3.0802,  3.2729, 66.0000, 63.0000],
        [ 2.7125,  3.4198, 59.0000, 50.0000],
        [ 2.6792,  3.4479, 74.0000, 68.0000]]), tensor([ 3.4531,  3.6021, 71.0000, 66.0000]))


In [12]:
# Load the model
model = MusicLSTMModel()

# Load the weights from the .pth file
model.load_state_dict(torch.load('model_epoch_3.pth'))

# Set the model to evaluation mode
model.train()

MusicLSTMModel(
  (lstm): LSTM(4, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)

In [None]:
import pretty_midi
import random

# Function to generate music
def generate_music(model, initial_notes, num_notes, sequence_length=8):
    notes_sequence = initial_notes[0]
    
    # Generate new notes until we have the required number of notes
    for i in range(num_notes - len(initial_notes)):
        # Create input tensor from the last `sequence_length` notes
        sequence = notes_sequence[i:i+sequence_length].unsqueeze(0) # add the batch dimension
        print(sequence)
        
        # Predict the next note
        with torch.no_grad():
            predicted_note = model(sequence)

        print(predicted_note)

        # Extract the predicted note (start, end, pitch, velocity)
        predicted_note_dict = {
            'start': predicted_note[0].item(),
            'end': predicted_note[1].item(),
            'pitch': int(predicted_note[2].item()),
            'velocity': int(predicted_note[3].item())
        }
        
        # Append the predicted note to the sequence
        notes_sequence.append(predicted_note_dict)
    
    return notes_sequence

# Function to convert the list of notes to a MIDI file
def notes_to_midi(notes, output_file='generated_music.mid'):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=pretty_midi.program_to_instrument(0))  # 0 is the piano program
    
    for note in notes:
        midi_note = pretty_midi.Note(
            velocity=note['velocity'],
            pitch=note['pitch'],
            start=note['start'],
            end=note['end']
        )
        instrument.notes.append(midi_note)
    
    midi.instruments.append(instrument)
    midi.write(output_file)
    print(f'MIDI file saved to {output_file}')

# Initial notes to start the sequence (replace with your own starting notes)'
#rnd_idx = random.randint(0, len(dataset)-1)

initial_notes = dataset[872]

# Generate music with the model
num_notes = 200  # Desired number of notes to generate
generated_notes = generate_music(model, initial_notes, num_notes)

# Convert the generated notes to a MIDI file
notes_to_midi(generated_notes, 'generated_music.mid')


tensor([[[107.0417, 107.2552,  67.0000,  62.0000],
         [107.2500, 107.3917,  50.0000,  61.0000],
         [107.2458, 107.4875,  71.0000,  76.0000],
         [107.4437, 107.6698,  67.0000,  59.0000],
         [107.6448, 107.8031,  72.0000,  76.0000],
         [107.6521, 107.8552,  51.0000,  66.0000],
         [107.8333, 108.1635,  67.0000,  66.0000],
         [108.0708, 108.2135,  75.0000,  78.0000]]])
tensor([[-57.9143, -57.3515,  44.5148,  41.7698]])


RuntimeError: a Tensor with 4 elements cannot be converted to Scalar

: 