# Setup

In [33]:
# File Setup
import os

# Data Structures 
import torch
import numpy as np

# MIDI Files
import mido
import pretty_midi

# torch utils
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

# MIDI -> Piano Roll Array
* Each piano roll is an array of shape (128, time_steps)
* The 128 rows correspond to the MIDI notes 0 to 127
* The piano roll is later converted to sequences (RNN/Transformer) or kept in a 2D format (CNN)

In [22]:
# Collect the file names found in each genre folder. os.listdir returns the
# entries in arbitrary, filesystem-dependent order, so sort them to keep the
# order of everything derived from these lists reproducible across runs.
classical_music_list = sorted(os.listdir('classical_music'))
pop_music_list = sorted(os.listdir('pop_music'))
rock_music_list = sorted(os.listdir('rock_music'))

In [23]:
# Turn the bare file names into paths relative to the working directory by
# prefixing each one with the name of its genre folder.
classical_music_list = ["classical_music/" + name for name in classical_music_list]
pop_music_list = ["pop_music/" + name for name in pop_music_list]
rock_music_list = ["rock_music/" + name for name in rock_music_list]

In [24]:
def midi_to_piano_roll(file_path, fs=100):
    """Load a MIDI file and return its piano roll as a binary array.

    Args:
        file_path: Path of the MIDI file to read.
        fs: Sampling frequency forwarded to ``get_piano_roll``.

    Returns:
        Integer array holding 1 wherever the piano roll is greater than 0
        (a note is sounding) and 0 elsewhere. In this notebook the result
        has shape (128, time_steps).
    """
    roll = pretty_midi.PrettyMIDI(file_path).get_piano_roll(fs=fs)

    # Discard the magnitudes: only whether a note is on or off is kept.
    return (roll > 0).astype(int)

In [25]:
# Convert every MIDI file of each genre into its binary piano roll.
pop_PR_list = [midi_to_piano_roll(path) for path in pop_music_list]
classical_PR_list = [midi_to_piano_roll(path) for path in classical_music_list]
rock_PR_list = [midi_to_piano_roll(path) for path in rock_music_list]



In [26]:
# Print the shape of every rock piano roll as a sanity check of the conversion.
for m in rock_PR_list:
    print(m.shape)

# The first dimension (128) is the note axis, covering notes 0 to 127;
# the second dimension is the number of time steps in the piece.
    

(128, 16217)
(128, 23985)
(128, 30787)
(128, 17999)
(128, 17333)
(128, 9218)
(128, 27078)
(128, 15750)


# Piano Roll -> Sequence
* For RNNs or Transformers

In [78]:
def roll2sequence(piano_roll, sequence_length=100):
    """Cut a piano roll into consecutive, non-overlapping windows.

    Args:
        piano_roll: 2-D array indexed as (notes, time_steps).
        sequence_length: Number of time steps in each window.

    Returns:
        Array of shape (num_sequences, notes, sequence_length), or ``None``
        when the roll is shorter than a single window. Trailing time steps
        that do not fill a whole window are dropped.
    """
    total_steps = piano_roll.shape[1]

    # Not enough time steps for even one window.
    if total_steps < sequence_length:
        return None

    # The last valid window starts at total_steps - sequence_length, so the
    # range bound must include that start. Without the "+ 1" the final window
    # is lost whenever total_steps is an exact multiple of sequence_length,
    # and a roll of exactly one window leaves np.stack with nothing to stack.
    window_starts = range(0, total_steps - sequence_length + 1, sequence_length)
    windows = [piano_roll[:, start:start + sequence_length] for start in window_starts]
    return np.stack(windows)  # shape (num_sequences, notes, sequence_length)

In [79]:
# Build the window arrays per genre and drop the `None` results (pieces that
# are shorter than one window). The check has to be an explicit identity test:
# filter(None, ...) evaluates the truth value of every item, and NumPy raises
# ValueError for the truth value of an array with more than one element.
pop_sequences = [seq for seq in map(roll2sequence, pop_PR_list) if seq is not None]
classical_sequences = [seq for seq in map(roll2sequence, classical_PR_list) if seq is not None]
rock_sequences = [seq for seq in map(roll2sequence, rock_PR_list) if seq is not None]

In [82]:
# Print the shape of each rock piece after windowing:
# (num_sequences, 128, sequence_length).
for s in rock_sequences:
    print(s.shape)

(162, 128, 100)
(239, 128, 100)
(307, 128, 100)
(179, 128, 100)
(173, 128, 100)
(92, 128, 100)
(270, 128, 100)
(157, 128, 100)


# Sequence -> Torch Tensor
* For CNNs

In [83]:
def roll2tensor(PR_sequences):
    """Convert stacked piano-roll windows into a float32 torch tensor.

    Args:
        PR_sequences: Array-like of windows; in this notebook it has shape
            (num_sequences, 128, sequence_length).

    Returns:
        A ``torch.float32`` tensor with the same shape as the input.
    """
    as_float = torch.tensor(PR_sequences, dtype=torch.float32)
    return as_float

In [84]:
# One float32 tensor per piece, grouped by genre.
pop_tensors = [roll2tensor(seqs) for seqs in pop_sequences]
classical_tensors = [roll2tensor(seqs) for seqs in classical_sequences]
rock_tensors = [roll2tensor(seqs) for seqs in rock_sequences]

In [86]:
# Print the shape of each rock tensor; it should match the shape of the
# sequence array it was built from.
for t in rock_tensors:
    print(t.shape)

torch.Size([162, 128, 100])
torch.Size([239, 128, 100])
torch.Size([307, 128, 100])
torch.Size([179, 128, 100])
torch.Size([173, 128, 100])
torch.Size([92, 128, 100])
torch.Size([270, 128, 100])
torch.Size([157, 128, 100])


# Save Tensors

In [87]:
# Save each list of tensors to a .pt file inside tensors_folder/, the location
# the loading cell reads them back from. Create the folder first so the
# writes cannot fail on a missing directory.
os.makedirs('tensors_folder', exist_ok=True)
torch.save(pop_tensors, 'tensors_folder/pop_tensors.pt')
torch.save(classical_tensors, 'tensors_folder/classical_tensors.pt')
torch.save(rock_tensors, 'tensors_folder/rock_tensors.pt')

# Load Tensors

In [27]:
# Restore the three per-genre tensor lists from tensors_folder/.
pop_tensors, classical_tensors, rock_tensors = (
    torch.load(saved_path)
    for saved_path in (
        'tensors_folder/pop_tensors.pt',
        'tensors_folder/classical_tensors.pt',
        'tensors_folder/rock_tensors.pt',
    )
)

# Torch Dataset and DataLoader

In [34]:
class MusicDataset(Dataset):
    """Dataset that serves the items of an in-memory list unchanged."""

    def __init__(self, tensor_list):
        """Keep a reference to ``tensor_list``; nothing is copied."""
        self.data = tensor_list

    def __len__(self):
        """Return how many items the wrapped list holds."""
        item_count = len(self.data)
        return item_count

    def __getitem__(self, idx):
        """Return the item stored at position ``idx``."""
        item = self.data[idx]
        return item

def pad_collate(batch):
    """Collate pieces of different lengths into one zero-padded tensor.

    Args:
        batch: Sequence of tensors that agree on every dimension except the
            first (in this notebook each is (num_sequences, 128, 100)).

    Returns:
        Tensor of shape (batch_size, longest_first_dim, *trailing_dims);
        shorter items are padded with zeros along their first dimension.
    """
    # The items are passed through untouched. A bare .squeeze() would also
    # drop the first dimension of a piece holding exactly one sequence,
    # turning (1, 128, 100) into (128, 100), which no longer lines up with
    # the other items and makes pad_sequence fail.
    padded_batch = pad_sequence(list(batch), batch_first=True, padding_value=0)
    return padded_batch

# Each tensor in the music dataset has shape (num_sequences, notes=128, sequence_length=100),
# and num_sequences differs from piece to piece, so every batch has to be padded with pad_collate

In [35]:
# Wrap each genre's tensor list in a MusicDataset.
pop_dataset, classical_dataset, rock_dataset = (
    MusicDataset(genre_tensors)
    for genre_tensors in (pop_tensors, classical_tensors, rock_tensors)
)

In [36]:
# Number of pieces per batch.
batch_size = 32

# One shuffling loader per genre; pad_collate pads the pieces of a batch to a
# common length so they can be stacked.
pop_loader, classical_loader, rock_loader = (
    DataLoader(genre_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate)
    for genre_dataset in (pop_dataset, classical_dataset, rock_dataset)
)

In [44]:
# Iterate once over the classical loader and print every padded batch shape.
# The batch index is never used, so the loader is iterated directly.
# NOTE(review): each batch is padded to its longest piece, so a batch such as
# (32, 1321, 128, 100) takes about 2 GB if the tensors are float32; confirm
# this fits in memory or lower batch_size.
for batch in classical_loader:
    print(batch.shape)

torch.Size([32, 1321, 128, 100])
torch.Size([32, 605, 128, 100])
torch.Size([32, 630, 128, 100])
torch.Size([32, 670, 128, 100])
torch.Size([32, 392, 128, 100])
torch.Size([32, 760, 128, 100])
torch.Size([32, 687, 128, 100])
torch.Size([32, 1011, 128, 100])
torch.Size([32, 592, 128, 100])
torch.Size([7, 830, 128, 100])


In [45]:
# Iterate once over the pop loader and print every padded batch shape.
# The batch index is never used, so the loader is iterated directly.
for batch in pop_loader:
    print(batch.shape)

torch.Size([32, 186, 128, 100])
torch.Size([18, 99, 128, 100])
