<a href="https://colab.research.google.com/github/jmhuer/SymbolicMusicData/blob/main/POP909.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pretty_midi
!git clone https://github.com/jmhuer/ModularSparseAutoencoder
!git clone https://github.com/music-x-lab/POP909-Dataset
%cd /content/POP909-Dataset/data_process
!pip install libfmp

Cloning into 'ModularSparseAutoencoder'...
remote: Enumerating objects: 19, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 19 (delta 6), reused 10 (delta 2), pack-reused 0[K
Unpacking objects: 100% (19/19), done.
Cloning into 'POP909-Dataset'...
remote: Enumerating objects: 9265, done.[K
remote: Counting objects: 100% (9265/9265), done.[K
remote: Compressing objects: 100% (8157/8157), done.[K
remote: Total 9265 (delta 13), reused 9245 (delta 6), pack-reused 0[K
Receiving objects: 100% (9265/9265), 45.75 MiB | 24.62 MiB/s, done.
Resolving deltas: 100% (13/13), done.
/content/POP909-Dataset/data_process


In [None]:
#@title Pytorch for DL

import torch.nn.functional as F
import torch.optim as optim
from torch import nn
import torch
from torch.nn.utils import weight_norm
import numpy as np
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: ", device)


def get_model_parameters(model):
    """Return the number of trainable scalar values in ``model``.

    Only parameters with ``requires_grad=True`` are counted.

    ``Tensor.numel()`` is used instead of ``np.prod(p.size())`` because the
    product of an empty shape is the float ``1.0``, which silently turned the
    total into a float whenever the model had a zero-dimensional parameter.

    Parameters
    ----------
    model : torch.nn.Module
        Any object exposing ``parameters()``.

    Returns
    -------
    int
        Total element count over all trainable parameters (0 if there are none).
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

Using device:  cuda


In [None]:

import pickle
import os
import sys
# import utils
from processor import MidiEventProcessor
import pretty_midi 
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms



class Pop909(Dataset):
    """Frame-level piano-roll dataset built from POP909 song directories.

    Each song's MIDI file is rendered to a piano roll, transposed so that
    frames lie along the first axis, and all songs are concatenated along
    that axis. One dataset item is a single normalised frame.

    Parameters
    ----------
    midi_paths : list of str
        Song directories. Each directory ``<dir>/<name>`` is expected to hold
        a file ``<name>.mid``. Entries ending in ``.xlsx`` are skipped.
    parts : str
        Which instrument slice to keep: ``"all"``, ``"melody"``, ``"bridge"``
        or ``"chords"`` (see ``preprocess_midi``).
    traspose : bool
        Accepted for backward compatibility (the spelling is part of the
        public signature). Transposition is not implemented yet (TODO).
    """

    def __init__(self, midi_paths, parts="all", traspose=True):
        # Incremented once per MIDI file handled by preprocess_midi.
        self.total = 0
        self.all_data = self.preprocess_pop909(midi_paths=midi_paths,
                                               parts=parts,
                                               traspose=traspose).double()
        print("all_data ", self.all_data.shape)
        self.transform = self.make_transform()

    def __len__(self):
        """Number of frames across all loaded songs."""
        return len(self.all_data)

    def __getitem__(self, idx):
        """Return frame ``idx`` normalised, with two leading singleton axes added."""
        piano_roll_slice = self.all_data[idx, :]
        return self.transform["norm"](piano_roll_slice[None][None])

    def make_transform(self):
        """Build the normalisation transform and its inverse.

        The statistics are computed from this instance's own ``all_data``:
        the global mean, and twice the global standard deviation.

        Returns
        -------
        dict
            ``"norm"`` maps ``x`` to ``(x - mean) / std``; ``"inverse_norm"``
            undoes it, i.e. maps ``y`` to ``y * std + mean``.
        """
        mean = self.all_data.mean()
        std = self.all_data.std() * 2
        return {
            'norm': transforms.Compose([
                transforms.Normalize([mean], [std])
            ]),
            # Normalize computes (y - m') / s'; choosing m' = -mean/std and
            # s' = 1/std yields y * std + mean, the inverse of "norm".
            "inverse_norm": transforms.Normalize(
                mean=[-mean / std],
                std=[1 / std]
            ),
        }

    def preprocess_midi(self, path, parts="all"):
        """Load one MIDI file and render the selected instruments as a piano roll.

        Parameters
        ----------
        path : str
            Path of the ``.mid`` file.
        parts : str
            Key into the instrument-slice table below. An unknown key raises
            ``KeyError``.

        Returns
        -------
        The array produced by ``pretty_midi.Instrument.get_piano_roll()`` for
        a single piano instrument holding all selected notes.
        """
        # Half-open [start, stop) ranges into the file's instrument list.
        get_index = {
            "all"   : [0, 3],
            "melody": [0, 1],
            "bridge": [1, 2],
            "chords": [2, 3],
        }
        first, last = get_index[parts]
        data = pretty_midi.PrettyMIDI(path)

        notes = []
        for ins in data.instruments[first:last]:
            notes.extend(ins.notes)
        # Snap note boundaries to two decimals (10 ms) before rendering.
        for note in notes:
            note.start = round(note.start, 2)
            note.end = round(note.end, 2)
        notes.sort(key=lambda n: n.start)

        # Merge everything into one piano track so a single roll is produced.
        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
        piano = pretty_midi.Instrument(program=piano_program)
        piano.notes.extend(notes)

        self.total += 1
        return piano.get_piano_roll()

    def preprocess_pop909(self, midi_paths, parts="all", traspose=True):
        """Render every song directory and stack the frames into one tensor.

        Parameters
        ----------
        midi_paths : list of str
            Song directories; entries ending in ``.xlsx`` are ignored.
        parts : str
            Forwarded to ``preprocess_midi``.
        traspose : bool
            Currently unused.

        Returns
        -------
        torch.Tensor
            All per-song rolls, each transposed, concatenated along axis 0.

        Raises
        ------
        KeyboardInterrupt
            Re-raised after printing a notice, so an interrupted load cannot
            be mistaken for a finished one.
        RuntimeError
            If no file could be loaded at all.
        """
        rolls = []
        for path in midi_paths:
            if path.endswith(".xlsx"):
                continue
            # The MIDI file is named after its directory; take the name from
            # the path itself rather than from a fixed character offset.
            song_id = os.path.basename(os.path.normpath(path))
            filename = os.path.join(path, song_id + ".mid")
            try:
                data = torch.tensor(self.preprocess_midi(filename, parts=parts))
            except KeyboardInterrupt:
                print(' Abort')
                raise
            except EOFError:
                # Truncated file: report it and carry on with the other songs.
                print('EOF Error')
                continue
            rolls.append(data.T)
        if not rolls:
            raise RuntimeError("Pop909: no MIDI data could be loaded from midi_paths")
        return torch.cat(rolls)


def load_data(midi_paths, parts, num_workers=0, batch_size=32, random_seed = 40):
    '''
    Build train and validation DataLoaders over Pop909 piano-roll frames.

    The first 90% of ``midi_paths`` (in the order given) becomes the training
    set and the remaining 10% the validation set. Each split is its own
    ``Pop909`` instance and therefore computes its own normalisation
    statistics.

    Parameters
    ----------
    midi_paths : list of str
        Song directories, forwarded to ``Pop909``.
    parts : str
        Instrument selection, forwarded to ``Pop909``.
    num_workers : int
        Number of loader worker processes, forwarded to ``DataLoader``.
    batch_size : int
        Frames per batch; incomplete final batches are dropped.
    random_seed : int
        Currently unused (nothing here is shuffled); kept so existing calls
        keep working.

    Returns
    -------
    dict
        ``{'train': DataLoader, 'val': DataLoader}``, both unshuffled.
    '''
    total = len(midi_paths)
    index = int(total * 0.9)  # boundary between the train and val paths

    dataset_train = Pop909(midi_paths[:index], parts=parts)
    dataset_val = Pop909(midi_paths[index:], parts=parts)

    print("Size of train dataset: ",len(dataset_train))
    print("Size of val dataset: ",len(dataset_val))

    loader_kwargs = dict(
        batch_size=batch_size,
        shuffle=False,
        drop_last=True,
        num_workers=num_workers,
    )
    dataloaders = {
        'train': DataLoader(dataset_train, **loader_kwargs),
        'val': DataLoader(dataset_val, **loader_kwargs)
    }
    return dataloaders



##only tensor transforms



In [None]:

POP909_DIR = "../POP909/"
BATCH_SIZE = 256

# os.listdir returns entries in arbitrary order; sort them so the train/val
# split made by load_data is the same on every run. The listing also contains
# the index spreadsheet, which Pop909 skips by its .xlsx extension.
midi_paths = [POP909_DIR + d for d in sorted(os.listdir(POP909_DIR))]

# NOTE: despite its name, `dataset` is the dict of DataLoaders from load_data.
dataset = load_data(midi_paths=midi_paths, parts="chords", batch_size=BATCH_SIZE)
# Number of validation frames actually served (incomplete last batch dropped).
print(len(dataset["val"])*BATCH_SIZE )


all_data  torch.Size([20326974, 128])
all_data  torch.Size([2274122, 128])
Size of train dataset:  20326974
Size of val dataset:  2274122
2274048


In [None]:


def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    '''Turn a piano-roll array into a single-instrument PrettyMIDI object.

    A note starts where a pitch row goes from zero to a positive value and
    ends where that row drops back to a non-positive value. The velocity of
    the note is the value seen at its onset; later changes between positive
    values are ignored.

    Parameters
    ----------
    piano_roll : np.ndarray, shape=(128,frames), dtype=int
        Piano roll of one instrument; rows are pitches, columns are frames.
    fs : int
        Sampling frequency of the columns, i.e. consecutive columns are
        ``1./fs`` seconds apart.
    program : int
        The program number given to the instrument.

    Returns
    -------
    midi_object : pretty_midi.PrettyMIDI
        A pretty_midi.PrettyMIDI instance holding one instrument with the
        recovered notes.
    '''
    n_pitches, _ = piano_roll.shape
    midi = pretty_midi.PrettyMIDI()
    track = pretty_midi.Instrument(program=program)

    # One silent frame on each side so that notes touching the first or the
    # last column still produce an on/off transition.
    padded = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # Frame-to-frame differences; every nonzero entry is a velocity change.
    # Transposing first makes the events come out ordered by frame.
    change_frames, change_pitches = np.nonzero(np.diff(padded).T)

    # Per-pitch state: velocity of the currently sounding note (0 = silent)
    # and the time at which it started.
    held_velocity = np.zeros(n_pitches, dtype=int)
    onset_time = np.zeros(n_pitches)

    for frame, pitch in zip(change_frames, change_pitches):
        # frame + 1 is the padded column holding the value after the change
        new_velocity = padded[pitch, frame + 1]
        seconds = frame / fs

        if not new_velocity > 0:
            # Pitch fell silent: close the note that was sounding.
            track.notes.append(
                pretty_midi.Note(
                    velocity=held_velocity[pitch],
                    pitch=pitch,
                    start=onset_time[pitch],
                    end=seconds))
            held_velocity[pitch] = 0
            continue

        if held_velocity[pitch] == 0:
            # Pitch was silent until now: remember when and how loud it began.
            onset_time[pitch] = seconds
            held_velocity[pitch] = new_velocity

    midi.instruments.append(track)
    return midi




In [None]:
import itertools
import IPython.display

index = 1021  # first validation batch to listen to (others tried: 44, 66, 0, 1, 500)
N_BATCHES = 10        # consecutive batches joined into one excerpt
NOTE_THRESHOLD = 5    # de-normalised values above this count as "note on"
NOTE_VELOCITY = 100   # velocity given to every predicted note
# cello_c_chord.write('cello-C-chord.mid')
#lets play a batch
# NOTE(review): `net` is not defined in the cells shown here; it must be
# created (and expose `.device`) before this cell is run.
tr = dataset["val"].dataset.transform["inverse_norm"]

# Skip the first `index` batches, then take the next N_BATCHES.
listen = list(itertools.islice(dataset["val"], index, index + N_BATCHES))
if len(listen) < N_BATCHES:
    # Previously this case surfaced later as a NameError on pred_piano_roll.
    raise ValueError(
        "validation loader has only {} batches; need at least {}".format(
            len(dataset["val"]), index + N_BATCHES))

ba = torch.cat(listen)
# Inference only: no autograd graph is needed for the reconstruction.
with torch.no_grad():
    x_var = ba[:,0,0,:].to(net.device)
    xpred_var = net(x_var)[:,None,None,:]
print(xpred_var.shape)

# Undo the normalisation and put pitches on the first axis, frames on the second.
batch_piano_roll = tr(ba)[:,0,0,:].T
pred_piano_roll = tr(xpred_var)[:,0,0,:].T
# Binarise the prediction into fixed-velocity notes.
pred_piano_roll = (pred_piano_roll > NOTE_THRESHOLD) * NOTE_VELOCITY

pm = piano_roll_to_pretty_midi(pred_piano_roll.detach().cpu().numpy())
IPython.display.Audio(pm.synthesize(fs=16000), rate=16000)



torch.Size([2560, 1, 1, 128])
