In [1]:
import mido
import numpy as np 
from matplotlib import pyplot as plt
import os

In [2]:
def midlist(data_folder):
    """
    Walk through a folder (recursively) and load every MIDI file found in it.

    A file counts as MIDI when its name ends in ``.mid`` or ``.midi``; the
    comparison is case-insensitive so that files such as ``SONG.MID`` are
    loaded instead of being silently dropped. Every other file is reported on
    stdout and skipped.

    :param data_folder: the base data folder to scan
    :return all_mid: list with one ``mido.MidiFile`` per MIDI file found
    """
    midi_extensions = ('.mid', '.midi')
    all_mid = []

    for dirpath, _, filenames in os.walk(data_folder):
        for filename in filenames:
            path = os.path.join(dirpath, filename)

            # str.endswith accepts a tuple of suffixes; lower() makes the
            # extension check case-insensitive
            if not path.lower().endswith(midi_extensions):
                # reported so the folder can be cleaned by hand after a first run
                print(f"{path} ignored")
                continue

            all_mid.append(mido.MidiFile(path))

    return all_mid

In [3]:
def onlyPR(mids, allowed_channels=(0, 1, 2)):
    """
    Takes a list of mid files and keeps only the pianoRoll mids.

    A song is kept when every channel message it contains is on one of the
    allowed channels. Meta messages are ignored, and so are messages that
    carry no ``channel`` attribute at all (e.g. sysex), which previously
    raised ``AttributeError``.

    The result is built as a new list instead of removing items from a list
    while iterating over it: removing during iteration skips the element that
    follows each removed one, so some non-piano songs used to survive.

    :param mids: the base list of mid (left untouched)
    :param allowed_channels: channels considered to be piano channels
    :return PR: a new list with the songs of -mids- that only use allowed channels
    """
    def _is_piano_roll(music):
        # True when no channel message of the song is outside allowed_channels
        for track in music.tracks:
            for msg in track:
                # pass through useless Metamessages
                if msg.is_meta:
                    continue
                channel = getattr(msg, "channel", None)
                if channel is not None and channel not in allowed_channels:
                    return False
        return True

    return [music for music in mids if _is_piano_roll(music)]

In [4]:
# Load every MIDI file found under the "dataset" folder ...
dataset_mids = midlist(data_folder="dataset")
# ... and keep only the songs that onlyPR accepts
pianoRoll_mids = onlyPR(mids=dataset_mids)

In [5]:
# Save every kept song as "<index>.mid". The output folder is created first so
# this cell also works on a fresh run where "Normal_TS" does not exist yet.
os.makedirs("Normal_TS", exist_ok=True)
for song_index, song in enumerate(pianoRoll_mids):
    song.save(f"Normal_TS/{song_index}.mid")

In [6]:
def midi_to_samples(mid, SPM):
    """
    Gets a mid and converts it into vectors, like samples in measures.

    Every returned sample is a ``(SPM, 128)`` uint8 array: rows are time
    steps, columns are MIDI note numbers, and a 1 marks the step where a note
    starts. Only note starts are encoded; note ends are tracked but not
    written to the output.

    :param mid: midi file that will be converted
    :param SPM: the number of samples wanted in a measure (must be an int,
        it is used as an array dimension)
    :return samples: all the samples of the mid, an empty list when the file
        has no audible note
    """
    # NOTE(review): positions are scaled by ticks_per_beat, so each returned
    # array spans one beat's worth of ticks rather than a full measure.
    # Time-signature meta messages are ignored. Confirm this is intended
    # before changing it, since it defines the shape of the saved dataset.
    TPM = mid.ticks_per_beat

    # note number -> list of [start] or [start, end] positions (in sample units)
    all_notes = {}

    for track in mid.tracks:
        abs_time = 0
        for msg in track:
            # msg.time is a delta; accumulate it to get the absolute tick
            abs_time += msg.time

            if msg.type == "note_on" and msg.velocity > 0:
                intervals = all_notes.setdefault(msg.note, [])

                # If this note is already sounding, end the previous one first
                if intervals and len(intervals[-1]) == 1:
                    intervals[-1].append(intervals[-1][0] + 1)

                # Time that the note started being played
                intervals.append([abs_time * SPM / TPM])

            elif msg.type in ("note_off", "note_on"):
                # A note_on with velocity 0 is treated as a note_off.
                # Look the note up from the message itself (not from the last
                # note_on) and ignore ends that have no matching open start.
                intervals = all_notes.get(msg.note)
                if intervals and len(intervals[-1]) == 1:
                    # Time that the note stops
                    intervals[-1].append(abs_time * SPM / TPM)

    # Stopping any note that hasn't ended
    for intervals in all_notes.values():
        for start_end in intervals:
            if len(start_end) == 1:
                start_end.append(start_end[0] + 1)

    samples = []
    for note, intervals in all_notes.items():
        for start, _end in intervals:
            sample_ix = int(start / SPM)

            # fill in silence until the appropriate sample/measure is reached
            while len(samples) <= sample_ix:
                samples.append(np.zeros((SPM, 128), dtype=np.uint8))

            # position of the note start inside its own sample
            start_ix = int(start - sample_ix * SPM)
            samples[sample_ix][start_ix, note] = 1

    return samples

def find_sample_range(samples):
    """
    Find the span of notes that are used anywhere in a list of samples.
    :param samples: non-empty list of sample arrays whose second axis indexes the notes
    :return min_note, max_note: index of the lowest used note, and one past the
        index of the highest used note (so it can be used directly as a slice end)
    """
    # Element-wise union of every sample, starting from an all-zero array
    union = np.zeros_like(samples[0])
    for current in samples:
        union = np.maximum(union, current)

    # Collapse the first axis so only one value per note column remains
    per_note = union.max(axis=0)

    # argmax returns the first position of the maximum; scanning the reversed
    # array gives the distance of the last maximum from the top end
    first_hit = per_note.argmax()
    hits_from_top = per_note[::-1].argmax()

    return first_hit, per_note.shape[0] - hits_from_top

def generate_centered_transpose(samples):
    """
    Return a song followed by a copy of itself centered in the pitch range.

    The used note range (between min_note and max_note of the samples) is
    shifted so that it sits around the middle of the note axis. The centered
    copies are appended after the original samples, so the result holds two
    songs of equal length.

    The input list is no longer modified: the result is built on a copy
    instead of appending the centered samples to the caller's own list.

    :param samples: list of samples
    :return out_samples: the original samples followed by their centered copies
    :return out_lengths: the length of each of the two songs in out_samples,
        i.e. ``[len(samples), len(samples)]``; both returns are empty lists
        when -samples- is empty
    """
    # nothing to center (e.g. a MIDI file without notes)
    if len(samples) == 0:
        return [], []

    num_notes = samples[0].shape[1]
    min_note, max_note = find_sample_range(samples)

    # whole-note deviation from the pitch center; int() truncates toward zero
    shift = int(num_notes / 2 - (max_note + min_note) / 2)

    out_samples = list(samples)
    out_lengths = [len(samples), len(samples)]

    # center every sample by moving its used columns by the same shift
    for sample in samples:
        centered = np.zeros_like(sample)
        centered[:, min_note + shift:max_note + shift] = sample[:, min_note:max_note]
        out_samples.append(centered)

    return out_samples, out_lengths

In [7]:
# Value passed as SPM to midi_to_samples (rows of every sample array)
SAMPLES_PER_MEASURE = 16

sample_list = []
length_list = []

for mid in pianoRoll_mids:
    samples = midi_to_samples(mid, SAMPLES_PER_MEASURE)

    # A file without any audible note yields no samples; skip it instead of
    # letting one such file abort the whole preprocessing run.
    if not samples:
        continue

    samples, lens = generate_centered_transpose(samples)
    length_list += lens
    sample_list += samples

# Convert once at the end and persist both arrays for later cells/notebooks
all_samples = np.array(sample_list, dtype=np.uint8)
all_lens = np.array(length_list, dtype=np.uint32)
np.save('samples.npy', all_samples)
np.save('lengths.npy', all_lens)