In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import pretty_midi as pm

In [2]:
# Virtuoso dataframe, loaded from a pickle (produced by the author by parsing the folders of the dataset).
# NOTE(review): unpickling can execute arbitrary code; only load a pickle you generated yourself or trust.
virtuoso_df = pd.read_pickle("performance_dataframe.pkl")

# MAESTRO metadata dataframe, loaded from the maestro-v2.0.0 JSON file (from Google).
maestro_df = pd.read_json('maestro-v2.0.0.json')

In [12]:
# Preview the first rows of the virtuoso dataframe.
virtuoso_df.head()

Unnamed: 0,author,folder,midi2midi_alignment_path,performed_midi_path,performer,score2midi_alignment,score_midi_path,score_xml_path,title
0,Bach,Bach/Fugue/bwv_846,Bach/Fugue/bwv_846/Shi05_infer_corresp.txt,Bach/Fugue/bwv_846/Shi05.mid,Shi05,Bach/Fugue/bwv_846/Shi05_infer_match.txt,Bach/Fugue/bwv_846/midi_cleaned.mid,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Fugue_bwv_846
1,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Denisova06_infer_corresp.txt,Bach/Fugue/bwv_848/Denisova06.mid,Denisova06,Bach/Fugue/bwv_848/Denisova06_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
2,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Lee01_infer_corresp.txt,Bach/Fugue/bwv_848/Lee01.mid,Lee01,Bach/Fugue/bwv_848/Lee01_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
3,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/LeeSH01_infer_corresp.txt,Bach/Fugue/bwv_848/LeeSH01.mid,LeeSH01,Bach/Fugue/bwv_848/LeeSH01_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848
4,Bach,Bach/Fugue/bwv_848,Bach/Fugue/bwv_848/Lin04_infer_corresp.txt,Bach/Fugue/bwv_848/Lin04.mid,Lin04,Bach/Fugue/bwv_848/Lin04_infer_match.txt,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Fugue_bwv_848


In [11]:
# Preview the first rows of the MAESTRO metadata dataframe.
maestro_df.head()

Unnamed: 0,audio_filename,canonical_composer,canonical_title,duration,midi_filename,split,year
0,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,Alban Berg,Sonata Op. 1,698.66116,2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R...,train,2018
1,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,Alban Berg,Sonata Op. 1,759.518471,2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MI...,train,2008
2,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,Alban Berg,Sonata Op. 1,464.649433,2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-...,train,2017
3,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,Alexander Scriabin,"24 Preludes Op. 11, No. 13-24",872.640588,2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MI...,train,2004
4,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,Alexander Scriabin,"3 Etudes, Op. 65",397.857508,2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MI...,validation,2006


## Some scripts for the automatic alignment

In [3]:
# Base directories that the relative MIDI paths stored in the dataframes are joined to.
MAESTRO_BPATH = Path("../datasets/maestro-v2.0.0/")  # the MAESTRO dataset must be downloaded from https://magenta.tensorflow.org/datasets/maestro
# Virtuoso paths are resolved relative to the current working directory.
VIRTUOSO_BPATH = Path("./")

def get_midi_notes(midi,start = 0):
    """
    Collect the notes of every instrument of `midi` as (start, end, pitch) triples.

    Only notes whose onset is >= `start` are kept. The result is ordered by
    onset first and by pitch second.
    """
    collected = []
    # walk the instruments in order, then the notes of each instrument
    for track in midi.instruments:
        for event in track.notes:
            if event.start >= start:
                collected.append((event.start, event.end, event.pitch))
    # order by (onset, pitch); the sort is stable for identical keys
    collected.sort(key=lambda triple: (triple[0], triple[2]))
    return collected

def normalize_midi_notes(notes):
    """
    Remove the initial silence from a list of notes.

    PARAMETERS:
    - notes: list of (start, end, pitch) triples; the first element is taken as
      the reference onset (callers in this notebook pass an onset-ordered list).

    RETURNS:
    A new list of (start, end, pitch) triples where the onset of the first note
    has been subtracted from every start and end. An empty input gives an empty
    list instead of raising IndexError.
    """
    # nothing to shift: happens for an empty file or when every note was filtered out
    if not notes:
        return []
    #get the onset of the first midi note
    first = notes[0][0]
    #subtract it from all the times; the pitch is left untouched
    return [(onset - first, offset - first, pitch) for onset, offset, pitch in notes]

def path2normmidinotes(base_path,midi_path,start = 0):
    """
    Load the MIDI file at base_path/midi_path and return its normalized notes.

    The file is read with pretty_midi, its notes are extracted with
    get_midi_notes (keeping only onsets >= `start`) and then shifted with
    normalize_midi_notes. Used to add the normalized notes to the dataframes.
    """
    full_path = Path(base_path, midi_path)
    performance = pm.PrettyMIDI(str(full_path))
    ordered_notes = get_midi_notes(performance, start=start)
    return normalize_midi_notes(ordered_notes)


#We add the normalized notes to the dataframes instead of computing them on the fly, to speed up the computation later.
#The alignment is a search problem of complexity len(virtuoso)*len(maestro) and importing a MIDI file takes some time,
#so it is better to do it just once for all the pieces.
#The downside is that this takes a huge amount of memory, so a better solution may still be found.
# virtuoso_df["norm_notes"]= virtuoso_df.apply(lambda row:  path2normmidinotes(VIRTUOSO_BPATH,row["performed_midi_path"]),axis=1)
# maestro_df["norm_notes"]= maestro_df.apply(lambda row:  path2normmidinotes(MAESTRO_BPATH,row["midi_filename"]),axis=1)

In [4]:
def notes_are_similar(notes1,notes2, number_to_match,tolerance = 0.03, verbose=False):
    """
    Test whether two lists of notes are similar. A note is a triple (start, end, pitch).

    PARAMETERS:
    - notes1, notes2: the two lists of (start, end, pitch) triples to compare
    - number_to_match: the number of notes (from the beginning) that must match to
      consider the two lists similar. If None, the whole lists are compared and they
      must have the same length.
    - tolerance: absolute tolerance on onsets and offsets, expressed in the same time
      unit as the note triples (presumably seconds, since the notes in this notebook
      come from pretty_midi; so the default 0.03 would be 30 ms -- TODO confirm)
    - verbose: if True, print the reason why the lists are not considered similar

    RETURNS:
    True if onsets and offsets are within tolerance and the pitches are identical
    over the compared notes, False otherwise.
    """
    #test the length if number_to_match is not specified
    if number_to_match is None and (len(notes1)!=len(notes2)):
        if verbose: print("Not equal lenght:", len(notes1), "vs", len(notes2))
        return False

    # restrict both lists to the compared window once (a None bound keeps everything)
    head1 = notes1[:number_to_match]
    head2 = notes2[:number_to_match]

    #test onsets (column 0) first, then offsets (column 1)
    for label, column in (("Onset", 0), ("Offset", 1)):
        times1 = np.array([n[column] for n in head1])
        times2 = np.array([n[column] for n in head2])
        try:
            # also fails (shape mismatch) when one list is shorter than the window
            np.testing.assert_allclose(times1, times2, atol=tolerance)
        except AssertionError as e:
            if verbose: print(label + " problem:", e)
            return False

    #test pitches
    pitches1 = [n[2] for n in head1]
    pitches2 = [n[2] for n in head2]
    if pitches1 == pitches2:
        return True
    if verbose:
        print("Pitch problem occurring at indices:")
        # report only mismatches inside the compared window, not in the rest of the lists
        print([(i,p1,p2) for i,(p1,p2) in enumerate(zip(pitches1,pitches2)) if p1!=p2])
    return False

def find_similar_midi(df1,df2, number_to_match):
    """
    Given two dataframes, find for each piece of the first a corresponding piece in the second (if it exists).

    PARAMETERS:
    - df1: dataframe with the columns "norm_notes" and "performed_midi_path"
    - df2: dataframe with the columns "norm_notes" and "midi_filename"
    - number_to_match: number of initial notes that must match, forwarded to notes_are_similar

    SIDE EFFECT:
    A "correspondence" column is added to df1 in place. For each row it holds None (no match),
    the matching "midi_filename" of df2 (exactly one match) or the list of matching file names
    (multiple matches).

    There is no control for injectivity or uniqueness of the matching for now, except a printed
    warning for multiple matches (that we would very much like to avoid).
    """
    #one entry of matching information per row of df1
    correspondence = []
    for i1,row1 in df1.iterrows():
        #compare row1 with each element of df2, honouring the requested number of notes
        matched = df2.apply(lambda row2: notes_are_similar(row1["norm_notes"],row2["norm_notes"],number_to_match),axis=1)
        #save the matching information
        number_matched = sum(matched)
        if number_matched == 0:
            correspondence.append(None)
        elif number_matched == 1:
            correspondence.append(df2[matched]["midi_filename"].tolist()[0])
        else: #multiple matching
            print("WARNING: Multiple matching found for", row1["performed_midi_path"])
            correspondence.append(df2[matched]["midi_filename"].tolist())
    df1["correspondence"] =correspondence

# Let's run it for the Bach

In [5]:
# Restrict both datasets to Bach; copies are taken so that new columns can be added to them.
bach_virtuoso_df = virtuoso_df[virtuoso_df["author"]=="Bach"].copy()
bach_maestro_df = maestro_df[maestro_df["canonical_composer"]=="Johann Sebastian Bach"].copy()

# Add the normalized notes (this loads every MIDI file of both subsets once).
bach_virtuoso_df["norm_notes"]= bach_virtuoso_df.apply(lambda row:  path2normmidinotes(VIRTUOSO_BPATH,row["performed_midi_path"]),axis=1)
bach_maestro_df["norm_notes"]= bach_maestro_df.apply(lambda row:  path2normmidinotes(MAESTRO_BPATH,row["midi_filename"]),axis=1)

# Perform the alignment: adds the "correspondence" column to bach_virtuoso_df in place.
# 10 is passed as number_to_match.
find_similar_midi(bach_virtuoso_df,bach_maestro_df, 10)

In [8]:
# Keep only the virtuoso performances for which a MAESTRO correspondence was found.
temp = bach_virtuoso_df[bach_virtuoso_df["correspondence"].notna()]
# Matched pieces whose title does not start with "Prelude".
# NOTE(review): this bare expression is not assigned, so it is only displayed when it is the last statement of a cell.
temp[[t.split("_")[0]!="Prelude" for t in temp["title"]]]

# We have found mostly preludes (as expected, because in MAESTRO the fugues are in the same file as the preludes),
# but also 2 fugues. Good.

# Let's put those two in the list of correct MIDI files.
# Format: (virtuosomidi, maestromidi, folder, beginning, end)
# beginning and end are None if we can keep the original ones from the file.
midi_to_save = []
for i, row in temp[[t.split("_")[0]!="Prelude" for t in temp["title"]]].iterrows():
    midi_to_save.append((row["performed_midi_path"],row["correspondence"],row["folder"],None,None))

In [9]:
# Show the entries collected so far.
midi_to_save

[('Bach/Fugue/bwv_857/Lan01.mid',
  '2011/MIDI-Unprocessed_05_R1_2011_MID--AUDIO_R1-D2_09_Track09_wav.midi',
  'Bach/Fugue/bwv_857',
  None,
  None),
 ('Bach/Fugue/bwv_874/Kurz01.mid',
  '2011/MIDI-Unprocessed_01_R1_2011_MID--AUDIO_R1-D1_03_Track03_wav.midi',
  'Bach/Fugue/bwv_874',
  None,
  None)]

## Divide Preludes from fugues

In [10]:
# Consider only the matched performances whose title starts with "Prelude";
# a copy is taken so that columns can be added later.
prelude_virtuoso_df = temp[[t.split("_")[0]=="Prelude" for t in temp["title"]]].copy()
prelude_virtuoso_df

Unnamed: 0,author,folder,midi2midi_alignment_path,performed_midi_path,performer,score2midi_alignment,score_midi_path,score_xml_path,title,norm_notes,correspondence
88,Bach,Bach/Prelude/bwv_846,Bach/Prelude/bwv_846/Shi05_infer_corresp.txt,Bach/Prelude/bwv_846/Shi05.mid,Shi05,Bach/Prelude/bwv_846/Shi05_infer_match.txt,Bach/Prelude/bwv_846/midi_cleaned.mid,Bach/Prelude/bwv_846/musicxml_cleaned.musicxml,Prelude_bwv_846,"[(0.0, 0.9177359145833337, 60), (0.22863269583...",2006/MIDI-Unprocessed_19_R1_2006_01-07_ORIG_MI...
89,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Denisova06_infer_corresp.txt,Bach/Prelude/bwv_848/Denisova06.mid,Denisova06,Bach/Prelude/bwv_848/Denisova06_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.2126070395833335, 49), (0.00106837708...",2008/MIDI-Unprocessed_02_R1_2008_01-05_ORIG_MI...
90,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Lee01_infer_corresp.txt,Bach/Prelude/bwv_848/Lee01.mid,Lee01,Bach/Prelude/bwv_848/Lee01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.46688078541666667, 49), (0.0074786395...",2011/MIDI-Unprocessed_08_R1_2011_MID--AUDIO_R1...
91,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/LeeSH01_infer_corresp.txt,Bach/Prelude/bwv_848/LeeSH01.mid,LeeSH01,Bach/Prelude/bwv_848/LeeSH01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.13888902083333332, 77), (0.0138889020...",2017/MIDI-Unprocessed_049_PIANO049_MID--AUDIO-...
92,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Lin04_infer_corresp.txt,Bach/Prelude/bwv_848/Lin04.mid,Lin04,Bach/Prelude/bwv_848/Lin04_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.13888902083333332, 77), (0.0032051312...",2008/MIDI-Unprocessed_10_R1_2008_01-04_ORIG_MI...
93,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Lou01_infer_corresp.txt,Bach/Prelude/bwv_848/Lou01.mid,Lou01,Bach/Prelude/bwv_848/Lou01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.12606849583333335, 77), (0.0074786395...",2011/MIDI-Unprocessed_24_R1_2011_MID--AUDIO_R1...
94,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/MiyashitaM01_infer_corres...,Bach/Prelude/bwv_848/MiyashitaM01.mid,MiyashitaM01,Bach/Prelude/bwv_848/MiyashitaM01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.09281249999999996, 77), (0.0165625000...",2015/MIDI-Unprocessed_R1_D1-9-12_mid--AUDIO-fr...
95,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Mizumoto03_infer_corresp.txt,Bach/Prelude/bwv_848/Mizumoto03.mid,Mizumoto03,Bach/Prelude/bwv_848/Mizumoto03_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.10683770833333339, 77), (0.0288461812...",2011/MIDI-Unprocessed_12_R1_2011_MID--AUDIO_R1...
96,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/SunY01_infer_corresp.txt,Bach/Prelude/bwv_848/SunY01.mid,SunY01,Bach/Prelude/bwv_848/SunY01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.07478639583333335, 77), (0.0096153937...",2017/MIDI-Unprocessed_059_PIANO059_MID--AUDIO-...
97,Bach,Bach/Prelude/bwv_848,Bach/Prelude/bwv_848/Zhou01_infer_corresp.txt,Bach/Prelude/bwv_848/Zhou01.mid,Zhou01,Bach/Prelude/bwv_848/Zhou01_infer_match.txt,Bach/Prelude/bwv_848/midi_cleaned.mid,Bach/Prelude/bwv_848/musicxml_cleaned.musicxml,Prelude_bwv_848,"[(0.0, 0.46581240833333337, 49), (0.0106837708...",2011/MIDI-Unprocessed_04_R1_2011_MID--AUDIO_R1...


In [11]:
# Add the normalized notes of the matched MAESTRO performance to each row of prelude_virtuoso_df.
# NOTE(review): assumes "correspondence" is a single file name here; find_similar_midi stores a list for multiple matches -- confirm none occurred.
prelude_virtuoso_df["maestro_norm_notes"] = prelude_virtuoso_df.apply(lambda row:  path2normmidinotes(MAESTRO_BPATH,row["correspondence"]),axis=1)

In [12]:
def find_split_point_bach(norm_notes,maestro_norm_notes,tolerance):
    """
    Locate where the prelude ends inside the coupled maestro performance.

    RETURNS:
    - np.nan when both lists have the same number of notes (the maestro file is
      only the prelude); the similarity of the two lists is asserted in that case
    - 0 when the first len(norm_notes) notes of the two lists are not similar
      (sentinel used to filter out those elements later)
    - otherwise the onset of the first maestro note after the prelude notes
    """
    n_prelude = len(norm_notes)
    n_maestro = len(maestro_norm_notes)

    # same number of notes: the maestro performance contains only the prelude
    if n_maestro == n_prelude:
        assert notes_are_similar(norm_notes, maestro_norm_notes, n_maestro, tolerance=tolerance, verbose=True)
        return np.nan

    # the prelude part must match, otherwise flag the piece with the 0 sentinel
    prelude_matches = notes_are_similar(norm_notes, maestro_norm_notes, n_prelude, tolerance=tolerance, verbose=True)
    if prelude_matches:
        # onset of the note that immediately follows the prelude notes
        return maestro_norm_notes[n_prelude][0]
    return 0

In [13]:
# Compute the prelude/fugue split point of each matched MAESTRO performance with a tolerance of 0.5.
# The column holds NaN for prelude-only files and 0 for pieces whose prelude part does not match.
prelude_virtuoso_df["split_point"] = prelude_virtuoso_df.apply(lambda row: find_split_point_bach(row["norm_notes"],row["maestro_norm_notes"],0.5),axis = 1)

Onset problem: 
Not equal to tolerance rtol=1e-07, atol=0.5

(shapes (510,), (129,) mismatch)
 x: array([  0.      ,   0.98077 ,   1.060898,   1.920942,   2.011754,
         2.863251,   2.96261 ,   3.787397,   3.905987,   4.662398,
         4.727569,   4.840817,   5.681629,   5.810903,   6.584408,...
 y: array([ 0.      ,  0.980469,  1.059896,  1.920573,  2.010417,  2.863281,
        2.96224 ,  3.78776 ,  3.90625 ,  4.661458,  4.727865,  4.841146,
        5.68099 ,  5.807292,  6.580729,  6.643229,  6.776042,  7.565104,...
Onset problem: 
Not equal to tolerance rtol=1e-07, atol=0.5

Mismatch: 20.4%
Max absolute difference: 5.34188794
Max relative difference: nan
 x: array([  0.      ,   0.840813,   0.965813,   1.722224,   1.868592,
         2.627139,   2.773507,   3.530986,   3.69338 ,   4.354705,
         4.433765,   4.504278,   4.581201,   5.317313,   5.480774,...
 y: array([  0.      ,   0.841667,   0.966667,   1.728125,   1.875   ,
         2.633333,   2.779167,   3.5375  ,   3.7   

In [14]:
# Create a list of MAESTRO pieces that we don't want to use.
midi_to_not_use = []

# Add the pieces flagged with split_point == 0, where weird things happened
# (a performance cut during the prelude and one with very different onsets).
for i, row in prelude_virtuoso_df[prelude_virtuoso_df["split_point"]==0].iterrows():
    midi_to_not_use.append(((row["performed_midi_path"],row["correspondence"],row["folder"])))

# Add to midi_to_save the pieces that are only a prelude (split_point is NaN);
# beginning and end are None because the MAESTRO file can be used as it is.
for i, row in prelude_virtuoso_df[prelude_virtuoso_df["split_point"].isna()].iterrows():
    midi_to_save.append((row["performed_midi_path"],row["correspondence"],row["folder"],None,None))

In [15]:
# Remove the pieces already handled: the flagged ones (split_point == 0)
# and the prelude-only ones (split_point is NaN).
# NOTE(review): this rebinds prelude_virtuoso_df, so the cells above cannot be re-run after it without recomputing the frame.
prelude_virtuoso_df = prelude_virtuoso_df[prelude_virtuoso_df["split_point"]!=0]
prelude_virtuoso_df = prelude_virtuoso_df[prelude_virtuoso_df["split_point"].notna()]

## Check the fugues

In [16]:
# Build the path of the fugue paired with each remaining prelude by replacing
# "Prelude" with "Fugue" in the performed MIDI path (same folder structure and performer).
fugue_matched_performances_list = [p.replace("Prelude","Fugue") for p in prelude_virtuoso_df["performed_midi_path"]]

# Keep the Bach virtuoso rows whose performed MIDI path is one of those fugue paths.
fugue_virtuoso_df = bach_virtuoso_df [[p["performed_midi_path"] in fugue_matched_performances_list for i,p in bach_virtuoso_df.iterrows()]]

# Drop the correspondence column (expected to be NaN for these rows); this information is taken from the preludes later.
fugue_virtuoso_df = fugue_virtuoso_df.drop(["correspondence"],axis = 1)

# Add the column "opusperformer" (third and fourth path components, e.g. "bwv_848/Lee01.mid")
# used as the key to join with the preludes later.
fugue_virtuoso_df["opusperformer"] = [t.split("/")[2]+"/"+t.split("/")[3] for t in fugue_virtuoso_df["performed_midi_path"]]

In [17]:
# Add "opusperformer" to a copy of the preludes as well, to perform the join.
temp = prelude_virtuoso_df.copy()
temp["opusperformer"] = [t.split("/")[2]+"/"+t.split("/")[3] for t in prelude_virtuoso_df["performed_midi_path"]]
# Keep only the join key and the two columns to transfer to the fugues.
temp = temp[["opusperformer","split_point","correspondence"]]

# Add the split point and correspondence information by joining the tables on "opusperformer".
# NOTE(review): assumes "opusperformer" is unique in temp; duplicates would duplicate fugue rows -- confirm.
fugue_virtuoso_df = fugue_virtuoso_df.join(temp.set_index(["opusperformer"]), on=["opusperformer"])

# Drop the "opusperformer" column, now useless.
fugue_virtuoso_df = fugue_virtuoso_df.drop(["opusperformer"],axis = 1)

In [18]:
# Add the normalized notes of the MAESTRO fugues: only the notes from the split point on are loaded.
# NOTE(review): split_point is an onset taken from the normalized MAESTRO notes (time relative to the
# first note), while path2normmidinotes forwards `start` to a filter on the raw note onsets; the two
# differ by the initial silence of the file -- confirm this offset is harmless here.
fugue_virtuoso_df["maestro_norm_notes"]= fugue_virtuoso_df.apply(lambda row: path2normmidinotes(MAESTRO_BPATH,row["correspondence"],start=row["split_point"]),axis=1)

In [19]:
# Check whether the virtuoso fugue and the MAESTRO fugue part are similar over their whole length
# (number_to_match is None, so the two lists must also have the same number of notes), with a tolerance of 0.5.
fugue_virtuoso_df["aligned"] = fugue_virtuoso_df.apply(lambda row: notes_are_similar(row["norm_notes"],row["maestro_norm_notes"], None,tolerance=0.5,verbose=True),axis=1)

# There are some problems, but nothing too problematic because the files are going to be realigned with Nakamura, so we accept them.

Pitch problem occurring at indices:
[(386, 52, 45), (387, 45, 52)]
Offset problem: 
Not equal to tolerance rtol=1e-07, atol=0.5

Mismatch: 0.209%
Max absolute difference: 2.53205974
Max relative difference: 0.01003287
 x: array([1.153847e-01, 3.087610e-01, 4.615389e-01, ..., 2.547406e+02,
       2.549084e+02, 2.548421e+02])
 y: array([1.145833e-01, 3.085937e-01, 4.609375e-01, ..., 2.523763e+02,
       2.523763e+02, 2.523763e+02])
Pitch problem occurring at indices:
[(766, 61, 45), (767, 45, 61)]


In [24]:
# Add the preludes to midi_to_save: beginning is None, end is the offset of the last note
# of the normalized virtuoso performance.
# NOTE(review): norm_notes is ordered by onset, so [-1] is the last note to start, not necessarily the last one to end -- confirm this is the intended cut time.
for i, row in prelude_virtuoso_df.iterrows():
    midi_to_save.append((row["performed_midi_path"],row["correspondence"],row["folder"],None,row["norm_notes"][-1][1]))

In [25]:
# Add the fugues to midi_to_save: beginning is the split point found on the paired prelude, end is None.
for i, row in fugue_virtuoso_df.iterrows():
    midi_to_save.append((row["performed_midi_path"],row["correspondence"],row["folder"],row["split_point"],None))

In [26]:
# Number of virtuoso performances that will be replaced by MAESTRO MIDI.
len(midi_to_save)

150

In [27]:
# Show the full list of (virtuosomidi, maestromidi, folder, beginning, end) entries.
midi_to_save

[('Bach/Fugue/bwv_857/Lan01.mid',
  '2011/MIDI-Unprocessed_05_R1_2011_MID--AUDIO_R1-D2_09_Track09_wav.midi',
  'Bach/Fugue/bwv_857',
  None,
  None),
 ('Bach/Fugue/bwv_874/Kurz01.mid',
  '2011/MIDI-Unprocessed_01_R1_2011_MID--AUDIO_R1-D1_03_Track03_wav.midi',
  'Bach/Fugue/bwv_874',
  None,
  None),
 ('Bach/Prelude/bwv_857/Lan01.mid',
  '2011/MIDI-Unprocessed_05_R1_2011_MID--AUDIO_R1-D2_08_Track08_wav.midi',
  'Bach/Prelude/bwv_857',
  None,
  None),
 ('Bach/Prelude/bwv_874/BianF01.mid',
  '2011/MIDI-Unprocessed_01_R1_2011_MID--AUDIO_R1-D1_02_Track02_wav.midi',
  'Bach/Prelude/bwv_874',
  None,
  None),
 ('Bach/Prelude/bwv_874/Kurz01.mid',
  '2011/MIDI-Unprocessed_01_R1_2011_MID--AUDIO_R1-D1_02_Track02_wav.midi',
  'Bach/Prelude/bwv_874',
  None,
  None),
 ('Bach/Prelude/bwv_846/Shi05.mid',
  '2006/MIDI-Unprocessed_19_R1_2006_01-07_ORIG_MID--AUDIO_19_R1_2006_01_Track01_wav.midi',
  'Bach/Prelude/bwv_846',
  None,
  136.80248038958334),
 ('Bach/Prelude/bwv_848/Denisova06.mid',
  '2008/M

## let's replace the replaceable virtuoso MIDI with maestro midi

In [53]:
# To split a midi we want to copy to a new file:
#   -first instrument (only one instrument: piano)
#     - all notes in the interval
#     - all control changes in the interval
def split_midi(midi_path, start, end):
    """
    Load a MIDI file and keep only the part delimited by `start` and `end`.

    Only the first instrument is edited (the files are expected to contain a single
    piano instrument). The idea is to interact with the maestro MIDI as little as possible.

    PARAMETERS:
    - midi_path: path (string) of the MIDI file to load
    - start: beginning of the part to keep, measured from the first note onset of
      the file, or None to keep the file from its beginning
    - end: end of the part to keep, measured from the first note onset of the file,
      or None to keep the file until its end

    RETURNS:
    A tuple (midi, new_start, new_end):
    - start None, end None: the untouched midi, None, None
    - start None, end given: notes and control changes after the end are removed;
      returns midi, None, the end time of the truncated midi
    - start given, end None: notes and control changes before the cut are removed and
      the rest is shifted back so that the cut lands at time 0; returns midi, the cut
      time in the original file (start + first onset - 0.5), None

    RAISES:
    Exception if both start and end are given (case not considered).
    """
    #import the midi
    midi = pm.PrettyMIDI(midi_path)
    if start is None and end is None:
        #just save the midi as it is, changing the name
        return midi, None, None

    piano = midi.instruments[0]
    # Onset of the earliest note: start/end are measured from it. The notes of an
    # instrument are not guaranteed to be stored in onset order, so the earliest
    # onset is taken from the ordered list (same reference in both branches).
    first = get_midi_notes(midi)[0][0]

    if start is None:
        #truncate the current midi, dropping the notes and cc after a certain time
        piano.notes = [note for note in piano.notes
                       if (note.start < end + first)]
        piano.control_changes = [cc for cc in piano.control_changes
                                 if (cc.time < end + first)]
        return midi, None, midi.get_end_time()

    if end is None:
        #delete the first part (the prelude). Shift the notes back in order for them to be almost at the beginning:
        #we want the first note to be 0.5 s after the beginning. There should be no interaction with the prelude in this way
        piano.notes = [pm.Note(velocity=note.velocity, pitch=note.pitch, start=note.start-start -first +0.5, end=note.end-start-first +0.5)
                       for note in piano.notes
                       if (note.start >= start + first - 0.5)]
        # the shifted control changes must replace those of the instrument: an attribute
        # set on the PrettyMIDI object itself is not part of the written file
        piano.control_changes = [pm.ControlChange(number=cc.number, value=cc.value, time=cc.time-start - first + 0.5)
                                 for cc in piano.control_changes
                                 if (cc.time >= start + first - 0.5)]
        return midi, start +first -0.5,None

    raise Exception("Case not considered")

In [56]:
# Attempt to split a single MIDI file: entry 145 of midi_to_save is used as a manual check.
index = 145
print(midi_to_save[index])
# Entry fields used: [1] is the MAESTRO file, [3] the beginning and [4] the end.
nmidi, start, stop = split_midi(str(Path(MAESTRO_BPATH,midi_to_save[index][1])),midi_to_save[index][3],midi_to_save[index][4])

('Bach/Fugue/bwv_892/Hou01.mid', '2011/MIDI-Unprocessed_09_R1_2011_MID--AUDIO_R1-D3_12_Track12_wav.midi', 'Bach/Fugue/bwv_892', 100.9765625, None)


In [57]:
# Show the cut times returned by split_midi and write the result to a temporary file for inspection.
print("start",start,"stop",stop)
# import IPython.display as ipd
# ipd.Audio(nmidi.synthesize(fs=4600),rate=4600)
nmidi.write(str(Path('../temp_midi.mid')))

start 101.43880208333333 stop None


In [58]:
# Replace every virtuoso performance matched with MAESTRO by the (possibly cut) MAESTRO MIDI.
# The new file name is the old file name with its last 4 characters (".mid") replaced by "M.mid".
# WARNING: this cell is destructive and not re-runnable: it deletes the original virtuoso files,
# so a second run fails on unlink() because the files no longer exist.

# Format of the entries: (virtuosomidi, maestromidi, folder, beginning, end)
correct_annotations = []
for virt, maestro, folder, start, end in midi_to_save:
    # cut the MAESTRO file and save it next to the old virtuoso file
    midi, new_start, new_end = split_midi(str(Path(MAESTRO_BPATH,maestro)),start,end)
    midi.write(str(Path(VIRTUOSO_BPATH,virt))[:-4]+"M.mid")
    # delete the old file
    Path(VIRTUOSO_BPATH,virt).unlink()
    # save the corrected annotation; "start" and "end" are the values returned by split_midi,
    # "virtuoso_path" is the path of the deleted original file
    correct_annotations.append({"virtuoso_path":virt,"maestro_path":maestro,"folder":folder,"start":new_start,"end":new_end})

In [None]:
# TODO (maybe revise?):
# - check the audio alignment
# - check the audio of the preludes that are imported without changes
# - check the audio of the fugues