In [1]:
import os
import sys
# sys.path.append("../virtuosoNet")

from pathlib import Path
import pandas as pd
import music21 as m21
import pretty_midi as pm
from  matplotlib import pyplot as plt

%matplotlib inline
import numpy as np
import math
import pandas as pd
# import librosa
# import soundfile

import IPython.display as ipd

from collections import Counter

In [2]:
BASE_PATH = "../"

## Produce downbeats and beats

In [32]:
def quant_downbeat_to_annotations(midi_quant_path):
    mididata = pm.PrettyMIDI(str(midi_quant_path))
    downbeats = mididata.get_downbeats()
    #the downbeat duration (in theory)
    db_len = downbeats[2]-downbeats[1]
    downbeat = {}
    for i, db in enumerate(downbeats[:-1]):
        assert(db not in downbeat) #check if all values are uniques
        if math.isclose(downbeats[i+1]-downbeats[i], db_len, rel_tol=1e-2):
            downbeat[db] = "db"
        else:
            downbeat[db]= "dbW"
    #sort the dictionary just in case there are weird values
    ord_downbeat = {k: v for k, v in sorted(downbeat.items(), key=lambda item: item[1])}
    return ord_downbeat

def quant_downbeat_to_annotations_from_midi(mididata):
    downbeats = mididata.get_downbeats()
    #the downbeat duration (in theory)
    db_len = downbeats[2]-downbeats[1]
    downbeat = {}
    for i, db in enumerate(downbeats[:-1]):
        assert(db not in downbeat) #check if all values are uniques
        if math.isclose(downbeats[i+1]-downbeats[i], db_len, rel_tol=1e-2):
            downbeat[db] = "db"
        else:
            downbeat[db]= "dbW"
    #sort the dictionary just in case there are weird values
    ord_downbeat = {k: v for k, v in sorted(downbeat.items(), key=lambda item: item[1])}
    return ord_downbeat

def quant_beat_to_annotations(midi_quant_path):
    mididata = pm.PrettyMIDI(str(midi_quant_path))
    beats = mididata.get_beats()
    #the beat duration (in theory)
    b_len = beats[40]-beats[39] #multiple short beats in the beginning sometimes
    beat = {}
    for i, b in enumerate(beats[:-1]):
        assert(b not in beat) #check if all values are uniques
        if math.isclose(beats[i+1]-beats[i], b_len, rel_tol=1e-2):
            beat[b] = "b"
        else:
            beat[b] = "bW"
    #sort the dictionary just in case there are weird values
    ord_beat = {k: v for k, v in sorted(beat.items(), key=lambda item: item[1])}
    return beat

def quant_beat_to_annotations_from_midi(mididata):
    beats = mididata.get_beats()
    #the beat duration (in theory)
    b_len = beats[40]-beats[39] #multiple short beats in the beginning sometimes
    beat = {}
    for i, b in enumerate(beats[:-1]):
        assert(b not in beat) #check if all values are uniques
        if math.isclose(beats[i+1]-beats[i], b_len, rel_tol=1e-2):
            beat[b] = "b"
        else:
            beat[b] = "bW"
    #sort the dictionary just in case there are weird values
    ord_beat = {k: v for k, v in sorted(beat.items(), key=lambda item: item[1])}
    return beat

def aggregate_annotations(beat, downbeat):
    #merge the downbeat and the beat, with downbeat overwriting beat informations
    annotations ={**beat, **downbeat}
    return annotations

def beat_downbeat_to_text(annotations,out_path):
    path = Path(out_path)
    #produce the txt
    file_content = ""
    for a in annotations:
        file_content+= str(a) + "\t" + str(a) + "\t" + annotations[a] + "\n"
    with open(out_path,"w") as f:
        f.write(file_content)
        
def annotations_to_audio(midi_quant_path, annotations, out_path):
    midi_data = pm.PrettyMIDI(str(midi_quant_path))
    audio_fs = 22100
    beat = [a for a in annotations if annotations[a][0] == "b"]
    downbeat = [a for a in annotations if annotations[a][0] == "d"]
    midi_audio = midi_data.synthesize(fs=audio_fs)
    audio_beat = librosa.clicks(beat, sr=audio_fs, click_freq=900, length=len(midi_audio))
    audio_downbeat = librosa.clicks(downbeat, sr=audio_fs, click_freq=1200, length=len(midi_audio))
    soundfile.write(out_path,audio_beat + audio_downbeat+ midi_audio,audio_fs)

## produce for the entire dataset
Fields:
- author
- name
- folder
- performance_midi
- score_midi
- score_xml
- score2midi_alignment
- midi2midi_alignment_path
- beats
- downbeats

Dataframe problems:
- Bach/Fugue/bwv_860/midi_cleaned.mid, index: 29, the downbeat is every 3 downbeats
- Scriabin/Etudes_op_8/11/midi_cleaned.mid, index: 938, downbeat is half of the downbeat, beat is not aligned
- Bach/Fugue/bwv_857/Bult-ItoS02.mid, index: 16, beat have problems because matched with wrong note of trillo

In [3]:
#parse the VirtuosoNet structure

def parse_folder_content(folder_path,base_path):
    #take the performance_midi
    assert(Path(base_path,folder_path.relative_to(base_path),"midi_cleaned.mid").exists())
    score_midi_path = Path(folder_path.relative_to(base_path),"midi_cleaned.mid").as_posix()
    #take the score_midi
    assert( Path(base_path,folder_path.relative_to(base_path),"musicxml_cleaned.musicxml").exists())
    score_xml_path = Path(folder_path.relative_to(base_path),"musicxml_cleaned.musicxml").as_posix()
    #find the performances
    performances_name = []
    for file in folder_path.iterdir():
        if file.suffix.lower() == ".mid" and file.name.lower() != "midi_cleaned.mid" and (not file.name.lower().endswith("xp.mid")) and file.name.lower()!="midi.mid":
            performances_name.append(file)
    #iterate over the performances
    performances = []
    for p in performances_name:
        #check if everything exists
        assert(Path(base_path,folder_path.relative_to(base_path),p.name).exists())
        print(Path(base_path,folder_path.relative_to(base_path),p.stem + "_infer_corresp.txt"))
        assert(Path(base_path,folder_path.relative_to(base_path),p.stem + "_infer_corresp.txt").exists())
        assert(Path(base_path,folder_path.relative_to(base_path),p.stem + "_infer_match.txt"))
        performances.append({
            "performer" : p.stem,
            "score_midi_path" : score_midi_path,
            "score_xml_path" : score_xml_path,
            "performed_midi_path": Path(folder_path.relative_to(base_path),p.name).as_posix(),
            "score2midi_alignment" : Path(folder_path.relative_to(base_path),p.stem + "_infer_match.txt").as_posix(),
            "midi2midi_alignment_path" : Path(folder_path.relative_to(base_path),p.stem + "_infer_corresp.txt").as_posix(),
            "folder" : Path(folder_path.relative_to(base_path)).as_posix()
        })
    return performances     

In [4]:
basepath = Path('')
for e in basepath.iterdir(): #loop on the authors
    if e.is_dir() and e.name[0] != ".":
        print(e)

Bach
Balakirev
Beethoven
Brahms
Chopin
Debussy
Glinka
Haydn
Liszt
Mozart
Prokofiev
Rachmaninoff
Ravel
Schubert
Schumann
Scriabin
util


In [5]:
#explore all the subfolders and collect the useful files name

basepath = Path('')
performances = []
for e in basepath.iterdir(): #loop on the authors
    if e.is_dir() and e.name[0] != ".":
        print("Processing", e.name)
        for ee in e.iterdir(): # loop on kind of opus or piece
            assert(ee.is_dir())
            # can be inside a piece folder or list of pieces folders
            if any([eee.is_dir() for eee in ee.iterdir()]): #this contains folder of pieces
                for eee in ee.iterdir():
                    if eee.is_dir():
                        print(e.name, eee.name)
                        print(eee.name,ee.name,e.name)
                        ps = (parse_folder_content(eee,basepath))
                        #add title and author
                        for p in ps:
                            p["title"] = ee.name+"_"+eee.name
                            p["author"] = e.name
                        performances.extend(ps)
            else: 
                ps = (parse_folder_content(ee,basepath))
                #add title and author
                for p in ps:
                    p["title"] = ee.name
                    p["author"] = e.name
                performances.extend(ps)

Processing Bach
Bach bwv_846
bwv_846 Fugue Bach
Bach\Fugue\bwv_846\Shi05M_infer_corresp.txt


AssertionError: 

In [7]:
df = pd.DataFrame(performances)

df.groupby(["author","title"]).count()
df.shape
df

df.to_pickle("performance_dataframe.pkl")

In [4]:
df[df["folder"]=="Mozart/Fantasie_475"]

NameError: name 'df' is not defined

In [11]:
# create and save as excel for collaboration
df_folder = df.drop_duplicates(subset=["folder"])[["folder"]].reset_index(drop=True).rename(columns={"folder": "opus"})
df_folder["done"] = [False for i in df_folder.iterrows()]
df_folder

# uncomment the following to write the file
# df_folder.to_excel("output.xlsx")

Unnamed: 0,opus,done
0,Bach/Fugue/bwv_846,False
1,Bach/Fugue/bwv_848,False
2,Bach/Fugue/bwv_854,False
3,Bach/Fugue/bwv_856,False
4,Bach/Fugue/bwv_857,False
5,Bach/Fugue/bwv_858,False
6,Bach/Fugue/bwv_860,False
7,Bach/Fugue/bwv_862,False
8,Bach/Fugue/bwv_863,False
9,Bach/Fugue/bwv_864,False


In [15]:
# create and save the df of scores
df_scores = df.drop_duplicates(subset=["score_xml_path"])[["score_xml_path","folder","author"]].reset_index(drop=True).rename(columns={"folder": "opus"})
df_scores
# df_scores.to_pickle("score_dataframe.pkl")

#create and save the df of quantized things (midi and score)
df_quant = df.drop_duplicates(subset=["score_xml_path"])[["score_xml_path", "score_midi_path","folder","author"]].reset_index(drop=True).rename(columns={"folder": "opus"})
df_quant
# df_scores.to_pickle("quant_dataframe.pkl")


Unnamed: 0,score_xml_path,score_midi_path,opus,author
0,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Bach/Fugue/bwv_846/midi_cleaned.mid,Bach/Fugue/bwv_846,Bach
1,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,Bach/Fugue/bwv_848,Bach
2,Bach/Fugue/bwv_854/musicxml_cleaned.musicxml,Bach/Fugue/bwv_854/midi_cleaned.mid,Bach/Fugue/bwv_854,Bach
3,Bach/Fugue/bwv_856/musicxml_cleaned.musicxml,Bach/Fugue/bwv_856/midi_cleaned.mid,Bach/Fugue/bwv_856,Bach
4,Bach/Fugue/bwv_857/musicxml_cleaned.musicxml,Bach/Fugue/bwv_857/midi_cleaned.mid,Bach/Fugue/bwv_857,Bach
5,Bach/Fugue/bwv_858/musicxml_cleaned.musicxml,Bach/Fugue/bwv_858/midi_cleaned.mid,Bach/Fugue/bwv_858,Bach
6,Bach/Fugue/bwv_860/musicxml_cleaned.musicxml,Bach/Fugue/bwv_860/midi_cleaned.mid,Bach/Fugue/bwv_860,Bach
7,Bach/Fugue/bwv_862/musicxml_cleaned.musicxml,Bach/Fugue/bwv_862/midi_cleaned.mid,Bach/Fugue/bwv_862,Bach
8,Bach/Fugue/bwv_863/musicxml_cleaned.musicxml,Bach/Fugue/bwv_863/midi_cleaned.mid,Bach/Fugue/bwv_863,Bach
9,Bach/Fugue/bwv_864/musicxml_cleaned.musicxml,Bach/Fugue/bwv_864/midi_cleaned.mid,Bach/Fugue/bwv_864,Bach


In [7]:
from collections import Counter
Counter(df["author"]).keys()

dict_keys(['Bach', 'Balakirev', 'Beethoven', 'Brahms', 'Chopin', 'Debussy', 'Glinka', 'Haydn', 'Liszt', 'Mozart', 'Prokofiev', 'Rachmaninoff', 'Ravel', 'Schubert', 'Schumann', 'Scriabin'])

In [31]:
# WRITE FILES IN THE DATASET. Uncomment the last two lines to write them

def row_to_quant_ann(row):
    dbs = quant_downbeat_to_annotations(row["score_midi_path"])
    bs = quant_beat_to_annotations(row["score_midi_path"])
    anns = aggregate_annotations(bs,dbs)
    beat_downbeat_to_text(anns, str(Path(row["folder"],"ann_quant.txt")))
    annotations_to_audio(row["score_midi_path"],anns,str(Path(row["folder"],"quant_click.wav")))

# for i,row in df.drop_duplicates(subset ="score_midi_path", keep = "first", inplace = False).iterrows():
#     row_to_quant_ann(row)



In [21]:
df.drop_duplicates(subset=["title"]).groupby("author").count()

Unnamed: 0_level_0,folder,midi2midi_alignment_path,performed_midi_path,performer,score2midi_alignment,score_midi_path,score_xml_path,title
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bach,59,59,59,59,59,59,59,59
Balakirev,1,1,1,1,1,1,1,1
Beethoven,64,64,64,64,64,64,64,64
Brahms,1,1,1,1,1,1,1,1
Chopin,36,36,36,36,36,36,36,36
Debussy,2,2,2,2,2,2,2,2
Glinka,1,1,1,1,1,1,1,1
Haydn,12,12,12,12,12,12,12,12
Liszt,16,16,16,16,16,16,16,16
Mozart,3,3,3,3,3,3,3,3


## Produce annotations for a single song
when there is something to correct in the generation

In [10]:
# Chopin/Ballade/3
row = df.iloc[125]

score_path = row["score_xml_path"]
midi_quant_path = row["score_midi_path"]

print("processing score", score_path)

midi_data = pm.PrettyMIDI(str(midi_quant_path))      
        
dbs = quant_downbeat_to_annotations_from_midi(midi_data)
bs = quant_beat_to_annotations_from_midi(midi_data)
anns = aggregate_annotations(bs,dbs)


beat_downbeat_to_text(anns, str(Path(row["opus"],"ann_quant.txt")))
# annotations_to_audio(row["score_midi_path"],anns,str(Path(row["opus"],"quant_click.wav")))

processing score Chopin/Ballades/1/musicxml_cleaned.musicxml


In [12]:
# Mozart/Fantasie_475
row = df.iloc[192]

score_path = row["score_xml_path"]
midi_quant_path = row["score_midi_path"]

print("processing score", score_path)

midi_data = pm.PrettyMIDI(str(Path(BASE_PATH,midi_quant_path)))      
        
dbs = quant_downbeat_to_annotations_from_midi(midi_data)
bs = quant_beat_to_annotations_from_midi(midi_data)
anns = aggregate_annotations(bs,dbs)


beat_downbeat_to_text(anns, str(Path(BASE_PATH,row["opus"],"ann_quant.txt")))
# annotations_to_audio(row["score_midi_path"],anns,str(Path(row["opus"],"quant_click.wav")))

processing score Mozart/Fantasie_475/musicxml_cleaned.musicxml




In [34]:
# Schumann/Kreisleriana/1
row = df.iloc[213]

score_path = row["score_xml_path"]
midi_quant_path = row["score_midi_path"]

print("processing score", score_path)

midi_data = pm.PrettyMIDI(str(Path(BASE_PATH,midi_quant_path)))      
        
dbs = quant_downbeat_to_annotations_from_midi(midi_data)
bs = quant_beat_to_annotations_from_midi(midi_data)
anns = aggregate_annotations(bs,dbs)


beat_downbeat_to_text(anns, str(Path(BASE_PATH,row["opus"],"ann_quant.txt")))

processing score Schubert/Impromptu_op.90_D.899/4_no_repeat/musicxml_cleaned.musicxml


In [4]:
df = pd.read_pickle(Path(BASE_PATH,"quant_dataframe.pkl"))
df[df.opus == "Schubert/Impromptu_op142/3"]

Unnamed: 0,score_xml_path,score_midi_path,opus,author
215,Schubert/Impromptu_op142/3/musicxml_cleaned.mu...,Schubert/Impromptu_op142/3/midi_cleaned.mid,Schubert/Impromptu_op142/3,Schubert


In [13]:
Counter(pd.read_pickle(Path(BASE_PATH,"quant_dataframe.pkl"))["author"])

Counter({'Bach': 59,
         'Balakirev': 1,
         'Beethoven': 64,
         'Brahms': 1,
         'Chopin': 36,
         'Debussy': 2,
         'Glinka': 1,
         'Haydn': 12,
         'Liszt': 16,
         'Mozart': 6,
         'Prokofiev': 1,
         'Rachmaninoff': 5,
         'Ravel': 5,
         'Schubert': 15,
         'Schumann': 11,
         'Scriabin': 2})

.......
After all the quantized annotation cleaning
After producing the ann_cleaned.txt files
After checking the quality of annotations with the "check_quantized_annotations_quality" notebook
......

# Produce Beat/Downbeats with integrated time signature

## Load the data

In [3]:
BASE_PATH = Path("../")

quant_df = pd.read_excel(Path(BASE_PATH,"dataframes","quant_cleaning_info.xlsx"),index_col=0)
quant_df["scorexml_path"] = quant_df["opus"] + "/musicxml_cleaned.musicxml"
quant_df["scoremidi_path"] = quant_df["opus"] + "/midi_cleaned.mid"

In [4]:
#load the beat position
def load_anns_from_file(row):
    try:
        ann_df = pd.read_csv(Path(BASE_PATH,row.opus,"ann_quant_cleaned.txt"),header=None, names=["time","time2","type"],sep='\t')
        ann_list = [(row["time"],row["type"]) for i,row in ann_df.iterrows()]
        return ann_list
    except Exception as e:
        print("Problems for opus", row.opus, e)
        return np.nan

quant_df['beats'] = quant_df.apply(load_anns_from_file, axis=1)

Problems for opus Beethoven/Piano_Sonatas/32-2 [Errno 2] File ..\Beethoven\Piano_Sonatas\32-2\ann_quant_cleaned.txt does not exist: '..\\Beethoven\\Piano_Sonatas\\32-2\\ann_quant_cleaned.txt'


## Produce the ts changes

In [7]:
# create the time signature changes

def score2ts_changes(score_path, remove_duplicates = True):
    score = m21.converter.parse(score_path)
    ts_changes = [ str(ts.numerator)+"/"+str(ts.denominator) for ts in score.parts[0].recurse().getElementsByClass(m21.meter.TimeSignature)]
    if remove_duplicates:
        #remove the duplicates
        ts_changes = [ts for i,ts in enumerate(ts_changes) if i==0 or (i!=0 and ts_changes[i-1]!=ts)]
    return ts_changes

def midi2ts_changes(midi_path, remove_duplicates = True):
    midi = pm.PrettyMIDI(midi_path)
    ts_changes = [(ts.time, str(ts.numerator)+"/"+str(ts.denominator)) for ts in midi.time_signature_changes]
    if remove_duplicates:
    #remove the duplicates
        ts_changes = [ts for i,ts in enumerate(ts_changes) if i==0 or (i!=0 and ts_changes[i-1][1]!=ts[1])]
    return ts_changes

def align_score_ts_with_midi_ts(score_path,midi_path):
    midi_ts_changes = midi2ts_changes(midi_path)
    score_ts_changes = score2ts_changes(score_path)
    out = []
    for si, sts in enumerate(score_ts_changes):
        found_at_index = None
        for mi, mts in enumerate(midi_ts_changes):
            if sts == mts[1]:
                out.append((sts,mts[0]))
                found_at_index = mi
                break
        if found_at_index is None:
            raise(ValueError("The algorithm of ts aligning is not working"))
        if found_at_index< len(midi_ts_changes):
            midi_ts_changes = midi_ts_changes[found_at_index+1:]
    return out

def row_to_ts_align(row):
    try:
        return align_score_ts_with_midi_ts(str(Path(BASE_PATH,row["scorexml_path"])),str(Path(BASE_PATH,row["scoremidi_path"])))
    except Exception as e:
        print("Processing piece",row["opus"], "..................Exception!!" )
        print(e)
        return None 
    
quant_df["ts_changes"] = quant_df.apply(row_to_ts_align, axis = 1)



Processing piece Beethoven/Piano_Sonatas/32-2 ..................Exception!!
Error in getting DynamicWedges...Measure no. 57 P1


In [10]:
# move the manual annotations to the closest leftmost note in a 35ms window

def align_ann_to_closest_leftmost_event(row):
    print("Aligning opus", row.opus)
    midi_path = Path(BASE_PATH,row["opus"],"midi_cleaned.mid")
    midi = pm.PrettyMIDI(str(midi_path))
    
    if (type(row.beats) == float and pd.isnull(row.beats)):
        print("Beat or downbeat midding, alignment skipped")
        return np.nan
    
    # align annotations
    aligned_beats = []
    for b_time, b_type in row.beats:
        # find notes in the window
        close_notes_onset = [note.start for inst in midi.instruments for note in inst.notes 
                             if (note.start >= b_time - 0.0175) and (note.start <= b_time + 0.0175)]
        if len(close_notes_onset) == 0: # if no close note found (beat on a rest)
            aligned_beats.append((b_time,b_type))
        else:
            aligned_beats.append((sorted(close_notes_onset)[0],b_type)) # align to the leftmost of the window
            shift = np.abs(b_time- sorted(close_notes_onset)[0])
            if shift > 0:
                print("beat at",b_time, "moved of ",shift)
                
    return aligned_beats

def align_ts_to_beat(row):
    print("Aligning opus", row.opus)
    if (type(row.beats) == float and pd.isnull(row.beats)):
        print("Beat or downbeat missing, alignment skipped")
        return np.nan
    
    downbeats = [b[0] for b in row.beats if b[1]=="db"]
    
    # align ts
    aligned_ts = []
    for ts in row.ts_changes:
        # find beat in the window
        close_db = [db for db in downbeats if (db >= ts[1] - 0.0175) and (db <= ts[1] + 0.0175)]
        if ts[1] == 0: #initial time signature, can not coincide with downbeats, so don't align it
            aligned_ts.append(ts)
        elif len(close_db) != 1: # if no close note found (beat on a rest)
            print("Some problems for ts",ts)
        else:
            aligned_ts.append((ts[0],close_db[0])) # align to the close downbeat
            shift = np.abs(ts[1]- close_db[0])
            if shift > 0:
                print("ts",ts, "moved of ",shift)
    
    return aligned_ts


quant_df['beats'] = quant_df.apply(align_ann_to_closest_leftmost_event, axis=1)
quant_df["ts_changes"] = quant_df.apply(align_ts_to_beat, axis=1)

Aligning opus Bach/Fugue/bwv_846
Aligning opus Bach/Fugue/bwv_848
Aligning opus Bach/Fugue/bwv_854
Aligning opus Bach/Fugue/bwv_856
Aligning opus Bach/Fugue/bwv_857
Aligning opus Bach/Fugue/bwv_858
Aligning opus Bach/Fugue/bwv_860
Aligning opus Bach/Fugue/bwv_862
Aligning opus Bach/Fugue/bwv_863
Aligning opus Bach/Fugue/bwv_864
Aligning opus Bach/Fugue/bwv_865
Aligning opus Bach/Fugue/bwv_866
Aligning opus Bach/Fugue/bwv_867
Aligning opus Bach/Fugue/bwv_868
Aligning opus Bach/Fugue/bwv_870
Aligning opus Bach/Fugue/bwv_873
Aligning opus Bach/Fugue/bwv_874
Aligning opus Bach/Fugue/bwv_875
Aligning opus Bach/Fugue/bwv_876
Aligning opus Bach/Fugue/bwv_880
Aligning opus Bach/Fugue/bwv_883
Aligning opus Bach/Fugue/bwv_884
Aligning opus Bach/Fugue/bwv_885
Aligning opus Bach/Fugue/bwv_887
Aligning opus Bach/Fugue/bwv_888
Aligning opus Bach/Fugue/bwv_889
Aligning opus Bach/Fugue/bwv_891
Aligning opus Bach/Fugue/bwv_892
Aligning opus Bach/Fugue/bwv_893
Aligning opus Bach/Italian_concerto
Alignin

Aligning opus Rachmaninoff/Preludes_op_32/5
Aligning opus Ravel/Gaspard_de_la_Nuit/1_Ondine
beat at 194.98958233333332 moved of  0.01666666666665151
Aligning opus Ravel/Jeux_deau
beat at 124.18124999999999 moved of  0.01666666666666572
Aligning opus Ravel/Miroirs/3_Une_Barque
Aligning opus Ravel/Miroirs/4_Alborada_del_gracioso
Aligning opus Ravel/Pavane
Aligning opus Schubert/Impromptu_op.90_D.899/1
Aligning opus Schubert/Impromptu_op.90_D.899/2
Aligning opus Schubert/Impromptu_op.90_D.899/3
Aligning opus Schubert/Impromptu_op.90_D.899/4
Aligning opus Schubert/Impromptu_op.90_D.899/4_no_repeat
Aligning opus Schubert/Impromptu_op142/1
Aligning opus Schubert/Impromptu_op142/3
Aligning opus Schubert/Moment_Musical_no_1
Aligning opus Schubert/Moment_musical_no_3
Aligning opus Schubert/Piano_Sonatas/664-1
Aligning opus Schubert/Piano_Sonatas/664-2
Aligning opus Schubert/Piano_Sonatas/664-3
Aligning opus Schubert/Piano_Sonatas/894-2
Aligning opus Schubert/Piano_Sonatas/894-2_no_repeat
Aligni

In [20]:
# move the first time signature to the first downbeat position
def first_ts_at_first_db(row):
    try:
        first_db = [e[0] for e in row.beats if e[1]=='db'][0]
        first_ts = row.ts_changes[0][1]
        if first_db!= first_ts:
            print("Changed for piece",row.opus)
            new_tsc = row.ts_changes
            new_tsc[0] = (new_tsc[0][0],first_db)
            return new_tsc
        else:
            return row.ts_changes
    except:
        print("Not working for opus",row.opus)
        return None
        
quant_df["ts_changes"] = quant_df.apply(first_ts_at_first_db,axis=1)

Not working for opus Beethoven/Piano_Sonatas/32-2


In [19]:
quant_df.head()

Unnamed: 0,opus,done,multiple_time_signatures,big_tempo_changes,time_signatures,problems,other,invalid_nak_alignment,scorexml_path,scoremidi_path,beats,ts_changes
0,Bach/Fugue/bwv_846,True,False,False,4/4,last db missing,,0.0,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Bach/Fugue/bwv_846/midi_cleaned.mid,"[(0.5, b), (1.0, b), (1.5, b), (2.0, db), (2.5...","[(4/4, 2.0)]"
1,Bach/Fugue/bwv_848,True,False,False,4/4,last db missing,,0.0,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,"[(1.0, b), (1.5, b), (2.0, db), (2.5, b), (3.0...","[(4/4, 2.0)]"
2,Bach/Fugue/bwv_854,True,False,False,4/4,,,0.0,Bach/Fugue/bwv_854/musicxml_cleaned.musicxml,Bach/Fugue/bwv_854/midi_cleaned.mid,"[(1.0, b), (1.5, b), (2.0, db), (2.5, b), (3.0...","[(4/4, 2.0)]"
3,Bach/Fugue/bwv_856,True,False,False,3/8,,,0.0,Bach/Fugue/bwv_856/musicxml_cleaned.musicxml,Bach/Fugue/bwv_856/midi_cleaned.mid,"[(0.0, b), (0.25, db), (0.5, b), (0.75, b), (1...","[(3/8, 0.25)]"
4,Bach/Fugue/bwv_857,True,False,False,4/4,,,0.0,Bach/Fugue/bwv_857/musicxml_cleaned.musicxml,Bach/Fugue/bwv_857/midi_cleaned.mid,"[(0.5, b), (1.0, b), (1.5, b), (2.0, db), (2.5...","[(4/4, 2.0)]"


## Save the ts changes and the annotations to audacity format

In [3]:
def row2file_annotations(row):
    #in case beats are missing, exit
    if (type(row.beats) == float and pd.isnull(row.beats)):
        print("Beat or downbeat missing, alignment skipped")
        return 
    
    path = Path(BASE_PATH,row.opus,"annotations.txt")
    #produce the txt
    file_content = ""
    for a in row.beats:
        file_content+= str(a[0]) + "\t" + str(a[0]) + "\t" + a[1] 
        #add the ts change if needed
        if a[0] in [ts[1] for ts in row.ts_changes]:
            file_content+=","+ str([ts[0] for ts in row.ts_changes if ts[1]==a[0]][0])
        file_content+= "\n"
    with open(path,"w") as f:
        f.write(file_content)
    
quant_df.apply(row2file_annotations,axis=1)

NameError: name 'quant_df' is not defined

## Save the key changes to audacity format

In [11]:
all_df = pd.read_csv(Path(BASE_PATH,"ASAP_v1.csv"))
all_df

Unnamed: 0.1,Unnamed: 0,vnet_title,vnet_composer,score_xml,score_midi,maestro_midi,start,end,performed_midi_path,performed_annotations_path,same_length
0,0,Fugue_bwv_846,Bach,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Bach/Fugue/bwv_846/midi_cleaned.mid,{maestro}/2006/MIDI-Unprocessed_19_R1_2006_01-...,144.567708,,Bach/Fugue/bwv_846/Shi05M.mid,Bach/Fugue/bwv_846/Shi05M_ann_cleaned.txt,True
1,1,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2008/MIDI-Unprocessed_02_R1_2008_01-...,69.317708,,Bach/Fugue/bwv_848/Denisova06M.mid,Bach/Fugue/bwv_848/Denisova06M_ann_cleaned.txt,True
2,2,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2011/MIDI-Unprocessed_08_R1_2011_MID...,74.795573,,Bach/Fugue/bwv_848/Lee01M.mid,Bach/Fugue/bwv_848/Lee01M_ann_cleaned.txt,True
3,3,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2017/MIDI-Unprocessed_049_PIANO049_M...,83.644792,,Bach/Fugue/bwv_848/LeeSH01M.mid,Bach/Fugue/bwv_848/LeeSH01M_ann_cleaned.txt,True
4,4,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2008/MIDI-Unprocessed_10_R1_2008_01-...,75.373698,,Bach/Fugue/bwv_848/Lin04M.mid,Bach/Fugue/bwv_848/Lin04M_ann_cleaned.txt,True
...,...,...,...,...,...,...,...,...,...,...,...
1063,1085,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Na03.mid,Scriabin/Sonatas/5/Na03_ann_cleaned.txt,True
1064,1086,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,{maestro}/2009/MIDI-Unprocessed_09_R1_2009_01-...,,,Scriabin/Sonatas/5/Na07M.mid,Scriabin/Sonatas/5/Na07M_ann_cleaned.txt,True
1065,1087,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/TET02.mid,Scriabin/Sonatas/5/TET02_ann_cleaned.txt,True
1066,1088,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Yeletskiy02.mid,Scriabin/Sonatas/5/Yeletskiy02_ann_cleaned.txt,True


In [12]:
quant_df = all_df.drop_duplicates(subset='score_xml', keep='first')
quant_df

Unnamed: 0.1,Unnamed: 0,vnet_title,vnet_composer,score_xml,score_midi,maestro_midi,start,end,performed_midi_path,performed_annotations_path,same_length
0,0,Fugue_bwv_846,Bach,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Bach/Fugue/bwv_846/midi_cleaned.mid,{maestro}/2006/MIDI-Unprocessed_19_R1_2006_01-...,144.567708,,Bach/Fugue/bwv_846/Shi05M.mid,Bach/Fugue/bwv_846/Shi05M_ann_cleaned.txt,True
1,1,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2008/MIDI-Unprocessed_02_R1_2008_01-...,69.317708,,Bach/Fugue/bwv_848/Denisova06M.mid,Bach/Fugue/bwv_848/Denisova06M_ann_cleaned.txt,True
10,10,Fugue_bwv_854,Bach,Bach/Fugue/bwv_854/musicxml_cleaned.musicxml,Bach/Fugue/bwv_854/midi_cleaned.mid,{maestro}/2015/MIDI-Unprocessed_R1_D1-1-8_mid-...,91.779167,,Bach/Fugue/bwv_854/LuA01M.mid,Bach/Fugue/bwv_854/LuA01M_ann_cleaned.txt,True
15,15,Fugue_bwv_856,Bach,Bach/Fugue/bwv_856/musicxml_cleaned.musicxml,Bach/Fugue/bwv_856/midi_cleaned.mid,{maestro}/2015/MIDI-Unprocessed_R1_D1-1-8_mid-...,59.978125,,Bach/Fugue/bwv_856/LuoJ01M.mid,Bach/Fugue/bwv_856/LuoJ01M_ann_cleaned.txt,True
16,16,Fugue_bwv_857,Bach,Bach/Fugue/bwv_857/musicxml_cleaned.musicxml,Bach/Fugue/bwv_857/midi_cleaned.mid,{maestro}/2015/MIDI-Unprocessed_R1_D1-1-8_mid-...,107.073958,,Bach/Fugue/bwv_857/Bult-ItoS02M.mid,Bach/Fugue/bwv_857/Bult-ItoS02M_ann_cleaned.txt,True
...,...,...,...,...,...,...,...,...,...,...,...
1046,1068,Kreisleriana_7,Schumann,Schumann/Kreisleriana/7/musicxml_cleaned.musicxml,Schumann/Kreisleriana/7/midi_cleaned.mid,,,,Schumann/Kreisleriana/7/JohannsonP08.mid,Schumann/Kreisleriana/7/JohannsonP08_ann.txt,True
1049,1071,Toccata,Schumann,Schumann/Toccata/musicxml_cleaned.musicxml,Schumann/Toccata/midi_cleaned.mid,,,,Schumann/Toccata/BENABD04.mid,Schumann/Toccata/BENABD04_ann.txt,True
1053,1075,Toccata_repeat,Schumann,Schumann/Toccata_repeat/musicxml_cleaned.musicxml,Schumann/Toccata_repeat/midi_cleaned.mid,{maestro}/2014/MIDI-UNPROCESSED_11-13_R1_2014_...,,,Schumann/Toccata_repeat/WangY07M.mid,Schumann/Toccata_repeat/WangY07M_ann.txt,True
1055,1077,Etudes_op_8_11,Scriabin,Scriabin/Etudes_op_8/11/musicxml_cleaned.musicxml,Scriabin/Etudes_op_8/11/midi_cleaned.mid,,,,Scriabin/Etudes_op_8/11/Shi03.mid,Scriabin/Etudes_op_8/11/Shi03_ann.txt,True


In [13]:
def score2ks_changes(score_path, remove_duplicates = True, enharmonic = True):
    score = m21.converter.parse(score_path)
#     keys = [ ks.sharps for ks in score.parts[0].recurse().getElementsByClass(m21.key.KeySignature)]
    keys = [ (ks.asKey(mode='major').tonic.pitchClass,ks.measureNumber,ks.sharps) for ks in score.parts[0].recurse().getElementsByClass(m21.key.KeySignature)]
    if remove_duplicates:
    #remove the duplicates
        if enharmonic: #we condider only the key as an integer [0,11]
            keys = [ks for i,ks in enumerate(keys) if i==0 or (i!=0 and keys[i-1][0]!=ks[0])]
        else: #we consider the number of sharps and flats
            keys = [ks for i,ks in enumerate(keys) if i==0 or (i!=0 and keys[i-1][2]!=ks[2])]
    return keys

def midi2ks_changes(midi_path, remove_duplicates = True):
    midi = pm.PrettyMIDI(midi_path)
    # 0 is C Major, 12 is C minor.
    keys = [(k.key_number,k.time) for k in midi.key_signature_changes]
    if remove_duplicates:
    #remove the duplicates
        keys = [ks for i,ks in enumerate(keys) if i==0 or (i!=0 and keys[i-1][0]!=ks[0])]
    return keys


def save_key_in_txt_annotations(row):
    #get the keys informations
    score_keys = score2ks_changes(str(Path(BASE_PATH,row["score_xml"])))
    midi_keys = midi2ks_changes(str(Path(BASE_PATH,row["score_midi"])))
#     assert([sk[0] for sk in score_keys]  == [mk[0] for mk in midi_keys]  ) #check if they coincide with the score
    if [sk[0] for sk in score_keys]  != [mk[0] for mk in midi_keys]:
        print("Not working for",row.score_midi,score_keys,midi_keys)
        return

    #get the annotations file
    quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
    
    ks_out_dict = {}
    #check if there is a db close to every key change time (except for the first)
    for mk,sk in zip(midi_keys,score_keys):
        if mk[1] == 0:
            # it's the first of the piece, append it to the first annotation
            close_ann = [r["time"] for i,r in quant_ann_df.iterrows()][0]
            ks_out_dict[close_ann] = sk[2]
        else: 
            # find the close beat
#             db_list = [row["time"] for i,row in quant_ann_df.iterrows() if row["type"].split(",")[0]=="b" or ]
            close_anns = [r["time"] for i,r in quant_ann_df.iterrows() if (r["time"] >= mk[1] - 0.0175) and (r["time"] <= mk[1] + 0.0175)]
            if len(close_anns)!=1: 
                # we take the first annotation on the right
                close_ann = [r["time"] for i,r in quant_ann_df.iterrows() if (r["time"] >= mk[1] )][0]
            else:
                close_ann = close_anns[0]
            ks_out_dict[close_ann] = sk[2]
    
    #now write the file
    file_content= ""
    counter_written = 0
    for i, r in quant_ann_df.iterrows():
        if r["time"] in ks_out_dict.keys():
            if len(r["type"].split(","))==1: #no time signature
                file_content+= str(r["time"]) + "\t" + str(r["time"]) + "\t" + str(r["type"]) + ",," + str(ks_out_dict[r["time"]])  + "\n"
            elif len(r["type"].split(","))==2: #time signature
                file_content+= str(r["time"]) + "\t" + str(r["time"]) + "\t" + str(r["type"]) + "," + str(ks_out_dict[r["time"]])  + "\n"
            else:
                raise Exception("Bad annotations type")
        else:
            file_content+= str(r["time"]) + "\t" + str(r["time"]) + "\t" + str(r["type"])+ "\n"
    with open(Path(BASE_PATH,row.score_midi[:-17],"annotations3.txt"),"w") as f:
        f.write(file_content)
        
    return
    

# def row_to_midi_and_score_key(row):
#     score_keys = score2ks_changes(str(Path(BASE_PATH,row["score_xml"])))
# #     print(score_keys)
#     midi_keys = midi2ks_changes(str(Path(BASE_PATH,row["score_midi"])))
#     out = []
#     for sk,mk in zip(score_keys,midi_keys):
#         assert(sk[0] == mk[0] )
# #         out.append({"key":mk[0],"sharps":sk[2],"midi_time":mk[1],"score_measure":sk[1]})
#         out.append({"key":mk[0],"sharps":sk[2],"midi_time":mk[1],"score_measure":sk[1]})
#     return out

In [14]:
quant_df.apply(save_key_in_txt_annotations,axis=1)

Not working for Beethoven/Piano_Sonatas/7-3/midi_cleaned.mid [(2, 0, 2), (7, 56, 1)] [(2, 0.0), (7, 113.02318800000002), (2, 146.86037700000003)]
Not working for Chopin/Sonata_3/3rd/midi_cleaned.mid [(11, 1, 5), (8, 71, -4), (4, 77, 4), (11, 99, 5)] [(11, 0.0), (4, 113.01057626875), (8, 285.63766901875), (4, 310.23512295625), (11, 400.62070261875)]
Not working for Haydn/Keyboard_Sonatas/39-3/midi_cleaned.mid [(7, 1, 1)] [(7, 0.0), (2, 34.5), (7, 39.0), (2, 99.0), (7, 103.5)]
Not working for Liszt/Hungarian_Rhapsodies/6/midi_cleaned.mid [(1, 1, -5), (1, 42, 7), (1, 74, -5), (10, 96, -2)] [(1, 0.0), (10, 171.61927083333336)]
Not working for Schumann/Kreisleriana/1_no_first_repeat/midi_cleaned.mid [(5, 1, -1), (10, 27, -2), (5, 53, -1)] []
Not working for Schumann/Kreisleriana/2/midi_cleaned.mid [(10, 1, -2)] [(10, 0.0), (3, 179.75), (10, 184.25), (3, 207.25), (10, 211.75)]


0       None
1       None
10      None
15      None
16      None
        ... 
1046    None
1049    None
1053    None
1055    None
1058    None
Length: 233, dtype: object

In [30]:
m21.converter.parse(Path("C:/Users/fosca/Desktop/CNAM/performed-midi-dataset/Mozart/Fantasie_475/musicxml_cleaned2.musicxml"))

<music21.stream.Score 0x2088285b088>

In [45]:
len("a,4/4".split(","))

2

In [15]:
for i,row in quant_df.iterrows():
    score_keys = score2ks_changes(str(Path(BASE_PATH,row["score_xml"])),enharmonic = False)
    midi_keys = midi2ks_changes(str(Path(BASE_PATH,row["score_midi"])))
#     assert([sk[0] for sk in score_keys]  == [mk[0] for mk in midi_keys]  ) #check if they coincide with the score
    if [sk[0] for sk in score_keys]  != [mk[0] for mk in midi_keys]:
        print(row.performed_midi_path)
    
    
print("Done")

Beethoven/Piano_Sonatas/7-3/Larionova04.mid
Chopin/Sonata_3/3rd/GarritsonL10.mid
Haydn/Keyboard_Sonatas/39-3/Yarden07M.mid
Liszt/Hungarian_Rhapsodies/6/KaiRuiR04.mid
Schumann/Kreisleriana/1_no_first_repeat/JohannsonP02M.mid
Schumann/Kreisleriana/2/JohannsonP03.mid
Done


In [25]:
for i,row in quant_df.iterrows():
    print(row.performed_midi_path)
    assert(Path(BASE_PATH,row.score_midi[:-17],"annotations3.txt").exists())
    quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations3.txt"),header=None, names=["time","time2","type"],sep='\t')
    for i,r in quant_ann_df.iterrows():
        type_list = r["type"].split(",")
        if len(type_list)==1:
            pass
        elif len(type_list)==2:
            print("Time signature:",type_list[1])
        elif len(type_list)==3:
            if type_list[1]!="":
                print("Time signature:",type_list[1])
            print("Key:",m21.key.sharpsToPitch(int(type_list[2])))
        else:
            raise Exception

Bach/Fugue/bwv_846/Shi05M.mid
Key: C
Time signature: 4/4
Bach/Fugue/bwv_848/Denisova06M.mid
Key: C#
Time signature: 4/4
Bach/Fugue/bwv_854/LuA01M.mid
Key: E
Time signature: 4/4
Bach/Fugue/bwv_856/LuoJ01M.mid
Key: F
Time signature: 3/8
Bach/Fugue/bwv_857/Bult-ItoS02M.mid
Key: A-
Time signature: 4/4
Bach/Fugue/bwv_858/VuV01M.mid
Key: F#
Time signature: 4/4
Bach/Fugue/bwv_860/Ko04M.mid
Time signature: 6/8
Key: G
Bach/Fugue/bwv_862/Song04M.mid
Key: A-
Time signature: 4/4
Bach/Fugue/bwv_863/LeeN01M.mid
Key: B
Time signature: 4/4
Bach/Fugue/bwv_864/SunD01M.mid
Time signature: 9/8
Key: A
Bach/Fugue/bwv_865/Rizikov01M.mid
Key: C
Time signature: 4/4
Bach/Fugue/bwv_866/SOLOM02.mid
Key: B-
Time signature: 3/4
Bach/Fugue/bwv_867/HuNY01M.mid
Time signature: 2/2
Key: D-
Bach/Fugue/bwv_868/GonzalezJ05M.mid
Key: B
Time signature: 4/4
Bach/Fugue/bwv_870/ChenW01M.mid
Key: C
Time signature: 2/4
Bach/Fugue/bwv_873/Lisiecki02.mid
Time signature: 12/16
Key: E
Bach/Fugue/bwv_874/BianF01.mid
Key: D
Time signa

Beethoven/Piano_Sonatas/4-1/BENABD01.mid
Time signature: 6/8
Key: E-
Key: C
Key: E-
Beethoven/Piano_Sonatas/5-1/Colafelice02M.mid
Time signature: 3/4
Key: E-
Beethoven/Piano_Sonatas/7-1/Hebert01M.mid
Time signature: 2/2
Key: D
Key: C
Key: D
Beethoven/Piano_Sonatas/7-2/Larionova04.mid
Time signature: 6/8
Key: F
Beethoven/Piano_Sonatas/7-3/Larionova04.mid
Key: D
Time signature: 3/4
Key: G
Key: D
Beethoven/Piano_Sonatas/7-4/Larionova04M.mid
Time signature: 4/4
Key: D
Key: C
Key: D
Beethoven/Piano_Sonatas/8-1/ChenS01.mid
Time signature: 4/4
Key: E-
Key: C
Key: E-
Beethoven/Piano_Sonatas/8-2/Na06.mid
Time signature: 2/4
Key: A-
Beethoven/Piano_Sonatas/8-3/Na06M.mid
Time signature: 2/2
Key: E-
Beethoven/Piano_Sonatas/9-1/Tysman05M.mid
Time signature: 4/4
Key: E
Beethoven/Piano_Sonatas/9-2_no_trio/Tysman05.mid
Time signature: 3/4
Key: G
Beethoven/Piano_Sonatas/9-3/Tysman05M.mid
Key: E
Time signature: 2/2
Key: G
Key: E
Brahms/Six_Pieces_op_118/2/Shilyaev03.mid
Key: A
Time signature: 3/4
Chopin

Time signature: 3/4
Time signature: 4/4
Time signature: 3/4
Time signature: 2/4
Time signature: 3/4
Time signature: 2/4
Time signature: 4/4
Time signature: 3/4
Time signature: 4/4
Time signature: 3/4
Key: F#
Time signature: 4/4
Time signature: 2/4
Key: C
Time signature: 4/4
Time signature: 2/4
Key: B
Time signature: 3/4
Time signature: 4/4
Time signature: 5/4
Time signature: 4/4
Key: C
Time signature: 3/4
Time signature: 4/4
Key: C#
Ravel/Miroirs/3_Une_Barque/DupreeF20.mid
Time signature: 2/4
Key: A
Time signature: 3/4
Time signature: 2/4
Time signature: 3/4
Time signature: 2/4
Time signature: 1/4
Time signature: 3/4
Time signature: 4/4
Time signature: 3/4
Time signature: 4/4
Time signature: 3/4
Time signature: 5/4
Time signature: 2/4
Time signature: 3/4
Time signature: 2/4
Time signature: 3/4
Key: E-
Time signature: 4/4
Time signature: 3/4
Time signature: 4/4
Time signature: 5/4
Time signature: 2/4
Time signature: 3/4
Key: A
Time signature: 2/4
Time signature: 3/4
Time signature: 4/4


In [29]:
for i,row in quant_df.iterrows():
    # Delete old annotation
    Path(BASE_PATH,row.score_midi[:-17],"annotations2.txt").unlink()
    Path(BASE_PATH,row.score_midi[:-17],"annotations.txt").unlink()
    #rename annotations 3
    Path(BASE_PATH,row.score_midi[:-17],"annotations3.txt").rename(Path(Path(BASE_PATH,row.score_midi[:-17],"annotations3.txt").parent, "annotations.txt"))

# Check all pieces that have the same number of measures of the score

In [5]:
def same_number_of_measures(row):
    score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
    score_measures_n = len(score.parts[0].recurse().getElementsByClass(m21.stream.Measure))
    
    quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
    len_ann= len([db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist() if db_tp.split(",")[0] == "db" ])
    if [db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist()][0] != "db": 
        len_ann += 1
    
    if len_ann!= score_measures_n:
        print("Different for", row.performed_midi_path, score_measures_n, len_ann )
    else:
        print("Same measures",row.performed_midi_path, score_measures_n, len_ann  )
    return len_ann==score_measures_n
    

In [105]:
same_number_of_measures(quant_df.loc[706])

Different for Chopin/Sonata_2/2nd_no_repeat/Giltburg02.mid 287 285


False

In [100]:
row = quant_df.loc[997]
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
print(len(score.parts[0].recurse().getElementsByClass(m21.stream.Measure)))

quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
[db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist()]
[db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist()][0] != "db"



137


False

In [6]:
quant_df["same measures"] = quant_df.apply(same_number_of_measures, axis = 1)

Same measures Bach/Fugue/bwv_846/Shi05M.mid 27 27
Same measures Bach/Fugue/bwv_848/Denisova06M.mid 55 55
Same measures Bach/Fugue/bwv_854/LuA01M.mid 29 29
Same measures Bach/Fugue/bwv_856/LuoJ01M.mid 73 73
Same measures Bach/Fugue/bwv_857/Bult-ItoS02M.mid 58 58
Same measures Bach/Fugue/bwv_858/VuV01M.mid 35 35
Same measures Bach/Fugue/bwv_860/Ko04M.mid 86 86
Same measures Bach/Fugue/bwv_862/Song04M.mid 35 35
Same measures Bach/Fugue/bwv_863/LeeN01M.mid 41 41
Same measures Bach/Fugue/bwv_864/SunD01M.mid 54 54
Same measures Bach/Fugue/bwv_865/Rizikov01M.mid 87 87
Same measures Bach/Fugue/bwv_866/SOLOM02.mid 48 48
Same measures Bach/Fugue/bwv_867/HuNY01M.mid 75 75
Same measures Bach/Fugue/bwv_868/GonzalezJ05M.mid 34 34
Same measures Bach/Fugue/bwv_870/ChenW01M.mid 83 83
Same measures Bach/Fugue/bwv_873/Lisiecki02.mid 71 71
Same measures Bach/Fugue/bwv_874/BianF01.mid 50 50
Same measures Bach/Fugue/bwv_875/Ahfat01M.mid 27 27
Same measures Bach/Fugue/bwv_876/SirajA01M.mid 70 70
Same measure

Same measures Chopin/Etudes_op_25/11/ChenS02.mid 96 96
Same measures Chopin/Etudes_op_25/12/Atzinger03.mid 83 83
Different for Chopin/Etudes_op_25/2/Karpeyev02.mid 70 69
Different for Chopin/Etudes_op_25/4/Taverna02.mid 66 65
Same measures Chopin/Etudes_op_25/5/Levitsky10M.mid 139 139
Same measures Chopin/Etudes_op_25/8/DeTurck02.mid 36 36
Same measures Chopin/Polonaises/53/Chon08M.mid 181 181


KeyboardInterrupt: 

In [87]:
quant_df[quant_df["same measures"]].shape

(92, 12)

In [91]:
ok_list = quant_df[quant_df["same measures"]]["score_xml"].tolist()

all_df[all_df["score_xml"].isin(ok_list)]

Unnamed: 0.1,Unnamed: 0,vnet_title,vnet_composer,score_xml,score_midi,maestro_midi,start,end,performed_midi_path,performed_annotations_path,same_length
169,171,Islamey,Balakirev,Balakirev/Islamey/musicxml_cleaned.musicxml,Balakirev/Islamey/midi_cleaned.mid,,,,Balakirev/Islamey/CHEN04.mid,Balakirev/Islamey/CHEN04_ann.txt,True
170,172,Islamey,Balakirev,Balakirev/Islamey/musicxml_cleaned.musicxml,Balakirev/Islamey/midi_cleaned.mid,{maestro}/2004/MIDI-Unprocessed_XP_04_R1_2004_...,,,Balakirev/Islamey/CHEN10M.mid,Balakirev/Islamey/CHEN10M_ann_cleaned.txt,True
171,173,Islamey,Balakirev,Balakirev/Islamey/musicxml_cleaned.musicxml,Balakirev/Islamey/midi_cleaned.mid,,,,Balakirev/Islamey/Cho05.mid,Balakirev/Islamey/Cho05_ann.txt,True
172,174,Islamey,Balakirev,Balakirev/Islamey/musicxml_cleaned.musicxml,Balakirev/Islamey/midi_cleaned.mid,,,,Balakirev/Islamey/Gorucan03.mid,Balakirev/Islamey/Gorucan03_ann.txt,True
173,175,Islamey,Balakirev,Balakirev/Islamey/musicxml_cleaned.musicxml,Balakirev/Islamey/midi_cleaned.mid,,,,Balakirev/Islamey/Na04.mid,Balakirev/Islamey/Na04_ann.txt,True
...,...,...,...,...,...,...,...,...,...,...,...
1063,1085,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Na03.mid,Scriabin/Sonatas/5/Na03_ann_cleaned.txt,True
1064,1086,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,{maestro}/2009/MIDI-Unprocessed_09_R1_2009_01-...,,,Scriabin/Sonatas/5/Na07M.mid,Scriabin/Sonatas/5/Na07M_ann_cleaned.txt,True
1065,1087,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/TET02.mid,Scriabin/Sonatas/5/TET02_ann_cleaned.txt,True
1066,1088,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Yeletskiy02.mid,Scriabin/Sonatas/5/Yeletskiy02_ann_cleaned.txt,True


In [6]:
# quant_df[quant_df.score_xml == "Schubert/Impromptu_op142/3/musicxml_cleaned.musicxml"]
quant_df[quant_df.score_xml == "Schumann/Kreisleriana/6/musicxml_cleaned.musicxml"]

Unnamed: 0.1,Unnamed: 0,vnet_title,vnet_composer,score_xml,score_midi,maestro_midi,start,end,performed_midi_path,performed_annotations_path,same_length
1043,1065,Kreisleriana_6,Schumann,Schumann/Kreisleriana/6/musicxml_cleaned.musicxml,Schumann/Kreisleriana/6/midi_cleaned.mid,,,,Schumann/Kreisleriana/6/JohannsonP07.mid,Schumann/Kreisleriana/6/JohannsonP07_ann_clean...,True


In [102]:
us = m21.environment.UserSettings()
us['musicxmlPath'] =  'C:/Program Files (x86)/MuseScore 2/bin/MuseScore.exe'
us['musescoreDirectPNGPath'] =  'C:/Program Files (x86)/MuseScore 2/bin/MuseScore.exe'

### Consider also repetitions

In [55]:
row = quant_df.loc[189]
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
e = m21.repeat.Expander(streamObj= score.parts[0])
print(e.repeatBarsAreCoherent())
rep_list = list(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark))
print(rep_list)
# .findInnermostRepeatIndices(streamObj= score.parts[0])
m_map = e.measureMap()
print(m_map)
print(len(m_map))



True
[<music21.bar.Repeat direction=start>, <music21.bar.Repeat direction=end>, <music21.bar.Repeat direction=start>, <music21.repeat.Fine "Fine">, <music21.bar.Repeat direction=end>, <music21.bar.Repeat direction=start>, <music21.bar.Repeat direction=end>, <music21.bar.Repeat direction=start>, <music21.bar.Repeat direction=end>, <music21.repeat.DaCapoAlFine "D.C. al Fine">]


KeyError: 51

In [40]:
# version if Dc al fine

row = quant_df.loc[969]
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
e = m21.repeat.Expander(streamObj= score.parts[0])
print(e.repeatBarsAreCoherent())
rep_list = list(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark))
print(rep_list)
print("Remove")
print(rep_list[0].measureNumber)
score.remove(rep_list[0],recurse= True)
print(list(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark)))
# .findInnermostRepeatIndices(streamObj= score.parts[0])
m_map = e.measureMap()
print(m_map)
print(len(m_map))

True
[<music21.repeat.Coda "Coda">]
Remove
252
[]


ExpanderException: cannot expand Stream: badly formed repeats or repeat expressions

In [128]:
row = quant_df.loc[741]
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))

end_index = 0
found_note = False
while not found_note:    
    for part in score.parts:
        notes = list(part.measure(-end_index-1).recurse().notes)
        if len(notes)!= 0:
            print([n.tie for n in notes])
            for n in notes:
                if (n.tie is None or n.tie.type == 'start'):
                    found_note = True
                    break
    if not found_note:        
        end_index += 1

print(end_index)
# for part in score.parts:
#     print(part.measure(-1).recurse())

print([n.tie for n in list(score.parts[0].measure(-end_index-1).recurse().notes)])


[<music21.tie.Tie stop>]
[<music21.tie.Tie stop>]
[None, <music21.tie.Tie start>]
[None, <music21.tie.Tie start>]
1
[None, <music21.tie.Tie start>]


In [54]:
row = quant_df.loc[792]
print("Processing",row["score_xml"] )
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
score_measures = score.parts[1].recurse().getElementsByClass(m21.stream.Measure)

some_notes_on_first_db = False
for part in score.parts:
    measure = part.recurse().getElementsByClass(m21.stream.Measure)[0]
#     print([n for n in measure.recurse().notes])
    print(measure.paddingLeft)
    notes_on_db = [n for n in measure.recurse().notes if n.beat == 1]
    if len(notes_on_db)!= 0:
        some_notes_on_first_db = True
        break
        
print(some_notes_on_first_db)

print(score_measures[81].show("text"))

score_measures[35].duration.updateQuarterLength()
print(score_measures[34].duration.quarterLength)

quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
len_ann= len([db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist() if db_tp.split(",")[0] == "db" ])
quant_ann_df

Processing Liszt/Ballade_2/musicxml_cleaned.musicxml
5.5
5.5
False
{0.0} <music21.chord.Chord E1 E2>
{0.5} <music21.note.Rest rest>
{1.0} <music21.chord.Chord D#1 D#2>
{1.5} <music21.note.Rest rest>
{2.0} <music21.chord.Chord D1 D2>
{2.5} <music21.note.Rest rest>
{3.0} <music21.chord.Chord D2 D3>
{3.5} <music21.note.Rest rest>
None
4.0


Unnamed: 0,time,time2,type
0,0.100000,0.100000,"1,db,6/4,2"
1,0.700000,0.700000,b
2,1.300000,1.300000,"2,db"
3,1.900000,1.900000,b
4,2.500000,2.500000,"3,db"
...,...,...,...
1126,1167.738031,1167.738031,"315,db"
1127,1169.238031,1169.238031,b
1128,1170.757653,1170.757653,b
1129,1172.217621,1172.217621,b


In [5]:
all_df = pd.read_csv(Path(BASE_PATH,"metadata.csv"))
quant_df = all_df.drop_duplicates(subset="midi_score")
quant_df.index[quant_df.xml_score == "Schumann/Kreisleriana/2/xml_score.musicxml"]

Int64Index([1032], dtype='int64')

In [13]:
exception_dict = {
    "Beethoven/Piano_Sonatas/29-2/musicxml_cleaned.musicxml": {112:[113]},
    "Beethoven/Piano_Sonatas/29-4/musicxml_cleaned.musicxml": {0:[1],2:[3,4],12:[13,14,15]},
    "Beethoven/Piano_Sonatas/30-1/musicxml_cleaned.musicxml" : {15:[16],66:[67]},
    "Beethoven/Piano_Sonatas/31-3_4/musicxml_cleaned.musicxml": {4:[5,6],7:[8,9]},
    "Haydn/Keyboard_Sonatas/49-1/musicxml_cleaned.musicxml" : {131:[132]},
    "Liszt/Gran_Etudes_de_Paganini/2_La_campanella/musicxml_cleaned.musicxml" : {97: [98], 99:[100]},
    "Liszt/Mephisto_Waltz/musicxml_cleaned.musicxml" : {857: [858],198:[199],808: [809,910]},
    "Liszt/Transcendental_Etudes/4/musicxml_cleaned.musicxml": {23: [24,25],56:[57,58,59]},
    "Liszt/Transcendental_Etudes/9/musicxml_cleaned.musicxml": {45:[46],75:[76]},
    "Mozart/Fantasie_475/musicxml_cleaned.musicxml": {84: [85]},
    "Schumann/Kreisleriana/2/musicxml_cleaned.musicxml": {38:[39],57:[58],96: [97]}
}

repetition_not_working = {
    "Beethoven/Piano_Sonatas/11-3/musicxml_cleaned.musicxml" : list(range(0,9))*2+list(range(9,32))*2+list(range(32,41))*2+list(range(41,50))+list(range(41,49))+[50]+list(range(0,32)),
    "Beethoven/Piano_Sonatas/28-2/musicxml_cleaned.musicxml" : list(range(0,9)) + list(range(1,8)) + list(range(9,56)) + list(range(13,55)) + [56,57] + list(range(58,68))*2 + list(range(68,98)) + list(range(1,8))+ list(range(9,55)) + [56],
    "Beethoven/Piano_Sonatas/7-3/musicxml_cleaned.musicxml" : list(range(0,17))*2 + list(range(17,56))*2 + list(range(56,89)) + list(range(0,56))
}

In [16]:
#compute the measure map in the score
row = quant_df.loc[1043]
score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
#consider both hands because of some durations problems some measures has duration 0
score_measures_r = score.parts[0].recurse().getElementsByClass(m21.stream.Measure)
score_measures_l = score.parts[1].recurse().getElementsByClass(m21.stream.Measure)
if len(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark)) == 0:
    #no repetitions, we are going linearly
    m_map = list(range(len(score_measures_r)))
elif row["score_xml"] in repetition_not_working.keys() :
    print("DC al Fine -> Manual m_map")
    #in case of dc al fine, music21 does not work. We did it manually
    m_map = repetition_not_working[row["score_xml"]]
else:
    print("Founded repetitions")
#         print("-------------")
#         print("Processing", row["score_xml"] )
    try:
        e = m21.repeat.Expander(streamObj= score.parts[0])
        m_map = e.measureMap()
    except:
        print("Processing", row["score_xml"] )
        print("Expansion Exception")

#consider pickup measure in the score
if score_measures_r[0].paddingLeft > 0 or score_measures_l[0].paddingLeft > 0 :
    m_map = m_map[1:]
else: # or if there is a pause as first event
    some_notes_on_first_db = False
    for part in score.parts:
        measure = part.recurse().getElementsByClass(m21.stream.Measure)[0]
        notes_on_db = [n for n in measure.recurse().notes if n.beat == 1]
        if len(notes_on_db)!= 0:
            some_notes_on_first_db = True
            break
    if not some_notes_on_first_db:
        print("Removing the first measure in the score")
        m_map = m_map[1:]

score_measures_n = len(m_map)

#compute number of measures in the midi score (e.g. in the midi score annotations)
quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
len_ann= len([db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist() if db_tp.split(",")[0] == "db" ])
#     #consider pickup measure in the midi score
#     if [db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist()][0] != "db": 
#         len_ann += 1

if len_ann== score_measures_n: # if it's already aligned, finish here to spare computation time
    print("Equal before all the transformations")
    print(m_map)
    new_map = m_map
else: # consider the splitted measures
    new_map = []
    i =0
    while i < len(m_map):
#         print(m_map[i])
        if (not exception_dict.get(row["score_xml"]) is None) and (m_map[i] in exception_dict[row["score_xml"]].keys()):
            print("Exception dict")
            connected_to = exception_dict[row["score_xml"]][m_map[i]]
            if len(connected_to) == 1:
                new_map.append(str(m_map[i])+"-"+str(connected_to[0]))
                i+=2
            elif len(connected_to) == 2:
                new_map.append(str(m_map[i])+"-"+str(connected_to[0])+"-"+str(connected_to[1]))
                i+=3
            elif len(connected_to) == 3:
                new_map.append(str(m_map[i])+"-"+str(connected_to[0])+"-"+str(connected_to[1])+"-"+str(connected_to[2]))
                i+=4
            else:
                raise Exception("connected too with too many elements")
        else:
            measure_r = score_measures_r[m_map[i]]
            measure_l = score_measures_l[m_map[i]]
    #         print(measure_r)
        #     print(m_ind,measure, measure.barDuration)
            m_dur = max([measure_r.duration.quarterLength,measure_l.duration.quarterLength]) #actual lenght
            m_ts_dur = max([measure_r.barDuration.quarterLength,measure_l.barDuration.quarterLength]) #lenght from the ts
    #         print("Dur, ts_dur",m_dur,m_ts_dur)
            if (m_ts_dur == m_dur): 
                new_map.append(m_map[i])
#                 print("added normally")
                i+=1
            elif (m_ts_dur > m_dur) and i!= len(m_map)-1:
                next_measure_r = score_measures_r[m_map[i+1]]
                next_measure_l = score_measures_l[m_map[i+1]]
                next_m_dur = max([next_measure_r.duration.quarterLength,next_measure_l.duration.quarterLength])
                next_m_ts_dur= max([next_measure_r.barDuration.quarterLength,next_measure_l.barDuration.quarterLength])
    #             print("NDur, Nts_dur",next_m_dur,next_m_ts_dur)
    #             if (m_dur + next_m_dur == m_ts_dur) and (next_m_ts_dur == m_ts_dur) : #two splitted measures
                if (m_dur + next_m_dur == m_ts_dur) and (next_m_dur < next_m_ts_dur ):
                    new_map.append(str(m_map[i])+"-"+str(m_map[i+1]))
                    i+=2
                    print("Splitted measure!", measure_r,next_measure_r )
                else:
                    new_map.append(m_map[i])
#                     print("added normally")
                    i+=1
            else:
    #                 print("Bigger",measure.barDuration.quarterLength,measure.duration.quarterLength)
                new_map.append(m_map[i])
                i+=1

#consider empty measures at the end of the score or measures with tied chords
# print("Processing the ending")
end_index = 0
found_note = False
while not found_note:    
    for part in score.parts:
#         notes = list(part.measure(-end_index-1).recurse().notes)
        print(list(part.recurse().getElementsByClass(m21.stream.Measure)[new_map[-end_index-1]].recurse().notes))
        notes = list(part.recurse().getElementsByClass(m21.stream.Measure)[new_map[-end_index-1]].recurse().notes)
        if len(notes)!= 0:
            for n in notes:
                if n.isChord:
                    for note in n.notes:
                        if (note.tie is None or note.tie.type == 'start'):
                            found_note = True
                            break
                    if found_note:
                        break
                else :  #it's a note
#                     print(n.tie.type)
                    if (n.tie is None or n.tie.type == 'start'):
                        found_note = True
                        break
    if not found_note:        
        end_index += 1
if end_index > 0:
    print("Cutting",end_index,"measures from the end")
    new_map = new_map[:-end_index]

if len_ann== len(new_map):
    print("Equal")
    print(new_map)
else:
    print("Different",row["score_xml"],len_ann, len(new_map) )
    print(new_map)

[<music21.chord.Chord B-3 D4>]
[<music21.note.Note F>]
[<music21.note.Note E->, <music21.note.Note E->, <music21.note.Note E->, <music21.note.Note G->, <music21.note.Note F>, <music21.note.Note E->, <music21.note.Note D>, <music21.note.Note F>, <music21.note.Note F>, <music21.chord.Chord B-3 D4 F4>, <music21.note.Note B->, <music21.note.Note E->, <music21.note.Note F>, <music21.note.Note A>, <music21.note.Note B->]
[<music21.note.Note E->, <music21.note.Note E->, <music21.note.Note E->, <music21.note.Note E->, <music21.note.Note F>, <music21.note.Note G->, <music21.note.Note F>, <music21.note.Note E->, <music21.note.Note B->, <music21.note.Note F>, <music21.note.Note F>, <music21.note.Note F>, <music21.note.Note C->]
Cutting 1 measures from the end
Equal
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]


In [64]:
quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')

out_str = ""
score_map_ind = 0
for i,r in quant_ann_df.iterrows():
    if score_map_ind<len(new_map):
        if r["type"].split(",")[0]=="db":
            out_str+= str(r["time"])+"\t" + str(new_map[score_map_ind]) +"," +str(r["type"]+ "\n") 
            score_map_ind +=1
        else:
            out_str += str(r["time"])+"\t" +str(r["type"]+ "\n") 
            
with open(Path(BASE_PATH,row.score_midi[:-17],"annotations_M.txt"), "w") as text_file:
    text_file.write(out_str)

In [5]:
for i, row in quant_df.iterrows():
    score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
    if len(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark)) > 0:
        e = m21.repeat.Expander(streamObj= score.parts[0])
        if not e.repeatBarsAreCoherent():
            print("Not coherent for",Path(BASE_PATH,row["score_xml"]))

Not coherent for ..\Schumann\Toccata\musicxml_cleaned.musicxml


In [66]:
def same_number_of_measures_with_repetitions(row):
    #compute the measure map in the score
    score = m21.converter.parse(Path(BASE_PATH,row["score_xml"]))
    #consider both hands because of some durations problems some measures has duration 0
    score_measures_r = score.parts[0].recurse().getElementsByClass(m21.stream.Measure)
    score_measures_l = score.parts[1].recurse().getElementsByClass(m21.stream.Measure)
    if len(score.parts[0].recurse().getElementsByClass(m21.repeat.RepeatMark)) == 0:
        #no repetitions, we are going linearly
        m_map = range(len(score_measures_r))
    elif row["score_xml"] in repetition_not_working.keys() :
        #in case of dc al fine, music21 does not work. We did it manually
        m_map = repetition_not_working[row["score_xml"]]
    else:
#         print("-------------")
#         print("Processing", row["score_xml"] )
        try:
            e = m21.repeat.Expander(streamObj= score.parts[0])
            m_map = e.measureMap()
        except:
            print("Processing", row["score_xml"] )
            print("Expansion Exception")
            return np.nan
        
    #consider pickup measure in the score
    if score_measures_r[0].paddingLeft > 0 or score_measures_l[0].paddingLeft > 0 :
        m_map = m_map[1:]
    else: # or if there is a pause as first event
        some_notes_on_first_db = False
        for part in score.parts:
            measure = part.recurse().getElementsByClass(m21.stream.Measure)[0]
            notes_on_db = [n for n in measure.recurse().notes if n.beat == 1]
            if len(notes_on_db)!= 0:
                some_notes_on_first_db = True
                break
        if not some_notes_on_first_db:
            m_map = m_map[1:]
        
    score_measures_n = len(m_map)
    
    #compute number of measures in the midi score (e.g. in the midi score annotations)
    quant_ann_df = pd.read_csv(Path(BASE_PATH,row.score_midi[:-17],"annotations.txt"),header=None, names=["time","time2","type"],sep='\t')
    len_ann= len([db_tp.split(",")[0] for db_tp in quant_ann_df["type"].tolist() if db_tp.split(",")[0] == "db" ])
      
    if len_ann== score_measures_n: # if it's already aligned, finish here to spare computation time
        return m_map
    else: # consider the splitted measures
        new_map = []
        i =0
        while i < len(m_map):
            # first check if the measure is an exception
            if (not exception_dict.get(row["score_xml"]) is None) and (m_map[i] in exception_dict[row["score_xml"]].keys()):
                connected_to = exception_dict[row["score_xml"]][m_map[i]]
                if len(connected_to) == 1:
                    new_map.append(str(m_map[i])+"-"+str(connected_to[0]))
                    i+=2
                elif len(connected_to) == 2:
                    new_map.append(str(m_map[i])+"-"+str(connected_to[0])+"-"+str(connected_to[1]))
                    i+=3
                elif len(connected_to) == 3:
                    new_map.append(str(m_map[i])+"-"+str(connected_to[0])+"-"+str(connected_to[1])+"-"+str(connected_to[2]))
                    i+=4
                else:
                    raise Exception("connected too with too many elements")
            else:
                measure_r = score_measures_r[m_map[i]]
                measure_l = score_measures_l[m_map[i]]
                m_dur = max([measure_r.duration.quarterLength,measure_l.duration.quarterLength]) #actual lenght
                m_ts_dur = max([measure_r.barDuration.quarterLength,measure_l.barDuration.quarterLength]) #lenght from the ts
                if (m_ts_dur == m_dur): 
                    new_map.append(m_map[i])
                    i+=1
                elif (m_ts_dur > m_dur) and i!= len(m_map)-1:
                    next_measure_r = score_measures_r[m_map[i+1]]
                    next_measure_l = score_measures_l[m_map[i+1]]
                    next_m_dur = max([next_measure_r.duration.quarterLength,next_measure_l.duration.quarterLength])
                    next_m_ts_dur= max([next_measure_r.barDuration.quarterLength,next_measure_l.barDuration.quarterLength])
    #                 if (m_dur + next_m_dur == m_ts_dur) and (next_m_ts_dur == m_ts_dur) : #two splitted measures
                    if (m_dur + next_m_dur == m_ts_dur) and (next_m_dur < next_m_ts_dur ) : #two splitted measures, good also if the tempo change if the duration is correct
                        new_map.append(str(m_map[i])+"-"+str(m_map[i+1]))
                        i+=2
                    else:
                        new_map.append(m_map[i])
                        i+=1
                else:
                    new_map.append(m_map[i])
                    i+=1
    
    #consider empty measures at the end of the score or measures with tied chords
    end_index = 0
    found_note = False
    while not found_note:    
        for part in score.parts:
            notes = list(part.recurse().getElementsByClass(m21.stream.Measure)[new_map[-end_index-1]].recurse().notes)
            if len(notes)!= 0:
                for n in notes:
                    if n.isChord:
                        for note in n.notes:
                            if (note.tie is None or note.tie.type == 'start'):
                                found_note = True
                                break
                        if found_note:
                            break
                    else :  #it's a note
                        if (n.tie is None or n.tie.type == 'start'):
                            found_note = True
                            break
        if not found_note:        
            end_index += 1
    if end_index > 0:
        new_map = new_map[:-end_index]
        
    if len_ann== len(new_map):
#         print("Equal")
        return(new_map)
    else:
        print("Different",row["score_xml"],len_ann, len(new_map) )
        return np.nan

In [67]:
quant_df["same_with_repeat"] = quant_df.apply(same_number_of_measures_with_repetitions,axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [68]:
quant_df[quant_df["same_with_repeat"].notna()].shape

(233, 12)

In [69]:
ok_list = quant_df[quant_df["same_with_repeat"].notna()]["score_xml"].tolist()

all_df[all_df["score_xml"].isin(ok_list)]

Unnamed: 0.1,Unnamed: 0,vnet_title,vnet_composer,score_xml,score_midi,maestro_midi,start,end,performed_midi_path,performed_annotations_path,same_length
0,0,Fugue_bwv_846,Bach,Bach/Fugue/bwv_846/musicxml_cleaned.musicxml,Bach/Fugue/bwv_846/midi_cleaned.mid,{maestro}/2006/MIDI-Unprocessed_19_R1_2006_01-...,144.567708,,Bach/Fugue/bwv_846/Shi05M.mid,Bach/Fugue/bwv_846/Shi05M_ann_cleaned.txt,True
1,1,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2008/MIDI-Unprocessed_02_R1_2008_01-...,69.317708,,Bach/Fugue/bwv_848/Denisova06M.mid,Bach/Fugue/bwv_848/Denisova06M_ann_cleaned.txt,True
2,2,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2011/MIDI-Unprocessed_08_R1_2011_MID...,74.795573,,Bach/Fugue/bwv_848/Lee01M.mid,Bach/Fugue/bwv_848/Lee01M_ann_cleaned.txt,True
3,3,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2017/MIDI-Unprocessed_049_PIANO049_M...,83.644792,,Bach/Fugue/bwv_848/LeeSH01M.mid,Bach/Fugue/bwv_848/LeeSH01M_ann_cleaned.txt,True
4,4,Fugue_bwv_848,Bach,Bach/Fugue/bwv_848/musicxml_cleaned.musicxml,Bach/Fugue/bwv_848/midi_cleaned.mid,{maestro}/2008/MIDI-Unprocessed_10_R1_2008_01-...,75.373698,,Bach/Fugue/bwv_848/Lin04M.mid,Bach/Fugue/bwv_848/Lin04M_ann_cleaned.txt,True
...,...,...,...,...,...,...,...,...,...,...,...
1063,1085,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Na03.mid,Scriabin/Sonatas/5/Na03_ann_cleaned.txt,True
1064,1086,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,{maestro}/2009/MIDI-Unprocessed_09_R1_2009_01-...,,,Scriabin/Sonatas/5/Na07M.mid,Scriabin/Sonatas/5/Na07M_ann_cleaned.txt,True
1065,1087,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/TET02.mid,Scriabin/Sonatas/5/TET02_ann_cleaned.txt,True
1066,1088,Sonatas_5,Scriabin,Scriabin/Sonatas/5/musicxml_cleaned.musicxml,Scriabin/Sonatas/5/midi_cleaned.mid,,,,Scriabin/Sonatas/5/Yeletskiy02.mid,Scriabin/Sonatas/5/Yeletskiy02_ann_cleaned.txt,True


In [34]:
3 in None

TypeError: argument of type 'NoneType' is not iterable

In [70]:
np.arange(1,10) + np.arange(2,10)*2

ValueError: operands could not be broadcast together with shapes (9,) (8,) 