# BPSD: Parse Scores from MusicXML to CSV Files

Johannes Zeitler (johannes.zeitler@audiolabs-erlangen.de), 2024

In [1]:
import music21
import os
import pandas as pd
from libfmp import c1, b
import numpy as np
from music21 import converter, repeat, midi
import matplotlib.pyplot as plt

In [2]:
def sub1(x):
    """Return ``x`` decreased by one.

    Works for anything that supports ``- 1`` (plain numbers as well as
    pandas columns when used through ``DataFrame.apply``).
    """
    decremented = x - 1
    return decremented

In [3]:
def print_full(x):
    """Print ``x`` without pandas truncating its rows.

    ``display.max_rows`` is raised to ``len(x)`` only for the duration of the
    print. ``pd.option_context`` restores whatever value was active before the
    call (not the pandas default) and does so even if printing raises.

    Args:
        x: Object to print, typically a ``DataFrame`` or ``Series``; it must
            support ``len()``.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [4]:
# Input directory: the "<name>.xml" score files parsed below are read from here.
xml_path = os.path.join("../", "0_RawData", "score_xml_unfolded")


# Output directory: one "<name>.csv" note table per parsed score is written here.
out_path = os.path.join("../", "2_Annotations", "ann_score_note")

In [5]:
# Base names (file name without the ".xml" suffix) of all scores in xml_path,
# in sorted order. The suffix is matched with endswith(): a substring test
# would also accept names such as "foo.xml.bak" and then strip the wrong
# four characters from them.
sonatas = sorted(f[:-4] for f in os.listdir(xml_path) if f.endswith(".xml"))
print("found %i files"%(len(sonatas)))

found 32 files


In [None]:
# Column layout of the exported note table (this is also the CSV header order).
_NOTE_COLUMNS = [
    "start_meas",
    "end_meas",
    "duration_quarterLength",
    "pitch",
    "pitchName",
    "timeSig",
    "articulation",
    "grace",
]


def _note_rows(note):
    """Return one table row (dict keyed by ``_NOTE_COLUMNS``) per pitch of ``note``.

    Args:
        note: A music21 note or chord taken from ``stream.recurse().notes``.

    Returns:
        A list of dicts. A chord yields one row per contained pitch, all
        sharing the same timing; a single note yields exactly one row.
    """
    # _getTimeSignatureForBeat is a private music21 helper; query it once per
    # note instead of once per use.
    time_sig = note._getTimeSignatureForBeat()
    # Measure length, beat length and note length are all expressed on the
    # same scale (quarter lengths divided by 4), so their ratios below give
    # positions as fractions of a measure.
    measure_len = time_sig.numerator / time_sig.denominator
    beat_dur = note.beatDuration.quarterLength / 4
    note_dur = note.duration.quarterLength / 4
    # note.beat is 1-based; convert to a 0-based offset inside the measure.
    onset = (note.beat - 1) * beat_dur

    measure = note.measureNumber
    start_meas = measure + onset / measure_len
    end_meas = measure + (onset + note_dur) / measure_len
    # Durations are clamped to at least 1/16 of a quarter note, so zero-length
    # notes (e.g. grace notes) still get a non-zero quarter-length duration.
    duration_ql = max(note.duration.quarterLength * 1.0, 1 / 16)
    time_sig_label = "%i/%i"%(time_sig.numerator, time_sig.denominator)
    articulation = "_".join([art.name for art in note.articulations])
    grace = int(note.duration.isGrace)

    pitches = note.pitches if note.isChord else (note.pitch,)
    return [
        {
            "start_meas": start_meas,
            "end_meas": end_meas,
            "duration_quarterLength": duration_ql,
            "pitch": pitch.midi,
            "pitchName": pitch.nameWithOctave,
            "timeSig": time_sig_label,
            "articulation": articulation,
            "grace": grace,
        }
        for pitch in pitches
    ]


# Make sure the CSV target directory exists before any score is processed,
# so a missing folder does not abort the run at the final write.
os.makedirs(out_path, exist_ok=True)

for sonata in sonatas:
    print("processing %s"%(sonata))

    s = converter.parse(os.path.join(xml_path, "%s.xml"%(sonata)))

    # Work on the tie-stripped score returned by music21's stripTies().
    s = s.stripTies()

    # Build the table in one go. Growing a DataFrame with pd.concat per note
    # is quadratic, and concatenating onto an empty frame makes the resulting
    # column dtypes depend on the pandas version (float_format in to_csv only
    # applies to float columns).
    rows = [row for note in s.recurse().notes for row in _note_rows(note)]
    df = pd.DataFrame(rows, columns=_NOTE_COLUMNS)

    df = df.sort_values("start_meas")

    # If the earliest onset lies after position 1.0, shift all measure
    # positions back by one measure (handled as an upbeat, "Auftakt").
    if (np.min(df["start_meas"]) - 1.0) > 0:
        print("subtract 1 measure (Auftakt)")
        df[["start_meas", "end_meas"]] = df[["start_meas", "end_meas"]].apply(sub1)

    # Visual sanity checks: first rows of the table and music21's own piano roll.
    print(df.iloc[:20])

    s.recurse().measures(0,2).plot("pianoroll")
    plt.show()

    # Piano roll rendered from the extracted table. The time axis is in
    # measures, so `fr` is the number of frames per measure.
    fr = 32
    noteList = [[row.start_meas, row.end_meas-row.start_meas, row.pitch, 64, 0] for _, row in df.iterrows()]

    nMeasures = 20

    pnoRoll,_ = b.b_sonification.list_to_pitch_activations(noteList, fr*nMeasures, fr)

    plt.figure(figsize=(10,6))
    plt.imshow(pnoRoll, origin='lower', extent=[0,nMeasures, 0, 127], aspect='auto', cmap='gray_r', interpolation="None")
    plt.grid()
    plt.ylim([20,90])
    plt.xlim([0.5,15])
    plt.show()

    # Sorted so the log line is reproducible between runs (set order is not).
    print("used articulations: %s"%(",".join(sorted(set(df.articulation)))))

    df.to_csv(os.path.join(out_path, "%s.csv"%(sonata)), sep=";", header=True, index=False, float_format='%07.03f')