In [3]:
from pathlib import Path
import pandas as pd
from partitura.io.musescore import find_musescore
import subprocess

In [None]:
# Load the romantic piano corpus metadata table (tab-separated) into pandas.
# The path is handed to pandas directly and decoded as UTF-8 explicitly, so the
# result does not depend on the platform's default text encoding, which a bare
# open() would silently use.
romantic_df = pd.read_csv(
    Path(r"path/to/romantic_piano_corpus.metadata.tsv"),
    sep="\t",
    encoding="utf-8",
)

In [6]:
# Load the Debussy concatenated metadata table (tab-separated) into pandas.
# The path is handed to pandas directly and decoded as UTF-8 explicitly, so the
# result does not depend on the platform's default text encoding, which a bare
# open() would silently use.
debussy_df = pd.read_csv(
    Path(r"path/to/debussy/concatenated_metadata.tsv"),
    sep="\t",
    encoding="utf-8",
)

In [None]:
# Load the Mozart metadata table (tab-separated) into pandas.
# The path is handed to pandas directly and decoded as UTF-8 explicitly, so the
# result does not depend on the platform's default text encoding, which a bare
# open() would silently use.
mozart_df = pd.read_csv(
    Path(r"path/to/mozart/metadata.tsv"),
    sep="\t",
    encoding="utf-8",
)

# Merge the datasets and convert to musicxml

The three datasets above contain non-overlapping pieces, with one exception: `debussy_suite_bergamasque` is in both the Debussy and the Romantic corpus.
We need to avoid converting it twice.

In [1]:
def convert_with_musescore(input_path, output_path, mscore_exec):
    """Convert a score file by calling the MuseScore command line.

    Runs ``mscore_exec -o output_path input_path -f`` and waits for it to
    finish. The process' standard output is discarded; its standard error is
    captured and shown only when the process exits with a non-zero code.

    Parameters
    ----------
    input_path : str
        Path of the score file to convert.
    output_path : str
        Path of the file MuseScore is asked to write.
    mscore_exec : str
        Path (or command name) of the MuseScore executable.

    Returns
    -------
    None
        A failed conversion is reported with ``print``; no exception is raised
        for a non-zero exit code.
    """
    cmd = [mscore_exec, "-o", output_path, input_path, "-f"]
    ps = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    if ps.returncode != 0:
        # Build the message *before* printing: calling .format() on the result
        # of print() (which is None) raised AttributeError on every failure.
        # errors="replace" keeps the report working even if the captured
        # stderr bytes are not valid UTF-8.
        message = "Command {} failed with code {}. MuseScore error messages:\n {}".format(
            cmd, ps.returncode, ps.stderr.decode("UTF-8", errors="replace")
        )
        print(message)

In [None]:
romantic_basepath = r"path/to/romantic_piano_corpus"
debussy_basepath = r"path/to/debussy_piano"
mozart_basepath = r"path/to/mozart_piano_sonatas"
# Root folder for every converted score. It is relative (not a user-specific
# absolute path) so the notebook runs on any machine, and it is the same
# "scores" folder that the parsability check reads.
scores_basepath = Path("scores")

musescore = find_musescore()
print(musescore)


def _convert_piece(in_path, out_path, *labels):
    """Convert one MuseScore file to MusicXML and report the outcome.

    Parameters
    ----------
    in_path : pathlib.Path
        The ``.mscx`` file to convert.
    out_path : pathlib.Path
        The ``.musicxml`` file to write; its parent folder is created if needed.
    *labels
        Values printed between the status text and the output path
        (corpus and/or piece name).
    """
    # exist_ok=True replaces the separate "exists?" check and cannot fail when
    # the folder is already there.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # convert to musicxml using musescore
    convert_with_musescore(str(in_path), str(out_path), musescore)
    # Report success only if an output file is actually present (note: a file
    # left over from an earlier run also counts as present).
    if out_path.is_file():
        print("Piece converted", *labels, str(out_path))
    else:
        print("Piece NOT converted", *labels, str(out_path))


# iterate through debussy_df
for _, row in debussy_df.iterrows():
    _convert_piece(
        Path(debussy_basepath, row["corpus"], "MS3", row["fname"] + ".mscx"),
        scores_basepath / "debussy_corpus" / (row["fname"] + ".musicxml"),
        row["corpus"],
        row["fname"],
    )

# iterate through romantic_df
for _, row in romantic_df.iterrows():
    # exclude debussy_suite_bergamasque since it is already included in the debussy corpus
    if row["corpus"] == "debussy_suite_bergamasque":
        continue
    _convert_piece(
        Path(romantic_basepath, row["corpus"], "MS3", row["piece"] + ".mscx"),
        scores_basepath / row["corpus"] / (row["piece"] + ".musicxml"),
        row["corpus"],
        row["piece"],
    )

# iterate through mozart_df
for _, row in mozart_df.iterrows():
    _convert_piece(
        Path(mozart_basepath, "MS3", row["fname"] + ".mscx"),
        scores_basepath / "mozart_sonatas" / (row["fname"] + ".musicxml"),
        row["fname"],
    )

# Check if the scores are parsable by partitura

In [4]:
import partitura as pt
from pathlib import Path
import warnings
# Silence warnings for the rest of the session: no category or message is
# given, so the "ignore" action applies to every warning raised from here on.
warnings.filterwarnings("ignore")

In [5]:
# Try to load every converted score with partitura and count how many succeed.
counter = 0        # scores that partitura loaded without raising
total_counter = 0  # all score files encountered

# sorted() makes the order of the report reproducible across runs and machines.
for folder in sorted(Path("scores").iterdir()):
    # Skip stray files sitting directly in "scores"; calling iterdir() on a
    # file would raise NotADirectoryError and abort the whole check.
    if not folder.is_dir():
        continue
    for score_path in sorted(folder.iterdir()):
        total_counter += 1
        try:
            # Only success or failure matters here, so the loaded score is not
            # kept (and the path variable is no longer overwritten by it).
            pt.load_score(str(score_path))
            counter += 1
        except Exception as e:
            # Deliberately broad: any loading failure is reported and the
            # check moves on to the next file.
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Error in score:", score_path)
            print(e)


print("Number of parsable scores :", counter)
print("Total number of scores:", total_counter)