In [2]:
import pandas as pd
from pathlib import Path
import util as u
import importlib
from tqdm import tqdm
# Register tqdm's pandas integration so DataFrames gain `progress_apply`,
# which the validation cells in this notebook use to show a progress bar.
tqdm.pandas()

# Re-import the local `util` module so edits made to it while the kernel is
# running are picked up without restarting.
importlib.reload(u)

# Root directory against which every relative path in this notebook is resolved
# (metadata.csv itself and the file paths stored in its columns).
BASE_PATH = "../"

  from pandas import Panel


In [3]:
# Load the dataset index: one row per entry, with columns holding paths
# (relative to BASE_PATH) to the score, performance and annotation files.
all_df = pd.read_csv(Path(BASE_PATH) / "metadata.csv")

# Show the available columns as the cell output.
all_df.columns

Index(['composer', 'title', 'folder', 'xml_score', 'midi_score',
       'midi_performance', 'performance_annotations', 'midi_score_annotations',
       'maestro_midi_performance', 'maestro_audio_performance', 'start', 'end',
       'audio_performance'],
      dtype='object')

# Check if all scores can be parsed by music21

In [4]:
# Keep only the first row for each distinct score-annotation file, so that
# score-level checks run once per score rather than once per metadata row.
quant_df = all_df.drop_duplicates(
    subset=["midi_score_annotations"],
    keep="first",
)

In [5]:
# Run the music21 parse check on every unique score's MusicXML file.
# NOTE(review): the helper's result is None for each row in the recorded output,
# so failures are presumably reported by printing or raising - confirm in util.
quant_df.progress_apply(
    lambda entry: u.xmlscore_parsable_music21(
        str(Path(BASE_PATH) / entry["xml_score"])
    ),
    axis=1,
)

100%|██████████| 235/235 [02:08<00:00,  1.83it/s]


0       None
1       None
10      None
15      None
16      None
        ... 
1045    None
1048    None
1052    None
1054    None
1057    None
Length: 235, dtype: object

# Check all MIDI scores

In [3]:
# Re-derive the one-row-per-score frame (same expression as in the music21
# section above) so this section can be run on its own after loading all_df.
quant_df = all_df.drop_duplicates(
    subset=["midi_score_annotations"],
    keep="first",
)

In [5]:
# Check for early/late downbeat (db) and beat (b) annotations in each MIDI score,
# passing the score file and its annotation file to the helper.
quant_df.apply(
    lambda entry: u.check_late_early_annot(
        str(Path(BASE_PATH) / entry["midi_score"]),
        str(Path(BASE_PATH) / entry["midi_score_annotations"]),
    ),
    axis=1,
)

0       None
1       None
10      None
15      None
16      None
        ... 
1046    None
1049    None
1053    None
1055    None
1058    None
Length: 235, dtype: object

In [7]:
# Check that every score annotation file contains annotations of the correct type.
quant_df.apply(
    lambda entry: u.check_annotation_text(
        str(Path(BASE_PATH) / entry["midi_score_annotations"])
    ),
    axis=1,
)

0       None
1       None
10      None
15      None
16      None
        ... 
1045    None
1048    None
1052    None
1054    None
1057    None
Length: 235, dtype: object

In [7]:
# Check that the ratio of beats to downbeats in each score annotation file is
# correct according to the local time signature.
quant_df.apply(
    lambda entry: u.check_b_db_ratio(
        str(Path(BASE_PATH) / entry["midi_score_annotations"])
    ),
    axis=1,
)

0       None
1       None
10      None
15      None
16      None
        ... 
1046    None
1049    None
1053    None
1055    None
1058    None
Length: 235, dtype: object

In [9]:
# Check that the number of downbeats in the annotations is the same as the
# (corrected) number of measures in the score.
# The XML score path is passed relative; the helper receives BASE_PATH
# separately through `base_path`.
quant_df.apply(
    lambda entry: u.same_number_of_measures_with_repetitions(
        entry["xml_score"],
        str(Path(BASE_PATH) / entry["midi_score_annotations"]),
        base_path=BASE_PATH,
    ),
    axis=1,
)

0       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
1       [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
10      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
15      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
16      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
                              ...                        
1046    [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, ...
1049    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
1053    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
1055    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
1058    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
Length: 235, dtype: object

# Check all performances

In [6]:
# Check for early/late downbeat (db) and beat (b) annotations in every
# performance, using the performance MIDI and its annotation file.
all_df.progress_apply(
    lambda entry: u.check_late_early_annot(
        str(Path(BASE_PATH) / entry["midi_performance"]),
        str(Path(BASE_PATH) / entry["performance_annotations"]),
    ),
    axis=1,
)

100%|██████████| 1067/1067 [03:23<00:00,  5.25it/s]


0       None
1       None
2       None
3       None
4       None
        ... 
1062    None
1063    None
1064    None
1065    None
1066    None
Length: 1067, dtype: object

In [8]:
# Check that every performance annotation file contains annotations of the
# correct type; unlike the score check, the W flag is explicitly disallowed here.
all_df.progress_apply(
    lambda entry: u.check_annotation_text(
        str(Path(BASE_PATH) / entry["performance_annotations"]),
        allow_W_flag=False,
    ),
    axis=1,
)

100%|██████████| 1067/1067 [01:11<00:00, 14.84it/s]


0       None
1       None
2       None
3       None
4       None
        ... 
1062    None
1063    None
1064    None
1065    None
1066    None
Length: 1067, dtype: object

In [9]:
# Check that the ratio of beats (b) to downbeats (db) is correct in every
# performance annotation file.
all_df.progress_apply(
    lambda entry: u.check_b_db_ratio(
        str(Path(BASE_PATH) / entry["performance_annotations"])
    ),
    axis=1,
)

100%|██████████| 1067/1067 [01:15<00:00, 14.11it/s]


0       None
1       None
2       None
3       None
4       None
        ... 
1062    None
1063    None
1064    None
1065    None
1066    None
Length: 1067, dtype: object

In [10]:
# Check every performance annotation file for inverted annotations.
all_df.progress_apply(
    lambda entry: u.check_inverted_annotations(
        str(Path(BASE_PATH) / entry["performance_annotations"])
    ),
    axis=1,
)

100%|██████████| 1067/1067 [01:07<00:00, 15.80it/s]


0       None
1       None
2       None
3       None
4       None
        ... 
1062    None
1063    None
1064    None
1065    None
1066    None
Length: 1067, dtype: object

In [21]:
# Check that each performance and its MIDI score have the same number and type
# of annotations. verbose=True makes the helper print a line for every mismatch.
all_df.progress_apply(
    lambda entry: u.midi_score_and_perf_aligned(
        str(Path(BASE_PATH) / entry["performance_annotations"]),
        str(Path(BASE_PATH) / entry["midi_score_annotations"]),
        verbose=True,
    ),
    axis=1,
)

 19%|█▉        | 206/1068 [00:11<01:43,  8.30it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/16-1/LuoJ03M_annotations.txt: 648 (ms) vs 646 (perf) 


 20%|██        | 214/1068 [00:12<02:59,  4.75it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/17-1/USHIKI05_annotations.txt: 1273 (ms) vs 1265 (perf) 


 21%|██        | 226/1068 [00:14<01:43,  8.12it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/17-2/KaszoS10_annotations.txt: 309 (ms) vs 360 (perf) 
Different length of annotations for ../Beethoven/Piano_Sonatas/17-3/KaszoS11M_annotations.txt: 1478 (ms) vs 1454 (perf) 


 23%|██▎       | 243/1068 [00:16<01:11, 11.54it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/18-3/KOLESO06_annotations.txt: 347 (ms) vs 299 (perf) 


 24%|██▎       | 251/1068 [00:17<02:48,  4.84it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/21-1/Sekino05M_annotations.txt: 1545 (ms) vs 1205 (perf) 


 24%|██▎       | 253/1068 [00:18<02:50,  4.79it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/21-1/YOO05_annotations.txt: 1545 (ms) vs 1205 (perf) 


 29%|██▉       | 313/1068 [00:27<01:22,  9.17it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/24-2/Lisiecki05M_annotations.txt: 365 (ms) vs 363 (perf) 


 33%|███▎      | 351/1068 [00:31<01:21,  8.75it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/29-4/DANILO01_annotations.txt: 1222 (ms) vs 1237 (perf) 


 33%|███▎      | 357/1068 [00:32<01:55,  6.13it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/3-1/LUO01_annotations.txt: 1064 (ms) vs 1056 (perf) 


 38%|███▊      | 405/1068 [00:35<00:54, 12.20it/s]

Different length of annotations for ../Beethoven/Piano_Sonatas/31-3_4/HuangSW10M_annotations.txt: 528 (ms) vs 491 (perf) 


 43%|████▎     | 464/1068 [00:41<01:01,  9.86it/s]

Different length of annotations for ../Chopin/Ballades/1/MunA19M_annotations.txt: 653 (ms) vs 650 (perf) 


 48%|████▊     | 516/1068 [00:45<00:29, 18.84it/s]

Different length of annotations for ../Chopin/Etudes_op_10/1/LuM02M_annotations.txt: 313 (ms) vs 312 (perf) 


 64%|██████▍   | 685/1068 [00:54<00:38,  9.95it/s]

Different length of annotations for ../Chopin/Scherzos/20/Wong04M_annotations.txt: 1873 (ms) vs 1861 (perf) 


 79%|███████▉  | 847/1068 [01:12<00:28,  7.88it/s]

Different length of annotations for ../Liszt/Mephisto_Waltz/Avdeeva07M_annotations.txt: 2602 (ms) vs 2599 (perf) 


 81%|████████  | 866/1068 [01:20<01:23,  2.41it/s]

Different length of annotations for ../Liszt/Sonata/Yeletskiy05M_annotations.txt: 2632 (ms) vs 2630 (perf) 


 82%|████████▏ | 871/1068 [01:21<00:59,  3.29it/s]

Different length of annotations for ../Liszt/Sonata/Zuber07M_annotations.txt: 2632 (ms) vs 2628 (perf) 


 84%|████████▍ | 895/1068 [01:23<00:17,  9.89it/s]

Different length of annotations for ../Liszt/Transcendental_Etudes/4/LeeE04M_annotations.txt: 682 (ms) vs 681 (perf) 


 88%|████████▊ | 943/1068 [01:26<00:06, 18.85it/s]

Different length of annotations for ../Rachmaninoff/Preludes_op_32/10/FONG08_annotations.txt: 241 (ms) vs 192 (perf) 


 90%|████████▉ | 956/1068 [01:27<00:07, 15.09it/s]

Different length of annotations for ../Ravel/Miroirs/4_Alborada_del_gracioso/CHOE02_annotations.txt: 547 (ms) vs 544 (perf) 


 90%|█████████ | 964/1068 [01:27<00:08, 11.57it/s]

Different length of annotations for ../Ravel/Miroirs/4_Alborada_del_gracioso/Shamray08_annotations.txt: 547 (ms) vs 544 (perf) 


 93%|█████████▎| 998/1068 [01:31<00:09,  7.23it/s]

Different length of annotations for ../Schubert/Impromptu_op142/1/Lisiecki10M_annotations.txt: 993 (ms) vs 1097 (perf) 


 94%|█████████▍| 1002/1068 [01:31<00:06,  9.64it/s]

Different length of annotations for ../Schubert/Impromptu_op142/3/Richardson07M_annotations.txt: 431 (ms) vs 415 (perf) 
Different length of annotations for ../Schubert/Impromptu_op142/3/SunY08M_annotations.txt: 431 (ms) vs 351 (perf) 


 97%|█████████▋| 1035/1068 [01:35<00:04,  7.85it/s]

Different length of annotations for ../Schumann/Kreisleriana/2/JohannsonP03_annotations.txt: 594 (ms) vs 486 (perf) 


 98%|█████████▊| 1044/1068 [01:35<00:01, 12.06it/s]

Different length of annotations for ../Schumann/Kreisleriana/5/JohannsonP06_annotations.txt: 518 (ms) vs 517 (perf) 


 99%|█████████▉| 1060/1068 [01:36<00:00, 12.81it/s]

Different length of annotations for ../Scriabin/Sonatas/5/ChernovA06M_annotations.txt: 1003 (ms) vs 979 (perf) 
Different length of annotations for ../Scriabin/Sonatas/5/FALIKS06_annotations.txt: 1003 (ms) vs 993 (perf) 


100%|█████████▉| 1064/1068 [01:37<00:00,  8.37it/s]

Different length of annotations for ../Scriabin/Sonatas/5/Ko07M_annotations.txt: 1003 (ms) vs 1000 (perf) 


100%|██████████| 1068/1068 [01:38<00:00, 10.88it/s]


0       True
1       True
2       True
3       True
4       True
        ... 
1063    True
1064    True
1065    True
1066    True
1067    True
Length: 1068, dtype: bool

# Check if all files exist

In [12]:
# Metadata columns whose referenced files must exist on disk.
# Defined once, outside the row loop, because the list never changes.
fields_to_check = ['xml_score', 'midi_score','midi_performance', 'performance_annotations', 'midi_score_annotations']

# Walk every metadata row and report each referenced file that is absent.
for i, row in all_df.iterrows():
    for f in fields_to_check:
        rel_path = row[f]
        if pd.isna(rel_path):
            # An empty CSV cell is read as NaN; Path() cannot join a float and
            # would raise TypeError, aborting the scan. Report it and continue.
            print("Missing path in column", f, "for row", i)
            continue
        my_file = Path(BASE_PATH, rel_path)
        if not my_file.is_file():
            print("File not found",my_file)