# Dagstuhl ChoirSet: Evaluation of F0-Annotations

In [None]:
import numpy as np
import pandas as pd
import mir_eval
import os
import glob

In [None]:
# Folders with the F0 trajectories as CSV files, one folder per source
# (folder names indicate: manual annotations, pYIN output, CREPE output).
tony_path, pyin_path, crepe_path = (
    './DagstuhlChoirSet/annotations_csv_F0_manual/',
    './DagstuhlChoirSet/annotations_csv_F0_PYIN/',
    './DagstuhlChoirSet/annotations_csv_F0_CREPE/',
)

In [None]:
# Metrics kept per recording; this is also the column order of the eval_* arrays.
metric_keys = ('Voicing Recall', 'Voicing False Alarm', 'Raw Pitch Accuracy',
               'Raw Chroma Accuracy', 'Overall Accuracy')


def _metric_row(scores):
    """Return the `metric_keys` entries of `scores` as a (1, 5) array.

    `scores` is the mapping returned by `mir_eval.melody.evaluate`.
    """
    return np.array([[scores[key] for key in metric_keys]])


file_list = np.sort(glob.glob((pyin_path + '*.csv')))

# One row per evaluated recording, one column per entry of `metric_keys`,
# kept separately per method (pyin / crepe) and per file-name tag (LRX / HSM / DYN).
eval_pyin_lrx = np.empty((0, 5))
eval_pyin_hsm = np.empty((0, 5))
eval_pyin_dyn = np.empty((0, 5))
eval_crepe_lrx = np.empty((0, 5))
eval_crepe_hsm = np.empty((0, 5))
eval_crepe_dyn = np.empty((0, 5))

# best CREPE voicing threshold found for each evaluated recording
thr_save = []

# Settings that do not depend on the recording.
header = None                        # CSV files are read without a header row
names = None
kind = 'cubic'                       # interpolation kind used for resampling
tol_cents = 50                       # pitch tolerance passed to mir_eval
thresholds = np.arange(0, 1, 0.01)   # candidate CREPE voicing thresholds

for pyin_file in file_list:
    fn = os.path.basename(pyin_file)[:-4]

    # The annotation is looked up by the estimate's base name without its
    # last three characters; recordings without annotation are skipped.
    anno_file = glob.glob(tony_path + fn[:-3] + '*.csv')
    if not anno_file:
        continue
    anno_file = anno_file[0]

    # A missing CREPE file is skipped as well (with a notice) instead of
    # aborting the whole run with a FileNotFoundError in read_csv.
    crepe_file = crepe_path + fn + '.csv'
    if not os.path.isfile(crepe_file):
        print(fn, 'skipped, CREPE file not found:', crepe_file)
        continue

    # read files
    anno = pd.read_csv(anno_file, sep=',', keep_default_na=False, header=header, names=names).values
    pyin = pd.read_csv(pyin_file, sep=',', keep_default_na=False, header=header, names=names).values
    crepe = pd.read_csv(crepe_file, sep=',', keep_default_na=False, header=header, names=names).values

    # pad annotations with zeros: prepend (time, 0) rows from time 0 up to the
    # first annotated time, spaced like the first two annotation rows
    pref = np.arange(0, anno[0, 0], anno[1, 0] - anno[0, 0]).reshape(-1, 1)
    anno = np.vstack((np.hstack((pref, np.zeros(pref.shape))), anno))

    # remove discontinuities in time axis: drop every row whose step to the
    # following row, rounded to 4 decimals, is not 0.0058
    # NOTE(review): 0.0058 is presumably the annotation hop size in seconds - confirm
    anno = np.delete(anno, np.where(np.round(np.abs(anno[1:, 0] - anno[:-1, 0]), 4) != 0.0058)[0], axis=0)

    # resample to common time axis (0.01 steps up to the shortest of the three series)
    t = np.arange(0, np.min([anno[-1, 0], pyin[-1, 0], crepe[-1, 0]]), 0.01)
    anno_res, anno_va = mir_eval.melody.resample_melody_series(anno[:, 0], anno[:, 1], anno[:, 1] > 0,
                                                               t, kind=kind)
    # pYIN: rows with a non-positive value count as unvoiced; the magnitude is used as frequency
    pyin_res, pyin_va = mir_eval.melody.resample_melody_series(pyin[:, 0], np.abs(pyin[:, 1]), pyin[:, 1] > 0,
                                                               t, kind=kind)
    # CREPE: the third column is passed as voicing and thresholded below
    # NOTE(review): presumably CREPE's confidence value - confirm
    crepe_res, crepe_va = mir_eval.melody.resample_melody_series(crepe[:, 0], crepe[:, 1], crepe[:, 2],
                                                                 t, kind=kind)

    # evaluate (estimated frequencies are zeroed where the estimate is unvoiced)
    eval_pyin = mir_eval.melody.evaluate(t, anno_res, t, pyin_res*pyin_va, cent_tolerance=tol_cents)

    # find optimal threshold for CREPE: the one with the highest Overall Accuracy
    crepe_sweep = [mir_eval.melody.evaluate(t, anno_res, t, crepe_res*(crepe_va >= thr),
                                            cent_tolerance=tol_cents)
                   for thr in thresholds]
    crepe_eval_sweep = [scores['Overall Accuracy'] for scores in crepe_sweep]

    idx_opt = int(np.argmax(crepe_eval_sweep))
    thr_opt = thresholds[idx_opt]
    print(fn, thr_opt)
    thr_save.append(thr_opt)
    # reuse the sweep result instead of evaluating the best threshold a second time
    eval_crepe = crepe_sweep[idx_opt]

    # file the two result rows under the tag found in the file name
    pyin_row = _metric_row(eval_pyin)
    crepe_row = _metric_row(eval_crepe)
    if "LRX" in fn:
        eval_pyin_lrx = np.append(eval_pyin_lrx, pyin_row, axis=0)
        eval_crepe_lrx = np.append(eval_crepe_lrx, crepe_row, axis=0)
    elif "DYN" in fn:
        eval_pyin_dyn = np.append(eval_pyin_dyn, pyin_row, axis=0)
        eval_crepe_dyn = np.append(eval_crepe_dyn, crepe_row, axis=0)
    elif "HSM" in fn:
        eval_pyin_hsm = np.append(eval_pyin_hsm, pyin_row, axis=0)
        eval_crepe_hsm = np.append(eval_crepe_hsm, crepe_row, axis=0)

In [None]:
# Column-wise mean over the rows of each result array (axis 0),
# grouped by file-name tag: LRX, HSM, DYN.
eval_pyin_lrx_mean, eval_crepe_lrx_mean = eval_pyin_lrx.mean(axis=0), eval_crepe_lrx.mean(axis=0)
eval_pyin_hsm_mean, eval_crepe_hsm_mean = eval_pyin_hsm.mean(axis=0), eval_crepe_hsm.mean(axis=0)
eval_pyin_dyn_mean, eval_crepe_dyn_mean = eval_pyin_dyn.mean(axis=0), eval_crepe_dyn.mean(axis=0)

# Column-wise standard deviation (NumPy default, ddof=0) over the same rows.
eval_pyin_lrx_std, eval_crepe_lrx_std = eval_pyin_lrx.std(axis=0), eval_crepe_lrx.std(axis=0)
eval_pyin_hsm_std, eval_crepe_hsm_std = eval_pyin_hsm.std(axis=0), eval_crepe_hsm.std(axis=0)
eval_pyin_dyn_std, eval_crepe_dyn_std = eval_pyin_dyn.std(axis=0), eval_crepe_dyn.std(axis=0)

In [None]:
# PYIN
# Print a header line, then one '&'-separated row per tag with "mean (std)"
# for each of the five metrics; every line ends in a double backslash.
print('Voicing Recall & Voicing False Alarm & Raw Pitch Accuracy & Raw Chroma Accuracy & Overall Accuracy\\\\')
pyin_rows = (
    ('LRX', eval_pyin_lrx_mean, eval_pyin_lrx_std),
    ('HSM', eval_pyin_hsm_mean, eval_pyin_hsm_std),
    ('DYN', eval_pyin_dyn_mean, eval_pyin_dyn_std),
)
for label, means, stds in pyin_rows:
    cells = ['%.2f (%.2f)' % (means[col], stds[col]) for col in range(5)]
    print(' & '.join([label] + cells) + '\\\\')

In [None]:
# CREPE
# Print a header line, then one '&'-separated row per tag with "mean (std)"
# for each of the five metrics; every line ends in a double backslash.
print('Voicing Recall & Voicing False Alarm & Raw Pitch Accuracy & Raw Chroma Accuracy & Overall Accuracy\\\\')
crepe_rows = (
    ('LRX', eval_crepe_lrx_mean, eval_crepe_lrx_std),
    ('HSM', eval_crepe_hsm_mean, eval_crepe_hsm_std),
    ('DYN', eval_crepe_dyn_mean, eval_crepe_dyn_std),
)
for label, means, stds in crepe_rows:
    cells = ['%.2f (%.2f)' % (means[col], stds[col]) for col in range(5)]
    print(' & '.join([label] + cells) + '\\\\')