In [1]:
import numpy as np
import mir_eval.transcription
import os
from IPython.display import display, Markdown
from midiutil import MIDIFile
from scipy.signal import find_peaks

from transcribe_factorization import transcribe_activations_dynamic_AD
import transcribe_factorization as tf
import evaluate_transcription as et
import STFT
import pandas as pd

ModuleNotFoundError: No module named 'mir_eval'

In [18]:
def printmd(string):
    display(Markdown(string))

def compute_scores_database(piano_type, H_normalization = False, fusion_peak = False):
    path_computed_H = "../data_persisted/activation_AD"
    path_songs = "/Volumes/TOSHIBA/MAPS/{}/MUS".format(piano_type)
    
    time_limit = 30
    beta = 1
    model_AD = True
    note_intensity = "M"

    codebook = range(21, 109)
    onset_tolerance = 50/1000
    
#     a = np.arange(1e-3, 1e-2, 1e-3)
#     b = np.arange(1e-4, 1e-3, 1e-4)
#     c = np.arange(1e-5, 1e-4, 1e-5)
#     f = np.arange(3e-1, 8e-1, 2e-2)
    f = np.arange(1e-3, 1e-1, 1e-3)
#     f = np.arange(1e-2, 6e-1, 2e-2)

    listthres = np.r_[f]
    files = os.listdir(path_songs)
    list_files_wav = []
    for it_files in files:
        if it_files.split(".")[-1] == "wav" and it_files[0] == "M":
            list_files_wav.append(it_files)

    all_res = []
    for a_song in list_files_wav:
#         song_name = a_song.replace(".wav", "")
        song_name = a_song[:-4]
#         print("processing piano song: {}".format(song_name))
#         path_this_song = "{}/{}".format(path_songs, a_song)
        path_this_song = path_songs + "/" + a_song
        stft = STFT.STFT(path_this_song, time = time_limit, model_AD=model_AD, channel = 0)

        X = stft.get_magnitude_spectrogram()

#         annot_name = a_song.replace("wav","txt")
#         annot_this_song = "{}/{}".format(path_songs, annot_name)
        annot_this_song = path_songs + "/" + a_song[:-3] + "txt"
        note_annotations = et.load_ref_in_array(annot_this_song, time_limit=time_limit)
        ref = np.array(note_annotations, float)
        ref_pitches = np.array(ref[:,2], int)
        try:
            #res_each_song = []
            res_a_param = []
            for mode in ["attack", "normal"]:
                H_persisted_name = "AD_{}_activations_song_{}_W_learned_{}_beta_{}_stftAD_{}_intensity_W_{}_time_limit_{}".format(mode, song_name, piano_type, beta, model_AD, note_intensity, time_limit)
                H = np.load("{}/{}.npy".format(path_computed_H, H_persisted_name), allow_pickle = True)
                res_every_thresh = []
                for threshold in listthres:
                    if fusion_peak:
                        prediction, _ = transcribe_activations_dynamic_AD(codebook, H, stft, threshold, H_normalization = H_normalization)
                    else:
                        prediction, _ = tf.transcribe_activations_dynamic(codebook, H, stft, threshold, H_normalization = H_normalization)
                    est = np.array(prediction, float)
                    if est.size > 0:
                        est_pitches = np.array(est[:,2], int)
                        (prec, rec, f_mes, _) = mir_eval.transcription.precision_recall_f1_overlap(ref[:,0:2], ref_pitches, est[:,0:2], est_pitches, offset_ratio = None, onset_tolerance = onset_tolerance)
                        matching = mir_eval.transcription.match_notes(ref[:,0:2], ref_pitches, est[:,0:2],est_pitches, onset_tolerance=onset_tolerance,offset_ratio=None)
                        TP = len(matching)
                        try:
                            FP = int(TP * (1 - prec) / prec)
                        except ZeroDivisionError:
                            FP = 0
                        try:
                            FN = int(TP * (1 - rec) / rec)
                        except ZeroDivisionError:
                            FN = 0
                        acc = et.accuracy(TP,FP,FN)
                    else:
                        prec, rec, f_mes, acc, TP, FP, FN = (0,0,0,0,0,0,0)
                    res_every_thresh.append([prec, rec, f_mes, acc, TP, FP, FN])
                res_a_param.append(res_every_thresh)
                res_each_song = np.array(res_every_thresh)
                best_ind = np.argmax(res_each_song[:, 2])
            all_res.append(res_a_param)

        except FileNotFoundError:
            print("\033[91m This song failed: {} \033[00m".format(a_song)) 
            pass
    np_all_res = np.array(all_res)
    the_t = []
    for t in ["attack", "normal"]:
        the_t.append("H: {}".format(t))
    index_pandas = the_t
    col = ['Best threshold','Precision', 'Recall', 'F measure','Accuracy','True Positives','False Positives','False Negatives']
    lines = []
    lines_opt_thresh = []
    for cond in range(len(index_pandas)):
        all_thresh = []
        for each_thresh in range(len(listthres)):
            all_thresh.append(np.mean(np_all_res[:,cond,each_thresh,2]))
        best_thresh_idx = np.argmax(all_thresh)
        this_line = [listthres[best_thresh_idx]]
        for i in range(len(col) - 1):# - 1 because threshold
            this_line.append(round(np.mean(np_all_res[:,cond,best_thresh_idx,i]), 4))
        lines.append(this_line)
        
        best_val = []
        for each_song in range(len(list_files_wav)):
            best_thresh_idx = np.argmax(np_all_res[each_song,cond,:,2])
            best_val.append([round(np_all_res[each_song,cond,best_thresh_idx,i], 4) for i in range(len(col) - 1)])
        lines_opt_thresh.append([round(np.mean(np.array(best_val)[:,i]),4) for i in range(len(col) - 1)])
    
    print("Piano: {}".format(piano_type))
    print("When averaging each threshold on all MAPS")
    df = pd.DataFrame(np.array(lines), columns = col, index = index_pandas)
    display(df.style.bar(subset=["F measure", "Accuracy"], color='#5fba7d'))

    print("When optimizing the threshold on each song")
    best_df = pd.DataFrame(np.array(lines_opt_thresh), columns = col[1:], index = index_pandas)
    display(best_df.style.bar(subset=["F measure", "Accuracy"], color='#5fba7d'))

In [19]:
for piano_type in ["AkPnCGdD","ENSTDkCl","AkPnBcht","AkPnBsdf","AkPnStgb","ENSTDkAm","SptkBGAm","StbgTGd2"]:
    compute_scores_database(piano_type, H_normalization = False, fusion_peak= False)
    

Piano: AkPnCGdD
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.072,0.5382,0.6897,0.597,0.4286,157.8667,138.0333,93.5667
H: normal,0.031,0.7156,0.7893,0.7439,0.5997,185.6667,76.2333,65.6333


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.5404,0.7291,0.6166,0.4491,169.9,149.3,81.6
H: normal,0.7578,0.79,0.771,0.6347,189.1,66.3,62.1


Piano: ENSTDkCl
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.021,0.8724,0.7702,0.8145,0.6967,180.1333,26.6667,64.7
H: normal,0.015,0.6295,0.7063,0.6612,0.5004,168.3,94.8667,76.5


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.8809,0.7858,0.8287,0.717,184.8667,26.4667,60.0
H: normal,0.652,0.7061,0.676,0.5184,169.4333,89.1667,75.2667


Piano: AkPnBcht
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.097,0.6774,0.6582,0.6588,0.5002,179.1,91.9333,119.8
H: normal,0.041,0.7483,0.802,0.77,0.64,226.2667,90.8667,72.7667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.6567,0.7057,0.6767,0.519,194.7333,109.4667,104.2
H: normal,0.7757,0.7998,0.7863,0.6607,225.8667,75.4,73.1


Piano: AkPnBsdf
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.035,0.7177,0.7164,0.7074,0.5572,190.3333,84.9667,88.9333
H: normal,0.025,0.7345,0.7469,0.7347,0.59,199.6,75.1333,79.7


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.7644,0.7219,0.7395,0.5965,194.1,73.5667,85.1667
H: normal,0.7498,0.7618,0.7538,0.6136,205.7,75.4,73.5667


Piano: AkPnStgb
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.083,0.724,0.6457,0.6696,0.5165,175.3333,73.3333,118.5333
H: normal,0.047,0.7843,0.7123,0.7385,0.6008,196.7333,59.5,97.0667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.706,0.6869,0.694,0.5422,190.6,88.9333,103.3
H: normal,0.7962,0.7291,0.758,0.6234,205.8333,61.2,87.9667


Piano: ENSTDkAm
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.011,0.7096,0.6638,0.6818,0.5282,185.3,73.8,106.4667
H: normal,0.004,0.3237,0.4855,0.3857,0.244,143.5667,278.6333,148.1


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.7206,0.6869,0.7015,0.5509,193.5333,79.7667,98.3667
H: normal,0.3442,0.4894,0.4008,0.2565,146.8,275.3,144.9


Piano: SptkBGAm
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.071,0.6666,0.6304,0.6377,0.4738,158.4333,85.5,110.7
H: normal,0.04,0.7356,0.7049,0.7141,0.5632,179.0667,68.2667,90.0333


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.669,0.6619,0.6612,0.5008,168.1,89.7667,100.9667
H: normal,0.7414,0.7254,0.7308,0.584,187.4667,71.0333,81.4667


Piano: StbgTGd2
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.043,0.6729,0.6849,0.6685,0.5085,158.4,86.2,86.5667
H: normal,0.028,0.7592,0.7494,0.7448,0.6022,175.7333,62.5667,69.2667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.716,0.696,0.7023,0.5479,163.2333,70.8333,81.6667
H: normal,0.7664,0.7764,0.77,0.6339,186.0,60.7333,59.0


### peak fusion

In [13]:
for piano_type in ["AkPnCGdD","ENSTDkCl","AkPnBcht","AkPnBsdf","AkPnStgb","ENSTDkAm","SptkBGAm","StbgTGd2"]:
    compute_scores_database(piano_type, H_normalization = False, fusion_peak=True)

Piano: AkPnCGdD
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.039,0.8243,0.7996,0.8063,0.6872,187.0,47.2667,64.2667
H: normal,0.022,0.7669,0.6521,0.7013,0.5472,161.2,52.5,90.1667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.8471,0.8133,0.8281,0.719,191.7333,40.7667,59.6333
H: normal,0.8066,0.6612,0.7244,0.575,163.3333,41.7667,88.0333


Piano: ENSTDkCl
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.021,0.7679,0.5854,0.6618,0.5039,143.6,40.1333,101.2667
H: normal,0.013,0.6168,0.5264,0.5654,0.403,129.3333,75.6667,115.5333


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.7714,0.5966,0.6713,0.5143,146.9667,41.2333,97.9333
H: normal,0.6291,0.5415,0.5801,0.4172,134.2,77.8333,110.8333


Piano: AkPnBcht
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.043,0.8456,0.8084,0.8235,0.7164,228.9667,57.5333,69.8333
H: normal,0.03,0.8004,0.7079,0.7478,0.6093,205.2,69.3667,93.8333


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.8531,0.8202,0.8354,0.7329,231.0,48.9333,67.8667
H: normal,0.8171,0.7166,0.7623,0.6292,205.3667,54.1333,93.5


Piano: AkPnBsdf
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.017,0.7863,0.7465,0.7608,0.6229,202.3,62.4,76.8
H: normal,0.014,0.7496,0.684,0.7109,0.5592,185.8,69.5,93.5


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.829,0.7324,0.7762,0.6421,198.3667,46.4667,80.9333
H: normal,0.7771,0.6883,0.7289,0.5815,186.2333,58.2333,93.1667


Piano: AkPnStgb
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.04,0.8273,0.717,0.7631,0.634,199.7333,49.9667,94.0
H: normal,0.031,0.7895,0.633,0.6981,0.5478,179.9667,55.9333,113.8


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.821,0.7494,0.7807,0.6555,209.2,53.0,84.7
H: normal,0.7856,0.6593,0.7143,0.5664,188.3,59.5,105.5


Piano: ENSTDkAm
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.015,0.4425,0.3521,0.3902,0.2493,107.4667,119.7,184.4667
H: normal,0.002,0.2288,0.2784,0.25,0.1498,87.9,268.3,204.1667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.4336,0.3791,0.4012,0.2578,118.5333,146.2667,173.3667
H: normal,0.2658,0.2608,0.2607,0.1578,83.4,221.7667,208.5


Piano: SptkBGAm
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.037,0.8095,0.7318,0.7642,0.6302,186.3333,50.6333,82.6
H: normal,0.024,0.7417,0.6397,0.6836,0.5264,165.9667,64.0333,103.0667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.8189,0.7498,0.7808,0.6506,192.8,47.7333,76.2667
H: normal,0.7534,0.6563,0.6977,0.5432,170.6667,62.2667,98.4


Piano: StbgTGd2
When averaging each threshold on all MAPS


Unnamed: 0,Best threshold,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.017,0.7999,0.7685,0.7765,0.6451,181.7667,53.7333,63.1
H: normal,0.017,0.789,0.6664,0.716,0.5667,159.5,50.7667,85.5667


When optimizing the threshold on each song


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
H: attack,0.8539,0.7557,0.8,0.6754,179.9333,33.0,65.0333
H: normal,0.8222,0.6731,0.7386,0.5938,163.1667,37.6667,81.8
