In [1]:
import numpy as np
import mir_eval.transcription
import os
from IPython.display import display, Markdown

import transcribe_factorization as tf
import evaluate_transcription as et
import STFT
import pandas as pd

In [2]:
def printmd(string):
    """Render ``string`` as Markdown in the notebook output area."""
    rendered = Markdown(string)
    display(rendered)

def _score_transcription(ref_intervals, ref_pitches, prediction, onset_tolerance):
    """Score one estimated transcription against the reference notes.

    Parameters
    ----------
    ref_intervals : array, shape (n_ref, 2)
        Reference (onset, offset) pairs.
    ref_pitches : array, shape (n_ref,)
        Reference pitches.
    prediction : sequence
        Estimated notes; columns 0-1 are used as the interval and column 2 as the pitch.
    onset_tolerance : float
        Onset tolerance forwarded to mir_eval.

    Returns
    -------
    list
        [precision, recall, f_measure, accuracy, TP, FP, FN].
    """
    est = np.array(prediction, float)
    if est.size == 0:
        # Nothing was transcribed: no hit, no false alarm, every reference note is missed.
        return [0, 0, 0, 0, 0, 0, len(ref_pitches)]

    est_intervals = est[:, 0:2]
    est_pitches = np.array(est[:, 2], int)
    # NOTE(review): mir_eval documents pitches in Hz. If column 2 holds MIDI numbers
    # (the codebook is range(21, 109)), the default pitch tolerance in cents would not
    # mean "same note"; consider mir_eval.util.midi_to_hz -- TODO confirm the unit.
    prec, rec, f_mes, _ = mir_eval.transcription.precision_recall_f1_overlap(
        ref_intervals, ref_pitches, est_intervals, est_pitches,
        offset_ratio = None, onset_tolerance = onset_tolerance)
    matching = mir_eval.transcription.match_notes(
        ref_intervals, ref_pitches, est_intervals, est_pitches,
        onset_tolerance = onset_tolerance, offset_ratio = None)

    # Count errors directly instead of back-deriving them from the float precision and
    # recall: int() truncation could be off by one, and a zero precision/recall used to
    # report 0 false positives/negatives.
    TP = len(matching)
    FP = len(est_pitches) - TP
    FN = len(ref_pitches) - TP
    acc = et.accuracy(TP, FP, FN)
    return [prec, rec, f_mes, acc, TP, FP, FN]


def compute_scores_database(piano_type_W, piano_type_H, H_normalization = False, adaptative_threshold = True,
                            path_maps = "C:/Users/amarmore/Desktop/Audio samples/MAPS",
                            path_computed_H = "../data_persisted/activations"):
    """Evaluate persisted activations on every song of one MAPS piano.

    For each ``.wav`` file found in ``<path_maps>/<piano_type_H>/MUS`` and for each
    T in (5, 10, 20), the persisted activation matrix H is loaded, thresholded with every
    value of the threshold grid, and the resulting transcription is scored against the
    ``.txt`` annotation stored next to the audio file. Songs whose persisted activations
    are missing are reported and skipped.

    Parameters
    ----------
    piano_type_W : str
        Piano the dictionary W was learned on (only used in the persisted file name).
    piano_type_H : str
        Piano whose songs are transcribed (selects the song directory).
    H_normalization : bool
        Forwarded to the transcription routine.
    adaptative_threshold : bool
        If True use ``tf.transcribe_activations_dynamic``, else ``tf.transcribe_activations``.
    path_maps : str
        Root directory of the MAPS database.
    path_computed_H : str
        Directory containing the persisted ``.npy`` activations.

    Returns
    -------
    lines : np.ndarray, one row per T
        [best threshold, precision, recall, F measure, accuracy, TP, FP, FN], averaged
        over the songs at the single threshold maximising the mean F measure.
    lines_opt_thresh : np.ndarray, one row per T
        [precision, recall, F measure, accuracy, TP, FP, FN], averaged over the songs
        when the threshold is chosen per song to maximise its F measure.

    Raises
    ------
    IndexError
        If no song could be scored (callers rely on this exception type).
    """
    path_songs = "{}/{}/MUS".format(path_maps, piano_type_H)
    printmd("Piano for W: {}, and for H: {}".format(piano_type_W, piano_type_H))

    # Experimental setting; most of these values are part of the persisted file names.
    time_limit = 30
    beta = 1
    init = "L1"
    model_AD = True
    note_intensity = "M"
    itmax_H = 100
    tol = 1e-8
    codebook = range(21, 109)
    onset_tolerance = 50/1000
    T_values = (5, 10, 20)

    metric_names = ['Precision', 'Recall', 'F measure', 'Accuracy',
                    'True Positives', 'False Positives', 'False Negatives']
    n_metrics = len(metric_names)
    f_measure_idx = metric_names.index('F measure')

    # Threshold grid, explored from the largest value to the smallest.
    listthres = np.arange(1e-2, 4e-1, 1e-2)[::-1]

    list_files_wav = [name for name in os.listdir(path_songs) if name.endswith(".wav")]

    all_res = []
    for a_song in list_files_wav:
        # Strip only the extension, so a "wav" substring inside the name is left alone.
        song_name = os.path.splitext(a_song)[0]
        path_this_song = "{}/{}".format(path_songs, a_song)
        stft = STFT.STFT(path_this_song, time = time_limit, model_AD=model_AD, channel = 0)

        # NOTE(review): the returned spectrogram is unused here; the call is kept in case
        # it has side effects on `stft` -- confirm and remove if it does not.
        stft.get_magnitude_spectrogram()

        annot_this_song = "{}/{}.txt".format(path_songs, song_name)
        note_annotations = et.load_ref_in_array(annot_this_song, time_limit=time_limit)
        ref = np.array(note_annotations, float)
        ref_intervals = ref[:, 0:2]
        ref_pitches = np.array(ref[:, 2], int)
        try:
            res_a_param = []
            for T in T_values:
                H_persisted_name = "activations_song_{}_W_learned_{}_beta_{}_T_{}_init_{}_stftAD_{}_itmax_{}_intensity_W_{}_time_limit_{}_tol_{}".format(song_name, piano_type_W, beta, T, init, model_AD, itmax_H, note_intensity, time_limit, tol)
                # NOTE: allow_pickle=True can execute code stored in the file; only load
                # activation files that were produced locally.
                H = np.load("{}/{}.npy".format(path_computed_H, H_persisted_name), allow_pickle = True)
                res_every_thresh = []
                for threshold in listthres:
                    if adaptative_threshold:
                        prediction, _ = tf.transcribe_activations_dynamic(codebook, H, stft, threshold, H_normalization = H_normalization)
                    else:
                        prediction, _ = tf.transcribe_activations(codebook, H, stft, threshold, H_normalization = H_normalization)
                    res_every_thresh.append(_score_transcription(ref_intervals, ref_pitches, prediction, onset_tolerance))
                res_a_param.append(res_every_thresh)
            all_res.append(res_a_param)
        except FileNotFoundError:
            # Activations are missing for at least one T: skip the whole song.
            print("\033[91m This song failed: {} \033[00m".format(a_song))

    if not all_res:
        # Explicit version of the error the aggregation below used to raise by accident.
        raise IndexError("No persisted activations could be scored for W piano {} and H piano {}.".format(piano_type_W, piano_type_H))

    # Axes: (scored song, T, threshold, metric).
    np_all_res = np.array(all_res)
    # Only successfully scored songs are in the array; the number of wav files can be larger.
    n_scored_songs = np_all_res.shape[0]

    lines = []
    lines_opt_thresh = []
    for cond in range(len(T_values)):
        # One threshold for the whole database: the one maximising the mean F measure.
        mean_f_per_thresh = [np.mean(np_all_res[:, cond, each_thresh, f_measure_idx])
                             for each_thresh in range(len(listthres))]
        best_thresh_idx = np.argmax(mean_f_per_thresh)
        this_line = [listthres[best_thresh_idx]]
        this_line.extend(round(np.mean(np_all_res[:, cond, best_thresh_idx, i]), 4)
                         for i in range(n_metrics))
        lines.append(this_line)

        # One threshold per song: each song contributes its own best-F scores.
        best_val = []
        for each_song in range(n_scored_songs):
            song_best_idx = np.argmax(np_all_res[each_song, cond, :, f_measure_idx])
            best_val.append([round(np_all_res[each_song, cond, song_best_idx, i], 4)
                             for i in range(n_metrics)])
        best_val = np.array(best_val)
        lines_opt_thresh.append([round(np.mean(best_val[:, i]), 4) for i in range(n_metrics)])

    return np.array(lines), np.array(lines_opt_thresh)

# Big table with all results

In [3]:
# Accumulate one row per (W piano, H piano, T) for both ways of choosing the threshold.
big_table_fix = []
big_table_opt = []
piano_W = []
piano_H = []
the_t = []
for piano_type_W in ["AkPnCGdD","ENSTDkCl","AkPnBcht","AkPnBsdf","AkPnStgb","ENSTDkAm","SptkBGAm","StbgTGd2"]:
    for piano_type_H in ["AkPnCGdD","ENSTDkCl"]:
        try:
            fix_thresh_cond, opt_thresh_cond = compute_scores_database(piano_type_W, piano_type_H, H_normalization = False, adaptative_threshold = True)
            # Keep only threshold/precision/recall/F measure/accuracy (drop the TP/FP/FN counts).
            for line_fix in fix_thresh_cond:
                big_table_fix.append(line_fix[:5])
            for line_opt in opt_thresh_cond:
                big_table_opt.append(line_opt[:4])
            # Index labels are added only once the scores exist, so rows and labels stay aligned.
            for t in [5, 10, 20]:
                piano_W.append(f"W: {piano_type_W}")
                piano_H.append(f"H: {piano_type_H}")
                the_t.append(f"T: {t}")
        except IndexError:
            # The handler used to format an undefined name (`piano_type`), which raised a
            # NameError instead of printing this message.
            print("We have to compute this piano at these conditions (piano W: {}, piano H: {}, T = 5, 10, 20, tol = 1e-8 and itmax = 100).".format(piano_type_W, piano_type_H))

printmd("## Fixing the threshold for all songs")
index_pandas = [np.array(piano_W), np.array(piano_H), np.array(the_t)]
col = ['Best threshold','Precision', 'Recall', 'F measure','Accuracy']

df = pd.DataFrame(np.array(big_table_fix), columns = col, index = index_pandas)
display(df.style.bar(subset=["F measure", "Accuracy"], color='#5fba7d'))

printmd("## Optimizing the threshold on each song")
df = pd.DataFrame(np.array(big_table_opt), columns = col[1:], index = index_pandas)
display(df.style.bar(subset=["F measure", "Accuracy"], color='#5fba7d'))

Piano for W: AkPnCGdD, and for H: AkPnCGdD

Piano for W: AkPnCGdD, and for H: ENSTDkCl

Piano for W: ENSTDkCl, and for H: AkPnCGdD

Piano for W: ENSTDkCl, and for H: ENSTDkCl

Piano for W: AkPnBcht, and for H: AkPnCGdD

Piano for W: AkPnBcht, and for H: ENSTDkCl

Piano for W: AkPnBsdf, and for H: AkPnCGdD

Piano for W: AkPnBsdf, and for H: ENSTDkCl

Piano for W: AkPnStgb, and for H: AkPnCGdD

Piano for W: AkPnStgb, and for H: ENSTDkCl

Piano for W: ENSTDkAm, and for H: AkPnCGdD

Piano for W: ENSTDkAm, and for H: ENSTDkCl

Piano for W: SptkBGAm, and for H: AkPnCGdD

Piano for W: SptkBGAm, and for H: ENSTDkCl

Piano for W: StbgTGd2, and for H: AkPnCGdD

Piano for W: StbgTGd2, and for H: ENSTDkCl

## Fixing the threshold for all songs

Unnamed: 0,Unnamed: 1,Unnamed: 2,Best threshold,Precision,Recall,F measure,Accuracy
W: AkPnCGdD,H: AkPnCGdD,T: 5,0.06,0.8955,0.8124,0.8361,0.7309
W: AkPnCGdD,H: AkPnCGdD,T: 10,0.08,0.9437,0.8855,0.9069,0.8379
W: AkPnCGdD,H: AkPnCGdD,T: 20,0.12,0.9414,0.906,0.9189,0.8582
W: AkPnCGdD,H: ENSTDkCl,T: 5,0.12,0.6435,0.6089,0.5975,0.4359
W: AkPnCGdD,H: ENSTDkCl,T: 10,0.18,0.6721,0.6372,0.6326,0.4721
W: AkPnCGdD,H: ENSTDkCl,T: 20,0.27,0.6843,0.6532,0.6513,0.4927
W: ENSTDkCl,H: AkPnCGdD,T: 5,0.05,0.7041,0.6608,0.6507,0.4936
W: ENSTDkCl,H: AkPnCGdD,T: 10,0.09,0.7919,0.6741,0.7066,0.5565
W: ENSTDkCl,H: AkPnCGdD,T: 20,0.13,0.6988,0.6855,0.6749,0.5147
W: ENSTDkCl,H: ENSTDkCl,T: 5,0.12,0.8137,0.7746,0.7793,0.6509


## Optimizing the threshold on each song

Unnamed: 0,Unnamed: 1,Unnamed: 2,Precision,Recall,F measure,Accuracy
W: AkPnCGdD,H: AkPnCGdD,T: 5,0.9061,0.8534,0.877,0.788
W: AkPnCGdD,H: AkPnCGdD,T: 10,0.9581,0.922,0.9393,0.8933
W: AkPnCGdD,H: AkPnCGdD,T: 20,0.9548,0.9343,0.9441,0.9008
W: AkPnCGdD,H: ENSTDkCl,T: 5,0.6812,0.6075,0.6393,0.4774
W: AkPnCGdD,H: ENSTDkCl,T: 10,0.7123,0.6476,0.6734,0.5154
W: AkPnCGdD,H: ENSTDkCl,T: 20,0.6952,0.6698,0.6795,0.523
W: ENSTDkCl,H: AkPnCGdD,T: 5,0.7387,0.6681,0.6978,0.5438
W: ENSTDkCl,H: AkPnCGdD,T: 10,0.7974,0.7119,0.7489,0.6058
W: ENSTDkCl,H: AkPnCGdD,T: 20,0.737,0.6898,0.7092,0.5544
W: ENSTDkCl,H: ENSTDkCl,T: 5,0.8494,0.7982,0.8202,0.7045
