In [1]:
import numpy as np
import mir_eval.transcription
import os
from IPython.display import display, Markdown

import transcribe_factorization as tf
import evaluate_transcription as et
import STFT
import pandas as pd
import note_seq as ns

In [2]:
def load_ref_in_array_midi(ref_path, time_limit=None):
  
    truth_array = []
    note_seq = ns.midi_file_to_note_sequence(ref_path).notes
    for i in range(len(note_seq)):
        note = note_seq[i]
        line_to_array = [note.start_time, note.end_time, note.pitch]

        if(time_limit != None) and (float(line_to_array[0]) > time_limit):
            # if onset > time_limit (note outside of the cropped excerpt)
            break
        else:
            truth_array.append(line_to_array)

    return truth_array


In [5]:
def printmd(string):
    display(Markdown(string))

def compute_scores_database_maestro(piano_type, T, H_normalization = False, adaptative_threshold = True):
    
    # we only have activations for T = 10
    path_computed_H = "../data_persisted/activation_maestro_total"
    path_songs = "/Users/***/Downloads/piano_songs/test_songs"
    path_ref = "/Users/***/Downloads/piano_songs/test_midi"
    
    time_limit = 30
    beta = 1
    init = "L1"
    model_AD = True
    note_intensity = "M"
    itmax_H = 100
    tol = 1e-8
    codebook = range(21, 109)
    onset_tolerance = 50/1000
    
    #a = np.arange(1e-3, 1e-2, 1e-3)
    #b = np.arange(1e-4, 1e-3, 1e-4)
    #c = np.arange(1e-5, 1e-4, 1e-5)
    #d = np.arange(1e-6, 1e-5, 1e-6)
    f = np.arange(1e-2, 6e-1, 2e-2)
    #g = np.arange(0.3, 0.5, 0.1)
    #h = np.arange(1, 1.5, 0.1)
    #j = np.arange(1e-7, 1e-6, 1e-7)
    #listthres = np.r_[h[::-1], g[::-1], f[::-1], a[::-1], b[::-1], c[::-1], d[::-1], j[::-1]]
    listthres = np.r_[f[::-1]]

    files = os.listdir(path_songs)
    list_files_wav = []
    for it_files in files:
        if it_files.split(".")[-1] == "wav":
            list_files_wav.append(it_files)

    all_res = []
    for a_song in list_files_wav:
        song_name = a_song.replace(".wav", "")
#         print("processing piano song: {}".format(song_name))
        path_this_song = "{}/{}".format(path_songs, a_song)
        stft = STFT.STFT(path_this_song, time = time_limit, model_AD=model_AD, channel = 0)

        X = stft.get_magnitude_spectrogram()

        annot_name = song_name + ".midi"
        H_persisted_name = "activations_song_{}_W_learned_{}_beta_{}_T_{}_init_{}_stftAD_{}_itmax_{}_intensity_W_{}_time_limit_{}_tol_{}".format(song_name, piano_type, beta, T, init, model_AD, itmax_H, note_intensity, time_limit, tol)
        annot_this_song = "{}/{}".format(path_ref, annot_name)
        note_annotations = load_ref_in_array_midi(annot_this_song, time_limit=time_limit)
        ref = np.array(note_annotations, float)
        ref_pitches = np.array(ref[:,2], int)
        try:
            H = np.load("{}/{}.npy".format(path_computed_H, H_persisted_name), allow_pickle = True)
            res_each_song = []
            for threshold in listthres:
                prediction, _ = tf.transcribe_activations_dynamic(codebook, H, stft, threshold, H_normalization = H_normalization)
                est = np.array(prediction, float)
                if est.size > 0:
                    est_pitches = np.array(est[:,2], int)
                    (prec, rec, f_mes, _) = mir_eval.transcription.precision_recall_f1_overlap(ref[:,0:2], ref_pitches, est[:,0:2], est_pitches, offset_ratio = None, onset_tolerance = onset_tolerance)
                    matching = mir_eval.transcription.match_notes(ref[:,0:2], ref_pitches, est[:,0:2],est_pitches, onset_tolerance=onset_tolerance,offset_ratio=None)
                    TP = len(matching)
                    try:
                        FP = int(TP * (1 - prec) / prec)
                    except ZeroDivisionError:
                        FP = 0
                    try:
                        FN = int(TP * (1 - rec) / rec)
                    except ZeroDivisionError:
                        FN = 0
                    acc = et.accuracy(TP,FP,FN)
                else:
                    prec, rec, f_mes, acc, TP, FP, FN = (0,0,0,0,0,0,0)
                res_each_song.append([prec, rec, f_mes, acc, TP, FP, FN])
            all_res.append(res_each_song)
        except FileNotFoundError:
            print("\033[91m This song failed: {} \033[00m".format(a_song)) 
            pass
        
    np_all_res = np.array(all_res)
    lines = []
    col = ['Precision', 'Recall', 'F measure','Accuracy','True Positives','False Positives','False Negatives']
    for idx, threshold in enumerate(listthres):
        lines.append([round(np.mean(np_all_res[:,idx,i]), 4) for i in range(len(col))])
    df = pd.DataFrame(np.array(lines), columns = col, index = np.round(listthres,decimals = 5))
    display(df.style.bar(subset=["F measure", "Accuracy"], color='#5fba7d'))
    
    best_val = []
    for each_song in range(len(list_files_wav)):
        best_thresh_idx = np.argmax(np_all_res[each_song,:,2])
        best_val.append([round(np_all_res[each_song,best_thresh_idx,i], 4) for i in range(len(col))])
    
    best_df = pd.DataFrame(np.array([np.mean(np.array(best_val)[:,i]) for i in range(len(col))]), columns = ["best res when optimizing thresh"], index = col)
    display(best_df.T)

In [6]:
# intensity M
T = 10
piano_type = "AkPnCGdD"
print('piano_type = ' + piano_type)
compute_scores_database_maestro(piano_type, T, H_normalization = False, adaptative_threshold = True)

piano_type = AkPnCGdD


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
0.59,0.7831,0.2057,0.2863,0.1882,66.322,18.322,226.3898
0.57,0.7827,0.2153,0.2968,0.1958,69.4802,20.2768,224.1808
0.55,0.778,0.2257,0.3078,0.2037,72.8362,22.4972,221.887
0.53,0.7703,0.2376,0.3204,0.2128,76.565,25.1243,218.1469
0.51,0.7669,0.249,0.3315,0.2209,80.2542,28.2147,215.8023
0.49,0.7589,0.2611,0.3428,0.2291,84.1469,31.6102,211.9379
0.47,0.7622,0.2739,0.3549,0.2379,88.2486,35.2542,210.3729
0.45,0.7556,0.2879,0.3676,0.2476,92.7571,39.3333,205.8475
0.43,0.7448,0.3025,0.38,0.2568,97.435,44.3955,201.1412
0.41,0.7399,0.318,0.3934,0.2668,102.3955,49.7119,196.435


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
best res when optimizing thresh,0.598934,0.592421,0.58761,0.433112,179.80791,149.502825,122.135593


In [7]:
T = 10
piano_type = "ENSTDkCl"
print('piano_type = ' + piano_type)
compute_scores_database_maestro(piano_type, T, H_normalization = False, adaptative_threshold = True)

piano_type = ENSTDkCl


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
0.59,0.7624,0.1882,0.2627,0.1687,60.9774,20.5537,229.4915
0.57,0.7572,0.1972,0.2723,0.1754,63.9774,22.7684,226.4972
0.55,0.7513,0.2066,0.2817,0.1819,67.1243,25.5028,224.2712
0.53,0.7526,0.2174,0.2929,0.1898,70.5876,28.5028,224.1977
0.51,0.7527,0.2287,0.3039,0.1974,74.2542,31.9661,222.0169
0.49,0.7484,0.241,0.3158,0.2059,78.2429,35.5593,219.0621
0.47,0.7395,0.2538,0.3275,0.2142,82.4972,39.9944,214.7797
0.45,0.7281,0.2665,0.3387,0.222,86.5932,45.2712,210.6328
0.43,0.717,0.2812,0.3509,0.2307,91.3051,51.2203,205.9266
0.41,0.7051,0.2967,0.3635,0.2397,96.2655,57.9435,200.9661


Unnamed: 0,Precision,Recall,F measure,Accuracy,True Positives,False Positives,False Negatives
best res when optimizing thresh,0.561141,0.568802,0.556935,0.399537,170.915254,162.519774,131.090395
