In [1]:
import barmuscomp.scripts.default_path as paths
import barmuscomp.scripts.overall_scripts as scr
import barmuscomp.lra as lra
import as_seg.CBM_algorithm as CBM
import as_seg.barwise_input as bi
import as_seg.data_manipulation as dm
import as_seg.autosimilarity_computation as as_comp

import os
import pandas as pd
import mirdata
import numpy as np
import math
from IPython.display import display, Markdown
import tensorly as tl

In [2]:
def return_results(references_segments, segments_in_time):
    """
    Score an estimated segmentation against a reference at both tolerances.

    Parameters
    ----------
    references_segments :
        Reference segments, forwarded as-is to dm.compute_score_of_segmentation.
    segments_in_time :
        Estimated segments, forwarded as-is to dm.compute_score_of_segmentation.

    Returns
    -------
    list of two lists
        [[precision, recall, f_measure] with window_length = 0.5,
         [precision, recall, f_measure] with window_length = 3],
        every value rounded to 4 decimals.
    """
    rounded_scores = []
    for tolerance in (0.5, 3):
        precision, recall, f_measure = dm.compute_score_of_segmentation(references_segments, segments_in_time, window_length = tolerance)
        rounded_scores.append([round(precision, 4), round(recall, 4), round(f_measure, 4)])
    return rounded_scores

In [3]:
def fixed_conditions_results_salami(feature = "log_mel_grill", autosimilarity_type = "cosine", compression_technique = "pca", dc = 16, 
                                    lambda_penalty = 1, convolution_type = "full", penalty_func = "modulo8"):
    """
    Segments every song of the SALAMI test subset with the CBM algorithm, in fixed conditions, and displays the average scores.

    The test subset is the list of track ids read from "test_set_salami_gs.txt" in the current working directory (one integer id per line).
    For each test song, the barwise TF matrix is optionally compressed, an autosimilarity is computed from it,
    the CBM algorithm estimates the segments, and these are scored at 0.5s and 3s against the available annotations.
    When two annotations are available, the one maximising the sum of both F measures is kept.

    Parameters
    ----------
    feature : string
        Name of the feature, given to scr.load_or_save_tensor_spectrogram and used in the cache file names.
    autosimilarity_type : string
        Given to as_comp.switch_autosimilarity as `similarity_type`.
    compression_technique : None or string
        None (no compression, the barwise TF matrix is used directly), "pca", "nmf_euc", "nmf_kl" or "nmf_is".
    dc : integer
        Compression dimension. It is capped, for each song, at the number of rows of the barwise TF matrix.
    lambda_penalty : number
        Given to CBM.compute_cbm as `penalty_weight`.
    convolution_type : string
        Given to CBM.compute_cbm.
    penalty_func : string
        Given to CBM.compute_cbm.

    Returns
    -------
    None
        The number of tested songs is printed and the averaged results are displayed as a one-row DataFrame.

    Raises
    ------
    NotImplementedError
        If `compression_technique` is not one of the values listed above.
    """
    salami = mirdata.initialize('salami', data_home = paths.path_entire_salami)
    len_salami = len(salami.track_ids)

    # One (2, 3) block per song: first axis is the tolerance (0.5s, then 3s),
    # second axis is (precision, recall, F measure).
    results_songs = -math.inf * np.ones((len_salami, 2, 3))
    
    hop_length = 32
    subdivision = 96

    # For each NMF variant: tag used in the cache file name, and beta value given to lra.nmf_computation.
    nmf_variants = {"nmf_euc": ("updatehals", 2), "nmf_kl": ("updatemu_beta1", 1), "nmf_is": ("updatemu_beta0", 0)}
    
    all_tracks = salami.load_tracks()
    
    song_idx = 0
    
    # The context manager closes the file; blank lines are ignored; a set gives constant-time membership tests.
    with open(f"{os.getcwd()}/test_set_salami_gs.txt") as file_test_set_gs:
        test_dataset = {int(line) for line in file_test_set_gs if line.strip()}

    for key, track in all_tracks.items():
        if int(key) not in test_dataset:
            continue
        try:
            bars = scr.load_or_save_bars(paths.path_data_persisted_salami, track.audio_path)
            tensor_barwise = scr.load_or_save_tensor_spectrogram(paths.path_data_persisted_salami, track.audio_path, feature, hop_length, subdivision)
            barwise_TF_matrix = tl.unfold(tensor_barwise, 0)

            local_dc = min(dc, barwise_TF_matrix.shape[0])

            # Up to two annotations per song; an annotation which cannot be loaded is ignored.
            ref_tab = []
            for annotation_attr in ("sections_annotator1_uppercase_path", "sections_annotator2_uppercase_path"):
                try:
                    ref_tab.append(salami.load_sections(getattr(track, annotation_attr)).intervals)
                except (TypeError, AttributeError):
                    continue
            if not ref_tab:
                # Nothing to score against: skip the song instead of failing on ref_tab[0].
                print(f"{key} has no usable annotation, skipped")
                continue

            if compression_technique is None:
                Q_matrix = barwise_TF_matrix
            else:
                file_suffix = f"song{key}_feature{feature}_hop{hop_length}_subdivbars{subdivision}"
                # NOTE(review): the SALAMI caches are stored under path_data_persisted_rwc - kept as is, confirm this is intended.
                if compression_technique == "pca":
                    cache_stem = f"{paths.path_data_persisted_rwc}/pca/salami/pca_dim{local_dc}_{file_suffix}"
                elif compression_technique in nmf_variants:
                    update_tag, beta = nmf_variants[compression_technique]
                    cache_stem = f"{paths.path_data_persisted_rwc}/nmf/salami/Q_nmf_dim{local_dc}_{update_tag}_initnndsvd_{file_suffix}"
                else:
                    raise NotImplementedError("Compression method not understood")

                try:
                    # NOTE(review): allow_pickle = True can execute code stored in the file;
                    # assumes the cache only contains files written by this code.
                    Q_matrix = np.load(f"{cache_stem}.npy", allow_pickle = True)
                except FileNotFoundError:
                    if compression_technique == "pca":
                        Q_matrix = lra.pca_projection(barwise_TF_matrix, compression_dimension = local_dc)
                    else:
                        Q_matrix = lra.nmf_computation(barwise_TF_matrix, local_dc, beta = beta, init = "nndsvd")[0]
                    # Without the folder, np.save raises a FileNotFoundError, which would be reported below as a missing song.
                    os.makedirs(os.path.dirname(cache_stem), exist_ok = True)
                    np.save(cache_stem, Q_matrix)

            autosimilarity = as_comp.switch_autosimilarity(Q_matrix, similarity_type = autosimilarity_type, normalise = True)
            segments = CBM.compute_cbm(autosimilarity, penalty_weight = lambda_penalty, penalty_func = penalty_func, convolution_type = convolution_type)[0]
            segments_in_time = dm.segments_from_bar_to_time(segments, bars)

            # Keep the annotation with the highest sum of F measures (index 2); on a tie, the first annotation is kept.
            best_scores = None
            for reference in ref_tab:
                candidate = (dm.compute_score_of_segmentation(reference, segments_in_time, window_length = 0.5),
                             dm.compute_score_of_segmentation(reference, segments_in_time, window_length = 3))
                if best_scores is None or candidate[0][2] + candidate[1][2] > best_scores[0][2] + best_scores[1][2]:
                    best_scores = candidate

            results_songs[song_idx, 0] = best_scores[0]
            results_songs[song_idx, 1] = best_scores[1]

            song_idx += 1

        except FileNotFoundError:
            print(f"{key} not found, normal ?")

    print(f"Tested on {song_idx} songs")
    lines = np.array(["Precision 0.5", "Recall 0.5", "F measure 0.5","Precision 3", "Recall 3", "F measure 3"])  
    res_np = results_songs[:song_idx]
    # Same order as `lines`: the three metrics at 0.5s, then the three metrics at 3s.
    tab = [[round(np.mean(res_np[:, idx_window, idx_metric]), 5) for idx_window in range(2) for idx_metric in range(3)]]

    display(pd.DataFrame(tab, index=[f"Method:{compression_technique}, autosimilarity: {autosimilarity_type}"], columns=lines))

In [4]:
def learn_and_test_salami(feature = "log_mel_grill", autosimilarity_type = "cosine", compression_technique = "pca", 
                          list_dc = [8,16, 24, 32,40], 
                          lambda_penalty = 1, convolution_type = "full", penalty_func = "modulo8"):
    """
    Learning the latent space dimension on the non-test subset of the SALAMI dataset, and computes the results on the SALAMI test-dataset.

    The test subset is the list of track ids read from "test_set_salami_gs.txt" in the current working directory (one integer id per line);
    every other track is used for learning. For each learning song and each dimension of `list_dc`, the barwise TF matrix is compressed,
    segmented with the CBM algorithm and scored at 0.5s and 3s (keeping, when two annotations exist, the one with the highest sum of F measures).
    The selected dimension is the one maximising the sum of the average F measures at 0.5s and 3s.
    Songs whose barwise TF matrix has fewer rows than the largest dimension of `list_dc` are excluded from the learning.

    Parameters
    ----------
    feature : string
        Name of the feature, given to scr.load_or_save_tensor_spectrogram and used in the cache file names.
    autosimilarity_type : string
        Given to as_comp.switch_autosimilarity as `similarity_type`.
    compression_technique : string
        "pca", "nmf_euc", "nmf_kl" or "nmf_is".
    list_dc : list of integers
        Candidate compression dimensions. The default list is only read, never modified.
    lambda_penalty : number
        Given to CBM.compute_cbm as `penalty_weight`.
    convolution_type : string
        Given to CBM.compute_cbm.
    penalty_func : string
        Given to CBM.compute_cbm.

    Returns
    -------
    The value returned by fixed_conditions_results_salami, called with the learned dimension
    (the learned dimension and the number of learning songs are displayed beforehand).

    Raises
    ------
    NotImplementedError
        If `compression_technique` is not one of the values listed above.
    """
    salami = mirdata.initialize('salami', data_home = paths.path_entire_salami)
    len_salami = len(salami.track_ids)
    
    # For each song and each candidate dimension, a (2, 3) block:
    # first axis is the tolerance (0.5s, then 3s), second axis is (precision, recall, F measure).
    results_songs = -math.inf * np.ones((len_salami, len(list_dc), 2, 3))
    
    hop_length = 32
    subdivision = 96

    # For each NMF variant: tag used in the cache file name, and beta value given to lra.nmf_computation.
    # The beta tag keeps the "nmf_kl" and "nmf_is" caches apart: with a shared file name, the second technique
    # to run would silently reload the matrices of the first one. These names are the ones used by
    # fixed_conditions_results_salami, so both functions share the same cache files.
    nmf_variants = {"nmf_euc": ("updatehals", 2), "nmf_kl": ("updatemu_beta1", 1), "nmf_is": ("updatemu_beta0", 0)}
    
    all_tracks = salami.load_tracks()    
    song_idx = 0

    # The context manager closes the file; blank lines are ignored; a set gives constant-time membership tests.
    with open(f"{os.getcwd()}/test_set_salami_gs.txt") as file_mirex:
        test_dataset = {int(line) for line in file_mirex if line.strip()}
            
    for key, track in all_tracks.items():
        if int(key) in test_dataset: # Learning on every other file than the test dataset
            continue
        try:
            bars = scr.load_or_save_bars(paths.path_data_persisted_salami, track.audio_path)
            tensor_barwise = scr.load_or_save_tensor_spectrogram(paths.path_data_persisted_salami, track.audio_path, feature, hop_length, subdivision)
            barwise_TF_matrix = tl.unfold(tensor_barwise, 0)
            
            if barwise_TF_matrix.shape[0] < np.amax(list_dc):
                # Too small for the largest candidate dimension: excluded, so that all dimensions are compared on the same songs.
                continue

            # Up to two annotations per song; an annotation which cannot be loaded is ignored.
            ref_tab = []
            for annotation_attr in ("sections_annotator1_uppercase_path", "sections_annotator2_uppercase_path"):
                try:
                    ref_tab.append(salami.load_sections(getattr(track, annotation_attr)).intervals)
                except (TypeError, AttributeError):
                    continue
            if not ref_tab:
                # Nothing to score against: skip the song instead of failing on ref_tab[0].
                print(f"{key} has no usable annotation, skipped")
                continue

            file_suffix = f"song{key}_feature{feature}_hop{hop_length}_subdivbars{subdivision}"

            for idx_dc, dc in enumerate(list_dc):
                # NOTE(review): the SALAMI caches are stored under path_data_persisted_rwc - kept as is, confirm this is intended.
                if compression_technique == "pca":
                    cache_stem = f"{paths.path_data_persisted_rwc}/pca/salami/pca_dim{dc}_{file_suffix}"
                elif compression_technique in nmf_variants:
                    update_tag, beta = nmf_variants[compression_technique]
                    cache_stem = f"{paths.path_data_persisted_rwc}/nmf/salami/Q_nmf_dim{dc}_{update_tag}_initnndsvd_{file_suffix}"
                else:
                    raise NotImplementedError("Compression method not understood")

                try:
                    # NOTE(review): allow_pickle = True can execute code stored in the file;
                    # assumes the cache only contains files written by this code.
                    Q_matrix = np.load(f"{cache_stem}.npy", allow_pickle = True)
                except FileNotFoundError:
                    if compression_technique == "pca":
                        Q_matrix = lra.pca_projection(barwise_TF_matrix, compression_dimension = dc)
                    else:
                        Q_matrix = lra.nmf_computation(barwise_TF_matrix, dc, beta = beta, init = "nndsvd")[0]
                    # Without the folder, np.save raises a FileNotFoundError, which would be reported below as a missing song.
                    os.makedirs(os.path.dirname(cache_stem), exist_ok = True)
                    np.save(cache_stem, Q_matrix)

                autosimilarity = as_comp.switch_autosimilarity(Q_matrix, similarity_type = autosimilarity_type, normalise = True)
                segments = CBM.compute_cbm(autosimilarity, penalty_weight = lambda_penalty, penalty_func = penalty_func, convolution_type = convolution_type)[0]
                segments_in_time = dm.segments_from_bar_to_time(segments, bars)

                # Keep the annotation with the highest sum of F measures (index 2); on a tie, the first annotation is kept.
                best_scores = None
                for reference in ref_tab:
                    candidate = (dm.compute_score_of_segmentation(reference, segments_in_time, window_length = 0.5),
                                 dm.compute_score_of_segmentation(reference, segments_in_time, window_length = 3))
                    if best_scores is None or candidate[0][2] + candidate[1][2] > best_scores[0][2] + best_scores[1][2]:
                        best_scores = candidate

                results_songs[song_idx, idx_dc, 0] = best_scores[0]
                results_songs[song_idx, idx_dc, 1] = best_scores[1]

            song_idx += 1

        except FileNotFoundError:
            print(f"{key} not found, normal ?")

    # Criterion for each dimension: average F measure at 0.5s + average F measure at 3s, over the learning songs.
    mean_dc = []
    for idx_dc, dc in enumerate(list_dc):
        mean_dc.append(round(np.mean(results_songs[:song_idx, idx_dc, 0, 2]) + np.mean(results_songs[:song_idx, idx_dc, 1, 2]), 5))
    arg_best_dc = np.argmax(mean_dc)
    best_dc = list_dc[arg_best_dc]

    display(pd.DataFrame(np.array([best_dc, song_idx]), index = ['Best latent space dimension', 'Number of songs in learning dataset'], columns = ["Learned parameters"]).T)

    return fixed_conditions_results_salami(feature = feature, autosimilarity_type = autosimilarity_type, compression_technique = compression_technique, dc = best_dc, 
                                           lambda_penalty = lambda_penalty, convolution_type = convolution_type, penalty_func = penalty_func)

In [5]:
# PCA on the "log_mel_grill" feature: the dimension is learned on the non-test songs, then scored on the test subset.
learn_and_test_salami(feature = "log_mel_grill", autosimilarity_type = "cosine", compression_technique = "pca")

710 not found, normal ?
716 not found, normal ?
1248 not found, normal ?
722 not found, normal ?
720 not found, normal ?
711 not found, normal ?
718 not found, normal ?
717 not found, normal ?
63 not found, normal ?
719 not found, normal ?
714 not found, normal ?
709 not found, normal ?
261 not found, normal ?
724 not found, normal ?
878 not found, normal ?
712 not found, normal ?
715 not found, normal ?
723 not found, normal ?


Unnamed: 0,Best latent space dimension,Number of songs in learning dataset
Learned parameters,40,784


70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:pca, autosimilarity: cosine",0.376,0.5555,0.435,0.5321,0.7947,0.6184


In [6]:
# Same PCA experiment, on the "nn_log_mel_grill" feature (the feature used for all the NMF runs in the following cells).
learn_and_test_salami(feature = "nn_log_mel_grill", autosimilarity_type = "cosine", compression_technique = "pca")

710 not found, normal ?
716 not found, normal ?
1248 not found, normal ?
722 not found, normal ?
720 not found, normal ?
711 not found, normal ?
718 not found, normal ?
717 not found, normal ?
63 not found, normal ?
719 not found, normal ?
714 not found, normal ?
709 not found, normal ?
261 not found, normal ?
724 not found, normal ?
878 not found, normal ?
712 not found, normal ?
715 not found, normal ?
723 not found, normal ?


Unnamed: 0,Best latent space dimension,Number of songs in learning dataset
Learned parameters,40,784


70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:pca, autosimilarity: cosine",0.3674,0.5746,0.4341,0.5127,0.8081,0.6075


In [7]:
# NMF computed with beta = 2 ("nmf_euc"), dimension learned on the non-test songs.
learn_and_test_salami(feature = "nn_log_mel_grill", autosimilarity_type = "cosine", compression_technique = "nmf_euc")

710 not found, normal ?
716 not found, normal ?
1248 not found, normal ?
722 not found, normal ?
720 not found, normal ?
711 not found, normal ?
718 not found, normal ?
717 not found, normal ?
63 not found, normal ?
719 not found, normal ?
714 not found, normal ?
709 not found, normal ?
261 not found, normal ?
724 not found, normal ?
878 not found, normal ?
712 not found, normal ?
715 not found, normal ?
723 not found, normal ?


Unnamed: 0,Best latent space dimension,Number of songs in learning dataset
Learned parameters,16,784


70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:nmf_euc, autosimilarity: cosine",0.4071,0.4934,0.4346,0.5814,0.7038,0.6203


In [8]:
# NMF computed with beta = 1 ("nmf_kl"), dimension learned on the non-test songs.
learn_and_test_salami(feature = "nn_log_mel_grill", autosimilarity_type = "cosine", compression_technique = "nmf_kl")

710 not found, normal ?
716 not found, normal ?
1248 not found, normal ?
722 not found, normal ?
720 not found, normal ?
711 not found, normal ?
718 not found, normal ?
717 not found, normal ?
63 not found, normal ?
719 not found, normal ?
714 not found, normal ?
709 not found, normal ?
261 not found, normal ?
724 not found, normal ?
878 not found, normal ?
712 not found, normal ?
715 not found, normal ?
723 not found, normal ?


Unnamed: 0,Best latent space dimension,Number of songs in learning dataset
Learned parameters,16,784


70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:nmf_kl, autosimilarity: cosine",0.4176,0.5043,0.4438,0.5869,0.7097,0.6239


In [9]:
# NMF computed with beta = 0 ("nmf_is"), dimension learned on the non-test songs.
learn_and_test_salami(feature = "nn_log_mel_grill", autosimilarity_type = "cosine", compression_technique = "nmf_is")

710 not found, normal ?
716 not found, normal ?
1248 not found, normal ?
722 not found, normal ?
720 not found, normal ?
711 not found, normal ?
718 not found, normal ?
717 not found, normal ?
63 not found, normal ?
719 not found, normal ?
714 not found, normal ?
709 not found, normal ?
261 not found, normal ?
724 not found, normal ?
878 not found, normal ?
712 not found, normal ?
715 not found, normal ?
723 not found, normal ?


Unnamed: 0,Best latent space dimension,Number of songs in learning dataset
Learned parameters,16,784


70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:nmf_is, autosimilarity: cosine",0.396,0.5114,0.4352,0.5658,0.7303,0.6216


In [11]:
# Baseline without compression: the autosimilarity is computed directly on the barwise TF matrix (`dc` has no effect here).
fixed_conditions_results_salami(feature = "nn_log_mel_grill", autosimilarity_type = "cosine", compression_technique = None)

70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:None, autosimilarity: cosine",0.4485,0.3709,0.3917,0.6378,0.5294,0.5585


In [12]:
# Same baseline without compression, on the "log_mel_grill" feature.
fixed_conditions_results_salami(feature = "log_mel_grill", autosimilarity_type = "cosine", compression_technique = None)

70 not found, normal ?
Tested on 486 songs


Unnamed: 0,Precision 0.5,Recall 0.5,F measure 0.5,Precision 3,Recall 3,F measure 3
"Method:None, autosimilarity: cosine",0.4559,0.3205,0.3664,0.6327,0.4425,0.5072
