In [1]:
import msaf
import pandas as pd
import mirdata
import os
# Show floats with 4 decimals in rendered tables. The fully-qualified option name is
# used because the short pattern 'precision' matches several pandas options
# ('display.precision', 'styler.format.precision') and is rejected as ambiguous.
pd.set_option('display.precision', 4)
import numpy as np
import as_seg.scripts.overall_scripts as scr
import as_seg.scripts.default_path as paths
import as_seg.data_manipulation as dm

# Segmentation results of baseline methods

This notebook presents the segmentation results of the baseline methods, computed with MSAF.

We restricted the baseline to four algorithms:
 - CNMF [1],
 - Foote's novelty [2],
 - Spectral Clustering [3],
 - Structural Features [4].

In [2]:
# Identifiers of the MSAF boundary algorithms evaluated as baselines.
desired_algos = [
    "cnmf",      # CNMF [1]
    "foote",     # Foote's novelty [2]
    "scluster",  # Spectral Clustering [3]
    "sf",        # Structural Features [4]
]

Below is the code to compute these scores.

In [3]:
def _score_at_both_tolerances(reference, estimated_segments):
    # Score `estimated_segments` against `reference` with the 0.5 s and the 3 s windows.
    score_zero_five = dm.compute_score_of_segmentation(reference, estimated_segments, window_length=0.5)
    score_three = dm.compute_score_of_segmentation(reference, estimated_segments, window_length=3)
    return score_zero_five, score_three


def parse_all_algos(song_path, ref_tab, bars):
    """
    Estimate the boundaries of a song with every algorithm of `desired_algos`
    and score them against the reference annotation(s).

    For each algorithm, two results are produced, in this order:
      1. the scores of the segments as returned by MSAF,
      2. the scores of the same segments after `dm.align_segments_on_bars`.

    When two annotations are available, the one whose summed F measures
    (0.5 s + 3 s windows, computed on the non-aligned segments) is the highest
    is kept, and the same annotation is used to score the aligned segments.
    Only the first two entries of `ref_tab` are considered.

    Parameters
    ----------
    song_path : str
        Path to the audio file, forwarded to `msaf.process`.
    ref_tab : list
        Reference annotations (one entry per annotator); must not be empty.
    bars : object
        Bars of the song, forwarded to `dm.align_segments_on_bars`.

    Returns
    -------
    zero_five_to_return, three_to_return : list, list
        Scores with the 0.5 s and the 3 s windows respectively. Each list holds
        two entries per algorithm (original, then aligned), in the order of
        `desired_algos`.

    Raises
    ------
    ValueError
        If `ref_tab` is empty, as there is nothing to score against.
    """
    if len(ref_tab) == 0:
        # Fail early with an explicit message rather than an IndexError on ref_tab[0].
        raise ValueError(f"No reference annotation available for {song_path}.")

    zero_five_to_return = []
    three_to_return = []

    for algo in desired_algos:
        boundaries, _ = msaf.process(song_path, boundaries_id=algo)
        segments_in_time = np.array(dm.frontiers_to_segments(boundaries))

        # Scores against the first annotation, used unless the second one is better.
        i_ref_tab = 0
        score_zero_five, score_three = _score_at_both_tolerances(ref_tab[0], segments_in_time)

        if len(ref_tab) > 1:
            second_zero_five, second_three = _score_at_both_tolerances(ref_tab[1], segments_in_time)
            # Index 2 of a score is read as the F measure.
            if second_zero_five[2] + second_three[2] > score_zero_five[2] + score_three[2]:
                i_ref_tab = 1
                score_zero_five, score_three = second_zero_five, second_three

        zero_five_to_return.append(score_zero_five)
        three_to_return.append(score_three)

        # Same estimation, aligned on bars, scored against the annotation selected above.
        aligned_segments = dm.align_segments_on_bars(segments_in_time, bars)
        aligned_zero_five, aligned_three = _score_at_both_tolerances(ref_tab[i_ref_tab], aligned_segments)
        zero_five_to_return.append(aligned_zero_five)
        three_to_return.append(aligned_three)

    return zero_five_to_return, three_to_return

In [4]:
# Script which parses all songs of the SALAMI test set, computes their frontiers with
# every baseline algorithm, and then stores the segmentation scores of each song.
zero_point_five_results = []
three_seconds_results = []
five_rates_results = []
three_rates_results = []

salami = mirdata.initialize('salami', data_home = paths.path_entire_salami)
all_tracks = salami.load_tracks()
song_idx = 0

# Identifiers of the test set, one per line. The context manager closes the file,
# and blank lines (e.g. a trailing newline) are ignored instead of crashing int().
with open(f"{os.getcwd()}/test_set_salami_gs.txt") as file_test_set_gs:
    test_dataset = [int(part) for part in file_test_set_gs if part.strip()]
test_ids = set(test_dataset)

for key, track in all_tracks.items():
    if int(key) not in test_ids:
        continue

    track_path = track.audio_path.replace("Salami\\audio", "Salami/salami-data-public-hierarchy-corrections/audio")
    try:
        # NOTE(review): bars are loaded with the original `track.audio_path`, whereas the
        # audio is processed from the rewritten `track_path` - confirm this is intended.
        bars = scr.load_or_save_bars(paths.path_data_persisted_salami, track.audio_path)

        # Gather the annotations which can be loaded (a song may lack one of them).
        ref_tab = []
        for annotation_attribute in ("sections_annotator1_uppercase_path",
                                     "sections_annotator2_uppercase_path"):
            try:
                annotation_path = getattr(track, annotation_attribute)
                references_segments = mirdata.datasets.salami.load_sections(annotation_path).intervals
                ref_tab.append(references_segments)
            except (TypeError, AttributeError):
                # This annotation could not be loaded: rely on the other one.
                pass

        if not ref_tab:
            # Nothing to score against: skip the song rather than failing on ref_tab[0].
            print(f"Song {key} has no usable annotation, skipped.")
            continue

        this_zero, this_three = parse_all_algos(track_path, ref_tab, bars)

        zero_point_five_results.append(this_zero)
        three_seconds_results.append(this_three)
    except FileNotFoundError:
        print(f"Song {key} not found, normal?")











Song 70 not found, normal?
























Finally, we display the scores of the baseline methods, averaged over the test set, as a table.

In [5]:
# Scores of every song, indexed below as [song, row, metric].
zerofive = np.array(zero_point_five_results)
three = np.array(three_seconds_results)

all_algos = list(desired_algos)
params = ['Original', 'Aligned on downbeats']

# Row labels: one row per (algorithm, variant) pair, algorithm being the outer level.
line = [algo for algo in all_algos for _ in params]
subline = [variant for _ in all_algos for variant in params]
nested_lines = [np.array(line), np.array(subline)]

# Column labels: the three metrics, repeated for each tolerance.
tolerances = ['0.5 seconds', '3 seconds']
metrics = ['Precision', 'Recall', 'F measure']
col = [np.array([tolerance for tolerance in tolerances for _ in metrics]),
    np.array([metric for _ in tolerances for metric in metrics])]

# Average of each metric over all songs, for each row and each tolerance.
arr = [
    [np.mean(scores[:, row, metric]) for scores in (zerofive, three) for metric in range(len(metrics))]
    for row in range(len(line))
]

pd.DataFrame(np.array(arr), index=nested_lines, columns=col)

Unnamed: 0_level_0,Unnamed: 1_level_0,0.5 seconds,0.5 seconds,0.5 seconds,3 seconds,3 seconds,3 seconds
Unnamed: 0_level_1,Unnamed: 1_level_1,Precision,Recall,F measure,Precision,Recall,F measure
cnmf,Original,0.2109,0.3122,0.2405,0.396,0.5816,0.4506
cnmf,Aligned on downbeats,0.2407,0.3502,0.2729,0.3946,0.5715,0.4472
foote,Original,0.268,0.34,0.2886,0.5131,0.6427,0.551
foote,Aligned on downbeats,0.3042,0.3832,0.327,0.5055,0.6314,0.5423
scluster,Original,0.2716,0.3669,0.2916,0.4402,0.607,0.4767
scluster,Aligned on downbeats,0.317,0.4166,0.3369,0.4534,0.602,0.4844
sf,Original,0.2919,0.333,0.301,0.5274,0.5952,0.5424
sf,Aligned on downbeats,0.3289,0.3729,0.3384,0.5227,0.5893,0.5372


# References

[1] Nieto, O., & Jehan, T. (2013, May). Convex non-negative matrix factorization for automatic music structure identification. In 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (pp. 236-240). IEEE.

[2] Foote, J. (2000, July). Automatic audio segmentation using a measure of audio novelty. In 2000 IEEE International Conference on Multimedia and Expo. ICME2000. Proceedings. Latest Advances in the Fast Changing World of Multimedia (Cat. No. 00TH8532) (Vol. 1, pp. 452-455). IEEE.

[3] McFee, B., & Ellis, D. (2014). Analyzing Song Structure with Spectral Clustering. In ISMIR (pp. 405-410).

[4] Serra, J., Müller, M., Grosche, P., & Arcos, J. L. (2014). Unsupervised music structure annotation by time series structure features and segment similarity. IEEE Transactions on Multimedia, 16(5), 1229-1240.