# Compare aligned audio segments
Compare music similarity between corresponding structural segments by hierarchically decomposing the structure of each piece and finding the alignment between the resulting segments.



## > Library importing

In [None]:
#Computation
import numpy as np
import scipy
from scipy.interpolate import interp2d
import matplotlib.pyplot as plt
import cv2

#Data Processing
import sklearn.cluster
import sklearn

#Audio
import librosa
from librosa import display

#System
import glob
import os
import sys

#Pickling
import dill

#Reading
import reader
import segment

## > Load annotations and audio files

In [None]:
# Choose parent directory containing audiofiles and annotations.
# The same directory is searched for both '.lab' annotation files and
# '.flac' audio files by the loading cell.
# NOTE(review): machine-specific absolute path; edit before running elsewhere.
directory = '/Users/chris/Google Drive/Publication Files/CMMR2021/Datasets/isophonics_MJ'

In [None]:
# Collect the annotation ('.lab') paths under `directory` and load them
# (original note: loaded to a dictionary with the annotation data fixed)
lab_paths = reader.read_paths(directory, '.lab')
labs = reader.load_lab(lab_paths)
print("Loaded annotations.")

# Cross reference audio with annotations
ref = reader.ref_paths(directory, directory)

# Collect the audio ('.flac') paths under `directory`
audio_paths = reader.read_paths(directory, '.flac')
file_no = len(audio_paths)

# Load every file as mono at a common sample rate. Each entry of `audio`
# stores whatever librosa.load returns (later cells unpack it as `y, sr`),
# keyed by the file name with its last 5 characters ('.flac') removed.
sr = 22050
audio = {}
for i, path in enumerate(audio_paths):
    key = os.path.basename(path)[:-5]
    audio[key] = librosa.load(path, sr=sr, mono=True)
    # '\r' rewrites the same console line so progress updates in place
    progress = "\rLoaded %i/%i pieces." % (i+1, file_no)
    sys.stdout.write(progress)
    sys.stdout.flush()

## > Get segments from each method

### >> Matching Segment Selection


#### >>> Compute segmentation

In [None]:
# Bounds handed to segment.segment; the hit-finding cell iterates over
# kmax-kmin hierarchy levels
kmin, kmax = 2, 7

mss_seg_ids = {}  # first value returned by segment.segment, per piece
mss_f = {}        # second value returned by segment.segment (formatted segments)
for p in audio_paths:
    name = os.path.basename(p[:-5])
    # each audio entry is the (signal, sample rate) pair stored at load time
    y, sr = audio[name]
    seg_ids, formatted = segment.segment(y, sr, kmin, kmax)
    mss_seg_ids[name] = seg_ids
    mss_f[name] = formatted

In [None]:
#Plotting the segments of the first n_to_plot pieces, one subplot per piece
n_to_plot = 2
# squeeze=False makes plt.subplots always return a 2-D array of axes, so the
# cell also works when n_to_plot is 1 (the default would return a bare Axes
# object that cannot be indexed). Taking column 0 keeps `axs` one-dimensional.
fig, axs = plt.subplots(n_to_plot, 1, figsize=(20, 4*n_to_plot), squeeze=False)
axs = axs[:, 0]
# zip stops at the shorter input: at most n_to_plot pieces are drawn, and
# fewer if less audio was loaded
for ax, p in zip(axs, audio_paths):
    name = os.path.basename(p[:-5])
    ax.matshow(mss_seg_ids[name], aspect=10)
    ax.set(title=name)
plt.show()

#### >>> Find segment hits

In [None]:
def find_segment_hits(formatted_1, formatted_2, n_levels):
    """Collect index quadruples of segments whose element 1 is equal.

    Args:
        formatted_1: formatted segments of song 1, indexed as
            [hierarchy level][segment]; element 1 of each segment is compared.
            # assumes element 1 is the segment label - TODO confirm
        formatted_2: formatted segments of song 2, same layout.
        n_levels: number of hierarchy levels traversed in both songs.

    Returns:
        List of [i, j, k, l]: segment j of level i in song 1 matches
        segment l of level k in song 2.
    """
    found = []
    #traverse hierarchies, song 1
    for i in range(n_levels):
        #traverse segments, song 1
        for j, seg_1 in enumerate(formatted_1[i]):
            #traverse hierarchies, song 2
            for k in range(n_levels):
                #traverse segments, song 2
                for l, seg_2 in enumerate(formatted_2[k]):
                    if seg_1[1] == seg_2[1]:
                        found.append([i, j, k, l])
    return found


# NOTE(review): the original cell used `all_formatted_beats`, `s1`, `s2` and
# `hits` without defining them, and bounded the song-1 segment loop with
# `mss_f[name][2][i]`. This version assumes `mss_f` (the formatted segments
# computed above) is the intended data and compares the first two loaded
# pieces - confirm the intended pair and the layout of mss_f.
names = [os.path.basename(p[:-5]) for p in audio_paths]
hits = []
if len(names) >= 2:
    s1, s2 = names[0], names[1]
    hits = find_segment_hits(mss_f[s1], mss_f[s2], kmax-kmin)
print(hits)


### >> Pad to maximum

#### >>> Pad annotation data

In [None]:
#find the longest loaded signal (length of element 0 of each audio entry)
max_length = 0
for p in audio_paths:
    name = os.path.basename(p[:-5])
    max_length = max(max_length, len(audio[name][0]))

#vectorize every annotation from 0 up to that common maximum length
labs_PM = {
    os.path.basename(q[:-5]): reader.vectorize(
        lab=labs[os.path.basename(q[:-5])], sr=sr, start_f=0, end_f=max_length)
    for q in audio_paths
}

#### >>> PM scores

### >> Pad pairwise

#### >>> Compute pairwise padding of annotation data

In [None]:
# Directed pairwise padding: querying labs_PP[A][B] returns the annotation of
# A vectorized up to the length of the longer of A and B, and labs_PP[B][A]
# returns the annotation of B vectorized over that same length.
labs_PP = {}
for p1 in audio_paths:
    name1 = os.path.basename(p1)[:-5]
    length1 = len(audio[name1][0])
    d = {} #inner level of the 2D dictionary, keyed by the second piece
    for p2 in audio_paths:
        name2 = os.path.basename(p2)[:-5]
        #length of the longer audiofile of the pair; kept in its own name so
        #the global max_length from the pad-to-maximum cell is not overwritten
        pair_length = max(length1, len(audio[name2][0]))
        d[name2] = reader.vectorize(lab=labs[name1], sr=sr, start_f=0, end_f=pair_length)
    labs_PP[name1] = d


#### >>> PP scores

### >> Truncate to minimum


#### >>> Truncate annotation data

In [None]:
#find the shortest loaded signal, seeding the search with the first piece
first_name = os.path.basename(audio_paths[0])[:-5]
min_length = len(audio[first_name][0])
for p in audio_paths:
    name = os.path.basename(p[:-5])
    min_length = min(min_length, len(audio[name][0]))

#vectorize every annotation from 0 up to that common minimum length
labs_TM = {
    os.path.basename(q[:-5]): reader.vectorize(
        lab=labs[os.path.basename(q[:-5])], sr=sr, start_f=0, end_f=min_length)
    for q in audio_paths
}

#### >>> TM scores

### >> Truncate pairwise

#### >>> Compute pairwise truncation of annotation data

In [None]:
# Directed pairwise truncation: querying labs_TP[A][B] returns the annotation
# of A vectorized up to the length of the shorter of A and B, and
# labs_TP[B][A] returns the annotation of B vectorized over that same length.
labs_TP = {}
for p1 in audio_paths:
    name1 = os.path.basename(p1)[:-5]
    length1 = len(audio[name1][0])
    d = {} #inner level of the 2D dictionary, keyed by the second piece
    for p2 in audio_paths:
        name2 = os.path.basename(p2)[:-5]
        #length of the shorter audiofile of the pair. The original stored this
        #in `max_length` but passed the global `min_length` to vectorize, so
        #the pairwise value was never used.
        pair_length = min(length1, len(audio[name2][0]))
        d[name2] = reader.vectorize(lab=labs[name1], sr=sr, start_f=0, end_f=pair_length)
    labs_TP[name1] = d

#### >>> TP scores

### >> Fixed length from middle

#### >>> Get annotations for a given interval around the middle of the audio

In [None]:
labs_FL15 = {}
labs_FL30 = {}
labs_FL60 = {}
for p in audio_paths:
    name = os.path.basename(p[:-5])
    audio_length = len(audio[name][0]) #length of the loaded signal
    middle = int(audio_length/2)

    #one window per duration (in seconds), each centered on the middle
    for seconds, target in ((15, labs_FL15), (30, labs_FL30), (60, labs_FL60)):
        start_f = middle - int(seconds*sr/2) #half a window before the middle
        end_f = start_f + seconds*sr #a full window after the start
        target[name] = reader.vectorize(lab=labs[name], sr=sr, start_f=start_f, end_f=end_f)

#### >>> FL scores