In [1]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline
import jams
import jams.display
import mir_eval
import pandas as pd
import os
import scipy
from joblib import Parallel, delayed

In [2]:
def selfsim(x, metric='sqeuclidean'):
    '''Gaussian-kernel self-similarity between the columns of a feature matrix.

    Parameters
    ----------
    x : array-like, 2-dimensional
        Feature matrix.  It is transposed before being handed to `cdist`,
        so every column of `x` is treated as one observation.
    metric : str
        Distance metric forwarded to `scipy.spatial.distance.cdist`.

    Returns
    -------
    np.ndarray
        Square matrix `exp(-D / bw)`, where `D` holds the pairwise distances
        and `bw` is the mean of the row-wise medians of `D`.  When `bw` is
        exactly zero the limiting kernel is returned instead: 1 where the
        distance is zero and 0 elsewhere.
    '''
    # Imported here because a bare `import scipy` does not load the
    # `spatial` subpackage on every SciPy version.
    from scipy.spatial.distance import cdist

    observations = np.asarray(x).T
    D = cdist(observations, observations, metric=metric)
    bw = np.mean(np.median(D, axis=1))

    if bw == 0:
        # Dividing by a zero bandwidth would fill the matrix with NaN
        # (0 / 0); use the bw -> 0+ limit of the kernel instead.
        return (D == 0).astype(D.dtype)

    return np.exp(-D / bw)

def simsim(A, B):
    '''Normalized correlation between two square matrices.

    Both matrices are cropped to their common top-left `n x n` block, with
    `n = min(len(A), len(B))`, and mean-centered.  The result is the sum of
    their element-wise product divided by the product of their Frobenius
    norms.  A block with zero variance makes the denominator zero, so the
    result is then NaN.
    '''
    size = min(len(A), len(B))

    # Crop to the shared block first, then remove each block's mean.
    a_block = A[:size, :size]
    b_block = B[:size, :size]
    a_centered = a_block - np.mean(a_block)
    b_centered = b_block - np.mean(b_block)

    a_norm = np.sqrt(np.sum(a_centered**2))
    b_norm = np.sqrt(np.sum(b_centered**2))

    return np.sum(a_centered * b_centered) / (a_norm * b_norm)

In [3]:
def compare_data(jam_file, feature):
    '''Correlate annotation structure with feature self-similarity for one track.

    Parameters
    ----------
    jam_file : str
        Path of the JAMS file, read with `jams.load`.
    feature : str
        Path of an archive readable by `np.load` that provides the
        'tempo', 'rhythm', 'chroma' and 'mfcc' arrays.

    Returns
    -------
    list of dict
        One dict per annotation in the 'multi' namespace.  Each dict holds
        the `simsim` score against every feature self-similarity matrix
        (keys 'tempo', 'rhythm', 'chroma', 'mfcc'), plus 'track' (the base
        name of `jam_file`) and 'annotator'.
    '''
    jam = jams.load(jam_file)

    # Compute feature self-similarity matrices.  The archive is opened in a
    # `with` block so its file handle is released once the arrays are read;
    # `selfsim` returns fresh arrays, so nothing refers to the closed archive.
    with np.load(feature) as F:
        S = {
            'tempo': selfsim(F['tempo']),
            'rhythm': selfsim(librosa.logamplitude(F['rhythm']**2, ref_power=np.max)),
            'chroma': selfsim(F['chroma']),
            'mfcc': selfsim(F['mfcc']),
        }

    anns = jam.annotations['multi']

    results = []
    track = os.path.basename(jam_file)

    for ann in anns:
        # Build the annotation self-similarity matrix.
        # NOTE(review): `_meet` is an underscore-prefixed mir_eval helper, and
        # 0.25 is presumably its frame size in seconds -- confirm against the
        # installed mir_eval version.
        h_ints, h_labs = jams.eval.hierarchy_flatten(ann)
        h_sim = np.asarray(mir_eval.hierarchy._meet(h_ints, h_labs, 0.25).todense())

        v = {key: simsim(h_sim, feature_sim) for key, feature_sim in S.items()}
        v['track'] = track
        v['annotator'] = ann.annotation_metadata.annotator.name

        results.append(v)
    return results

In [6]:
# Files with the 'jams' extension found under each collection's directory.
annotations = {
    'salami': jams.util.find_with_extension('../data/SALAMI/', 'jams'),
    'spam': jams.util.find_with_extension('../data/SPAM/', 'jams'),
}

In [7]:
# Files with the 'npz' extension found under each collection's feature directory.
features = {
    'salami': jams.util.find_with_extension('../../../working/segment_labels_features/SALAMI/', 'npz'),
    'spam': jams.util.find_with_extension('../../../working/segment_labels_features/SPAM/', 'npz'),
}

In [8]:
# Map every annotation file to the feature archive it is compared against.
data = dict()

# SALAMI: the feature path is derived from the annotation's own base name,
# with the extension swapped for 'npz'.
data['salami'] = {J: os.path.join('../../../working/segment_labels_features/SALAMI/',
                                  os.extsep.join([os.path.splitext(os.path.basename(J))[0], 'npz']))
                  for J in annotations['salami']}

# SPAM: files are paired purely by their position in the two listings.
# `zip` stops at the shorter one, so a count mismatch would silently drop or
# mispair entries -- fail loudly instead.
# NOTE(review): equal counts do not prove that the two orderings correspond.
assert len(annotations['spam']) == len(features['spam']), (
    'SPAM annotation/feature counts differ: {} vs {}'.format(
        len(annotations['spam']), len(features['spam'])))
data['spam'] = dict(zip(annotations['spam'], features['spam']))

In [10]:
# Per-collection results, filled in by the cells below.
feature_sim = {}

In [None]:
# Run compare_data on every SPAM (annotation, feature) pair through joblib.
feature_sim['spam'] = Parallel(n_jobs=20, verbose=1)(
    delayed(compare_data)(jam_path, feature_path)
    for jam_path, feature_path in data['spam'].items()
)

In [54]:
# Run compare_data on every SALAMI (annotation, feature) pair through joblib.
feature_sim['salami'] = Parallel(n_jobs=20, verbose=1)(
    delayed(compare_data)(jam_path, feature_path)
    for jam_path, feature_path in data['salami'].items()
)

[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.9s
[Parallel(n_jobs=20)]: Done 195 tasks      | elapsed:   20.8s
[Parallel(n_jobs=20)]: Done 445 tasks      | elapsed:   47.5s
[Parallel(n_jobs=20)]: Done 796 tasks      | elapsed:  1.5min
[Parallel(n_jobs=20)]: Done 1246 tasks      | elapsed:  2.3min
[Parallel(n_jobs=20)]: Done 1359 out of 1359 | elapsed:  3.1min finished


In [None]:
# Flatten the per-track result lists into one table row per result dict.
df_spam = pd.DataFrame.from_records(
    [record for track_results in feature_sim['spam'] for record in track_results]
)

In [55]:
# Flatten the per-track result lists into one table row per result dict.
df_salami = pd.DataFrame.from_records(
    [record for track_results in feature_sim['salami'] for record in track_results]
)

In [57]:
# Write the SALAMI correlation table to disk as JSON.
df_salami.to_json(path_or_buf='../data/salami_feature_correlations.json')

In [None]:
# Write the SPAM correlation table to disk as JSON.
df_spam.to_json(path_or_buf='../data/spam_feature_correlations.json')