In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 30 days


In [2]:
import json

In [32]:
import scipy
import scipy.linalg
import scipy.ndimage
import scipy.sparse.csgraph
import sklearn
import sklearn.cluster

In [6]:
# Maximum number of recurrence links kept per beat (see the truncation in analyze).
MAX_LINKS = 3

In [7]:
def normalize_labels(labels):
    """Relabel a sequence with consecutive integers in order of first appearance.

    The first distinct value seen becomes 0, the next new value becomes 1,
    and so on; repeated values reuse the number they were first given.

    Parameters
    ----------
    labels : sequence of hashable
        Labels to renumber. Iterated twice, so it must be re-iterable.

    Returns
    -------
    list of int
        One integer per input label, in the original order.
    """
    first_seen = {}

    for label in labels:
        # len() is evaluated before insertion, so a new label receives the
        # next unused integer; known labels are left untouched.
        first_seen.setdefault(label, len(first_seen))

    return [first_seen[label] for label in labels]

In [87]:
def get_bounds(seg_ids):
    """Locate the positions where consecutive segment ids differ.

    Parameters
    ----------
    seg_ids : array-like
        One segment id per position. Coerced with ``np.asarray`` so that
        plain lists are compared element-wise; without the coercion a list
        comparison collapses to a single boolean and the result is wrong.

    Returns
    -------
    np.ndarray
        Indices ``i`` such that ``seg_ids[i] != seg_ids[i + 1]``.
        If no change is found, a one-element array holding
        ``len(seg_ids) - 2`` is returned instead of an empty array.
    """
    seg_ids = np.asarray(seg_ids)

    deltas = np.flatnonzero(seg_ids[:-1] != seg_ids[1:])

    if len(deltas):
        return deltas

    # No label change anywhere: fall back to a single boundary placed at the
    # second-to-last position, so callers always get at least one boundary.
    return np.asarray([len(seg_ids) - 2])

def beat_labels_to_segments(beats, seg_ids, lower=None):
    """Convert per-beat segment ids into beat-index intervals with labels.

    Parameters
    ----------
    beats : any
        Not used by the current implementation (it only appeared in the
        frame/time conversion that is disabled); kept for the call signature.
    seg_ids : np.ndarray of int
        Segment id for each beat. Ids index into a 26-letter alphabet, so
        they must be smaller than 26.
    lower : np.ndarray of int, optional
        Segment ids of another labeling of the same beats. When given, each
        boundary of `seg_ids` is paired with a boundary of `lower` through
        ``librosa.util.match_events`` and moved onto it if the two are fewer
        than 4 beats apart.

    Returns
    -------
    intervals : np.ndarray
        Rows of ``[start, end]`` beat indices with ``end > start``.
    labels : list of int
        One label per returned interval, renumbered by `normalize_labels`.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    change_points = get_bounds(seg_ids)

    if lower is not None:
        lower_points = get_bounds(lower)

        matched = librosa.util.match_events(change_points, lower_points)
        candidates = lower_points[matched]

        # Snap only the boundaries that already sit close to their match.
        snap = np.abs(change_points - candidates) < 4
        change_points[snap] = candidates[snap]

    # One letter per candidate segment: the label at beat 0, followed by the
    # label one beat after each change point.
    label_positions = [0] + list(change_points + 1)
    letters = [alphabet[seg_ids[pos]] for pos in label_positions]

    # NOTE(review): `letters` is built from the raw change points, while the
    # intervals below come from the output of fix_frames. If fix_frames
    # de-duplicates (e.g. a change point at 0, or two points snapped onto the
    # same target), the two sequences may no longer line up -- confirm.
    edges = librosa.util.fix_frames(change_points, x_min=0, x_max=len(seg_ids) - 1)

    intervals = np.column_stack([edges[:-1], edges[1:]])

    # Discard zero-length intervals.
    keep = np.flatnonzero((intervals[:, 1] - intervals[:, 0]) > 0)

    kept_letters = [letters[j] for j in keep]

    return intervals[keep], normalize_labels(kept_letters)

In [90]:
def analyze(infile):
    """Run a multi-level, beat-synchronous structure analysis on one audio file.

    Pipeline, in order:

    1. Load the audio and compute a log-power constant-Q spectrogram
       (36 bins per octave, 252 bins).
    2. Track beats and aggregate the spectrogram per beat (median).
    3. Build a symmetric affinity recurrence matrix over beats, keep up to
       ``MAX_LINKS`` linked beats per beat, and median-filter the matrix
       in time-lag space.
    4. Build a path (adjacent-beat) similarity matrix from beat-synchronous
       MFCCs, and mix it with the filtered recurrence matrix.
    5. Take the eigenvectors of the normalized graph Laplacian, smooth them,
       and cluster the first ``k`` of them with k-means for ``k = 2..9``.
    6. Turn the per-beat cluster ids into segment layers.

    Parameters
    ----------
    infile : str
        Path of the audio file, passed to ``librosa.load``.

    Returns
    -------
    layers : list of dict
        Each dict has ``'boundaries'`` (list of beat indices) and
        ``'labels'`` (list of int). The first layer is a single segment
        spanning every beat; it is followed by the layers for k = 2..8.
    beat_intervals : np.ndarray
        Rows of ``[start, end]`` values produced by
        ``librosa.frames_to_time`` for consecutive beat frames.
    links : list of list of int
        For every row of the recurrence matrix, at most ``MAX_LINKS``
        column indices of non-zero entries, as native Python ints so the
        structure can be serialized with ``json``.
    """
    print('\tLoading')
    y, sr = librosa.load(infile)

    print('\tInitial feature extraction')
    C = librosa.logamplitude(np.abs(librosa.cqt(y=y,
                                                sr=sr,
                                                bins_per_octave=12*3,
                                                n_bins=7*12*3,
                                                real=False))**2,
                             ref_power=np.max)

    # Let's beat-synchronize this to reduce dimensionality
    _, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)

    beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats, x_min=0, x_max=C.shape[1]))

    beat_intervals = np.asarray([beat_times[:-1], beat_times[1:]]).T

    Csync = librosa.util.sync(C, beats, aggregate=np.median)

    # Let's build a weighted recurrence matrix from this
    R = librosa.segment.recurrence_matrix(Csync, width=3, mode='affinity', sym=True)

    # Get the knn links: the first MAX_LINKS non-zero columns of each row.
    # .tolist() yields native ints; numpy integer scalars are rejected by
    # json.dump on Python 3.
    links = [np.flatnonzero(R[i, :])[:MAX_LINKS].tolist() for i in range(len(R))]

    # And enhance diagonals with a median filter
    df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
    Rf = df(R, size=(1, 5))

    # Now let's build the sequence matrix using mfcc-similarity
    # R_path[i, i+] = exp(-|C_i - C_j|^2 / bw)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    Msync = librosa.util.sync(mfcc, beats)

    print('\tGraph analysis')
    path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
    bw = np.mean(path_distance)
    path_sim = np.exp(-path_distance / bw)

    R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)

    # And compute the balanced combination
    deg_path = np.sum(R_path, axis=1)
    deg_rec = np.sum(Rf, axis=1)

    mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

    A = mu * Rf + (1 - mu) * R_path

    # Now let's compute the symmetric normalized laplacian
    L = scipy.sparse.csgraph.laplacian(A, normed=True)

    # And its spectral decomposition
    _, evecs = scipy.linalg.eigh(L)

    # We can clean this up further with a median filter.
    # This can help smooth over brief discontinuities
    evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

    # cumulative normalization is needed for symmetric normalize laplacian eigenvectors
    Cnorm = np.cumsum(evecs**2, axis=1)**0.5

    print('\tSegmentation')
    all_seg_ids = [np.zeros(len(evecs))]

    for k in range(2, 10):

        X = evecs[:, :k] / Cnorm[:, k-1:k]

        # Let's use these k components to cluster
        KM = sklearn.cluster.KMeans(n_clusters=k)

        all_seg_ids.append(KM.fit_predict(X))

    # Top layer: one segment covering every beat. len(evecs) equals the row
    # count of the last clustering input, without depending on a loop variable.
    layers = [{'boundaries': [0, len(evecs)], 'labels': [0]}]

    # The all-zeros placeholder (index 0) is skipped, and so is the last
    # clustering (k = 9): the original loop paired each level with the next
    # one, which leaves the final level without a partner.
    # NOTE(review): confirm whether the k = 9 layer should be emitted too.
    for seg_ids in all_seg_ids[1:-1]:

        intervals, labels = beat_labels_to_segments(beats, seg_ids)

        layers.append({'boundaries': intervals[:, 0].tolist(),
                       'labels': labels})

    return layers, beat_intervals, links

In [98]:
import os
import re

def get_meta(filename):
    """Extract (artist, title) from a Beatles-collection file path.

    The path is expected to look like
    ``.../The Beatles/<album>/<prefix>_-_<title>.flac``.

    NOTE: a second ``get_meta`` (tag-based) is defined further down in this
    same cell and replaces this one once the cell has run.

    Parameters
    ----------
    filename : str
        Path of the audio file.

    Returns
    -------
    tuple or list
        ``(artist, title)`` taken from the path when it matches the pattern,
        otherwise ``['unknown', <basename of filename>]``.
    """
    # Alternative pattern for "<artist>-<title>.mp3" collections:
    #R = re.match('.*/(?P<artist>.*)-(?P<title>.*).mp3', filename)

    # The dot before the extension is escaped so that only a literal
    # ".flac" suffix is accepted.
    match = re.match(r'.*/(?P<artist>The Beatles)/.*/.*_-_(?P<title>.*)\.flac', filename)

    # Test for "no match" explicitly instead of catching every exception,
    # so unrelated errors (and KeyboardInterrupt) are not swallowed.
    if match is None:
        return ['unknown', os.path.basename(filename)]

    return match.groups()

import taglib

def get_meta(filename):
    """Read (artist, title) from the tags embedded in an audio file.

    Parameters
    ----------
    filename : str
        Path of the audio file, opened with ``taglib.File``.

    Returns
    -------
    tuple
        ``(artist, title)``: the first value of the ``ARTIST`` and ``TITLE``
        tags, or ``'unknown'`` for a tag that is missing or empty.
    """
    song = taglib.File(filename)

    # Both tags are handled the same way: take the first entry of the tag's
    # value list. `or` also covers a tag that is present but empty, which
    # would otherwise raise IndexError.
    artist = (song.tags.get('ARTIST') or ['unknown'])[0]
    title = (song.tags.get('TITLE') or ['unknown'])[0]

    return (artist, title)

In [48]:
def process_audio(infile, outfile):
    """Analyze one audio file and write the result to a JSON file.

    Parameters
    ----------
    infile : str
        Path of the audio file; passed to `get_meta` and `analyze`.
    outfile : str
        Path of the JSON file to write (overwritten if it exists).

    The JSON object has the keys ``filename``, ``artist``, ``title``,
    ``beats`` (start of every beat interval), ``duration`` (end of the last
    beat interval), ``links`` and ``segments``.
    """
    meta = get_meta(infile)
    parse, beats, links = analyze(infile)

    payload = {'filename': infile,
               'artist': meta[0],
               'title': meta[1],
               'beats': beats[:, 0].tolist(),
               'duration': float(beats[-1, 1]),
               'links': links,
               'segments': parse}

    # The context manager closes (and flushes) the file even if
    # serialization fails; the original left the handle open.
    with open(outfile, 'w') as fdesc:
        json.dump(payload, fdesc)

In [93]:
ls /home/bmcfee/data/beatles_iso/audio/The\ Beatles

[0m[01;34m01_-_Please_Please_Me[0m/    [01;34m08_-_Sgt._Pepper's_Lonely_Hearts_Club_Band[0m/
[01;34m02_-_With_the_Beatles[0m/    [01;34m09_-_Magical_Mystery_Tour[0m/
[01;34m03_-_A_Hard_Day's_Night[0m/  [01;34m10CD1_-_The_Beatles[0m/
[01;34m04_-_Beatles_for_Sale[0m/    [01;34m10CD2_-_The_Beatles[0m/
[01;34m05_-_Help![0m/               [01;34m11_-_Abbey_Road[0m/
[01;34m06_-_Rubber_Soul[0m/         [01;34m12_-_Let_It_Be[0m/
[01;34m07_-_Revolver[0m/


In [104]:
# Choose the audio files to process. The commented-out lines are alternative
# collections; the active line lists .ogg/.mp3 files directly inside the
# working directory (no recursion into sub-directories).
#files = librosa.util.find_files('/home/bmcfee/data/CAL500/mp3/')
#files = librosa.util.find_files('/home/bmcfee/data/beatles_iso/audio/The Beatles/11_-_Abbey_Road', ext='mp3')
#files = librosa.util.find_files('//home/bmcfee/Music/IAYD - Supergalactic/', recurse=False, ext='mp3')
#files = librosa.util.find_files('//home/bmcfee/', recurse=False, ext='mp3')
files = librosa.util.find_files('/home/bmcfee/working/', recurse=False, ext=['ogg', 'mp3'])

In [109]:
# Keep only the file at index 12 for this run.
# NOTE: this rebinds `files` to a slice of itself, so running the cell a
# second time (without rebuilding `files` first) leaves an empty list.
files = files[12:13]

In [110]:
# Visit the files in random order. No seed is set in the visible cells, so
# the order is not reproducible between runs.
ids = list(range(len(files)))
#ids = [0,1]
np.random.shuffle(ids)  # shuffles the list in place

In [111]:
# Process at most the first 16 shuffled entries, one JSON file per track.
SKIP_EXISTING = False  # set to True to leave already-written outputs alone
OUTPUT_PATTERN = '/home/bmcfee/git/lsd_viz/data/{:08d}.json'

for i in ids[:16]:
    print(i, files[i])
    # Output files are numbered from 999 upwards.
    outpath = OUTPUT_PATTERN.format(999 + i)
    if SKIP_EXISTING and os.path.exists(outpath):
        continue
    process_audio(files[i], outpath)

(0, '/home/bmcfee/working/Radiohead - Paranoid Android-sPLEbAVjiLA.ogg')
	Loading
	Initial feature extraction
	Graph analysis
	Segmentation
