# Music segmentation

## Introduction

In this example, we showcase how `ruptures` can be used to detect change points within a music soundtrack and split it into several contiguous segments.

To do so, we run `ruptures` segmentation algorithms on two different sets of features:

* A chromagram, which maps the input signal into a feature space (usually of dimension 12) representing the 12 classical pitch classes
* A tempogram computed based on the onset strength envelope of the input signal, that captures the tempo spectrum 

By doing so, we provide two different ways of segmenting a music soundtrack, based on two different representations of the information carried by the input signal, respectively:

* A harmonic spectral representation
* A rhythmic representation

Many thanks to the creators and maintainers of the [librosa package](https://librosa.org/doc/latest/index.html) for providing :

* a simple and clean way of loading some music data
* some methods for performing meaningful feature engineering



## Setup

First, we make the necessary imports

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
from itertools import product, chain
import numpy as np
import ruptures as rpt  # out package

## Load the data

The [librosa](https://librosa.org/doc/latest/index.html) package ships with several example soundtracks. We run this example on the `nutcracker` one, which corresponds to `Tchaikovsky - Dance of the Sugar Plum Fairy`. See [here](https://librosa.org/doc/latest/recordings.html#description-of-examples) for a description of the other available recordings.

In [None]:
# Only the first `duration` seconds of the soundtrack are loaded
duration = 30
y, sampling_rate = librosa.load(
    librosa.ex("nutcracker"),
    duration=duration,
)

## Compute features

### Chromagram

In [None]:
# Number of chroma bins to produce (the usual 12 pitch classes)
nb_chroma = 12
# Hop, in audio samples, from one chroma frame to the next
hop_length = 512
# Compute the chromagram: one row per chroma bin and one column per frame,
# i.e. roughly duration * sampling_rate / hop_length columns
chromagram = librosa.feature.chroma_cqt(
    y=y,
    sr=sampling_rate,
    n_chroma=nb_chroma,
    hop_length=hop_length,
)

### Tempogram

In [None]:
# Onset strength envelope of the signal, computed with the same hop as above
oenv = librosa.onset.onset_strength(
    y=y,
    hop_length=hop_length,
    sr=sampling_rate,
)
# Size, in frames, of the window used to autocorrelate the onset envelope
win_length = 384
# Compute the tempogram: `win_length` rows and one column per frame,
# i.e. roughly y.shape[0] / hop_length columns
tempogram = librosa.feature.tempogram(
    onset_envelope=oenv,
    win_length=win_length,
    hop_length=hop_length,
    sr=sampling_rate,
)

## Compute and display segmentations

In [None]:
# Segment the chromagram with several ruptures detectors and draw one
# subplot per segmentation, below the raw waveform.

# We ask for 5 breaking points
nb_bkps = 5

algos = [rpt.BottomUp, rpt.Binseg]
models = ["l2", "l1", "rbf"]  # "l1", "rbf", "linear", "normal", "ar"
kernelCPD_algos = [rpt.KernelCPD]
kernelCPD_kernels = ["linear", "rbf", "cosine"]

# (detector instance, cost or kernel name) pairs
model_objs = [(algo(model=model), model) for (algo, model) in product(algos, models)]
for (algo, kernel) in product(kernelCPD_algos, kernelCPD_kernels):
    model_objs.append((algo(kernel=kernel), kernel))

# Every segmentation to draw, as (detector, cost name, `predict` keyword arguments).
# All detectors above are asked for a fixed number of change points; the last
# entry is a penalty-based run where the number of change points is not fixed.
segmentations = [(algo, cost, {"n_bkps": nb_bkps}) for (algo, cost) in model_objs]
segmentations.append((rpt.BottomUp(model="l2"), "l2", {"pen": 50}))

# One row for the waveform plus one row per segmentation
nb_plots = len(segmentations) + 1
fig, axs = plt.subplots(nrows=nb_plots, sharex=True, figsize=(8, 20), dpi=150)

# `waveplot` is no longer available in recent librosa releases, where
# `waveshow` replaces it; use whichever one the installed version provides.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plot_waveform(y, sr=sampling_rate, ax=axs[0])
axs[0].set(title="nutcracker")
axs[0].label_outer()

for idx, (algo, cost, predict_kwargs) in enumerate(segmentations, start=1):
    algo = algo.fit(chromagram.T)
    rpt_bkps = algo.predict(**predict_kwargs)
    # Make the break points "compatible" with librosa api:
    # it expects index 0 and not the index of the last point
    rpt_bkps = [0] + rpt_bkps[:-1]
    # Translate frame indexes in the chromagram to times in the original
    # soundtrack; the hop length must be the one used to build the chromagram
    rpt_bpks_times = librosa.frames_to_time(
        rpt_bkps, sr=sampling_rate, hop_length=hop_length
    )
    # Plot the chromagram with the detected boundaries on top
    librosa.display.specshow(
        chromagram, y_axis="chroma", x_axis="time", ax=axs[idx], hop_length=hop_length
    )
    axs[idx].vlines(
        rpt_bpks_times,
        0,
        chromagram.shape[0],
        color="linen",
        linestyle="--",
        linewidth=1.5,
        alpha=0.9,
    )
    axs[idx].set(title=f"Algo : {type(algo).__name__}, Cost : {cost}")
    axs[idx].label_outer()

In [None]:
# Segment the tempogram with several ruptures detectors and draw one
# subplot per segmentation, below the raw waveform and the onset strength.

# We ask for 5 breaking points
nb_bkps = 5

algos = [rpt.BottomUp, rpt.Binseg]
models = ["l2", "l1", "rbf"]  # "l1", "rbf", "linear", "normal", "ar"
kernelCPD_algos = [rpt.KernelCPD]
kernelCPD_kernels = ["linear", "rbf", "cosine"]

# (detector instance, cost or kernel name) pairs
model_objs = [(algo(model=model), model) for (algo, model) in product(algos, models)]
for (algo, kernel) in product(kernelCPD_algos, kernelCPD_kernels):
    model_objs.append((algo(kernel=kernel), kernel))

# Every segmentation to draw, as (detector, cost name, `predict` keyword arguments).
# All detectors above are asked for a fixed number of change points; the last
# entry is a penalty-based run where the number of change points is not fixed.
segmentations = [(algo, cost, {"n_bkps": nb_bkps}) for (algo, cost) in model_objs]
segmentations.append((rpt.BottomUp(model="l2"), "l2", {"pen": 50}))

# Two rows for the waveform and the onset strength, plus one row per segmentation
nb_plots = len(segmentations) + 2
fig, axs = plt.subplots(nrows=nb_plots, sharex=True, figsize=(8, 20), dpi=150)

times = librosa.times_like(oenv, sr=sampling_rate, hop_length=hop_length)
# `waveplot` is no longer available in recent librosa releases, where
# `waveshow` replaces it; use whichever one the installed version provides.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
plot_waveform(y, sr=sampling_rate, ax=axs[0])
axs[0].set(title="nutcracker")
axs[0].label_outer()
axs[1].plot(times, oenv, label="Onset strength")
axs[1].label_outer()
axs[1].legend(frameon=True)

for idx, (algo, cost, predict_kwargs) in enumerate(segmentations, start=2):
    # Every detector, including the penalty-based one, is fitted on the
    # tempogram so that the boundaries match the features being displayed
    algo = algo.fit(tempogram.T)
    rpt_bkps = algo.predict(**predict_kwargs)
    # Make the break points "compatible" with librosa api:
    # it expects index 0 and not the index of the last point
    rpt_bkps = [0] + rpt_bkps[:-1]
    # Translate frame indexes in the tempogram to times in the original
    # soundtrack; the hop length must be the one used to build the tempogram
    rpt_bpks_times = librosa.frames_to_time(
        rpt_bkps, sr=sampling_rate, hop_length=hop_length
    )
    # Plot the tempogram with the detected boundaries on top
    librosa.display.specshow(
        tempogram,
        y_axis="tempo",
        x_axis="time",
        ax=axs[idx],
        hop_length=hop_length,
        sr=sampling_rate,
        cmap="magma",
    )
    axs[idx].vlines(
        rpt_bpks_times, 16, 480, color="linen", linestyle="--", linewidth=1.5, alpha=0.9
    )
    axs[idx].set(title=f"Algo : {type(algo).__name__}, Cost : {cost}")
    axs[idx].label_outer()