In [None]:
import librosa 
import numpy as np
import matplotlib.pyplot as plt

import steme as st

import IPython.display as ipd

In [None]:
# Show the example recordings that ship with librosa.
librosa.util.list_examples()

In [None]:
# Alternative clip (any key printed by librosa.util.list_examples() works):
# x, fs = librosa.load(librosa.example("brahms"))
# Load the "choice" example; `x` is the signal, `fs` its sampling rate.
x, fs = librosa.load(librosa.example("choice"))

In [None]:
# Embed an audio player for the loaded clip.
ipd.Audio(x, rate=fs)

# Linear axis versus logarithmic axis

In [None]:
# Linear tempo grid: integer values 30, 31, ..., 349.
linear_axis = np.arange(30, 350)

# Non-linear grid provided by steme, keeping only the entries below 350.
log_axis = st.dataset.variables_non_linear()
log_axis = log_axis[log_axis < 350]

In [None]:
def plot_comparison(T, t, freqs, ttypes, subplot_titles, fig_title=None):
    """
    Plot several tempograms side by side, one subplot per tempogram.

    Parameters
    ----------
    T : sequence
        Tempogram matrices; ``T[i]`` is drawn with ``imshow`` in subplot ``i``.
    t : sequence
        Time axis of each tempogram; its first and last entries become the
        x-limits of the matching subplot.
    freqs : sequence
        Tempo axis of each tempogram; its first and last entries become the
        y-limits of the matching subplot.
    ttypes : sequence of str
        Axis type per tempogram. ``"log"`` relabels the y ticks with values
        taken from the notebook-level ``log_axis``.
    subplot_titles : sequence of str
        Title of each subplot.
    fig_title : str, optional
        Title for the whole figure; omitted when ``None``.

    Returns
    -------
    fig, ax
        The figure and the axes object(s) exactly as returned by
        ``plt.subplots``.
    """
    figsize = (15, 5)
    num_tempograms = len(T)
    fig, ax = plt.subplots(1, num_tempograms, figsize=figsize)
    # plt.subplots hands back a bare Axes for a single column; wrap it so the
    # loop below can index uniformly while the caller still gets `ax` as-is.
    axes = np.atleast_1d(ax)

    for idx in range(num_tempograms):
        panel = axes[idx]
        kwargs = st.utils._tempogram_kwargs(t[idx], freqs[idx])
        panel.imshow(T[idx], **kwargs)

        # Limits are set per panel. Setting them on every axes at once would
        # leave all panels with the limits of the last tempogram.
        panel.set_xlim(t[idx][0], t[idx][-1])
        panel.set_ylim(freqs[idx][0], freqs[idx][-1])

        if ttypes[idx] == "log":
            # NOTE(review): labels come from the global `log_axis`, not from
            # freqs[idx]; this presumes the tempogram was built on that grid.
            new_labels = np.rint(log_axis[::20]).astype(int)
            panel.set_yticklabels(new_labels)

        panel.set_xlabel("Time (s)")
        panel.set_ylabel("Tempo (BPM)")
        panel.title.set_text(subplot_titles[idx])

    # The figure title does not depend on the panel, so set it once.
    if fig_title is not None:
        fig.suptitle(fig_title, fontsize=16)
    return fig, ax

# tmp: slicing a single Fourier tempogram frame (linear vs. log axis)

In [None]:
figsize = (15, 5)
fig, ax = plt.subplots(1, 1, figsize=figsize)

# Compute the linear-axis Fourier tempogram here so this cell does not depend
# on a cell further down the notebook having been run first.
linear_fT, linear_ft, linear_ffreqs = st.audio.tempogram(
    x=x, sr=fs, window_size_seconds=10, t_type="fourier", theta=linear_axis
)
tempo_estimation = librosa.feature.tempo(y=x, sr=fs)

xlim = (linear_ft[0], linear_ft[-1])
ylim = (linear_ffreqs[0], linear_ffreqs[-1])

kwargs = st.utils._tempogram_kwargs(linear_ft, linear_ffreqs)
ax.imshow(linear_fT, **kwargs)
# Draw the estimate across the visible time range rather than a fixed 0..1000.
ax.hlines(tempo_estimation[0], xmin=xlim[0], xmax=xlim[1], color="red", label="estimated time")

plt.setp(ax, xlim=xlim, ylim=ylim)

fig.suptitle(f"Fourier Tempogram (~{np.round(tempo_estimation,2)[0]} BPM)", fontsize=16)

ax.set_xlabel("Time (s)")
ax.set_ylabel("Tempo (BPM)")

In [None]:
from matplotlib.patches import Rectangle

In [None]:
# One tempogram column (frame index 805) plotted against the linear tempo axis,
# with the librosa tempo estimate marked in red.
full_slice_data = linear_fT[:, 805]
plt.plot(linear_ffreqs, full_slice_data)
plt.title("full slice (320-dimensional array)")
plt.xlabel("BPM")
plt.vlines(tempo_estimation, ymin=0, ymax=12, color="red", alpha=0.8)
plt.xlim(30, 350)

In [None]:
fig, ax = plt.subplots(1, 1)
ax.plot(linear_ffreqs, full_slice_data)
ax.set_xlabel("BPM")

# Red marker at a fixed 170 BPM.
ax.vlines(170, ymin=0, ymax=12, color="red", alpha=0.8)

# Two windows, each 128 BPM wide: solid from 30 BPM, dashed from 180 BPM.
window_style = dict(fill=False, color="green")
ax.add_patch(Rectangle((30, 0), 128, 12, **window_style))
ax.add_patch(Rectangle((180, 0), 128, 12, linestyle="--", **window_style))

In [None]:
# First 128 bins of the slice, drawn against the bin index (not BPM).
plt.title("0-shift slice (128 samples), covering from 30 BPM to 158 BPM")
plt.plot(full_slice_data[:128])
plt.xlim(0, 128)

In [None]:
# 128-bin window shifted by 8 bins, drawn against the bin index (not BPM).
# The BPM bounds in the title are read from the tempo axis instead of being
# typed by hand, following the 0-shift title's convention (start bin to the
# bin just past the window).
shift, window = 8, 128
plt.plot(full_slice_data[shift:window + shift])
plt.title(
    f"{shift}-shift slice ({window} samples), covering from "
    f"{linear_ffreqs[shift]:.0f} BPM to {linear_ffreqs[window + shift]:.0f} BPM"
)
plt.xlim(0, window)

In [None]:
figsize = (15, 5)
fig, ax = plt.subplots(1, 1, figsize=figsize)

# Compute the log-axis Fourier tempogram here so this cell does not depend on
# a cell further down the notebook having been run first.
fT, ft, ffreqs = st.audio.tempogram(
    x=x, sr=fs, window_size_seconds=10, t_type="fourier", theta=log_axis
)
tempo_estimation = librosa.feature.tempo(y=x, sr=fs)

xlim = (ft[0], ft[-1])
ylim = (ffreqs[0], ffreqs[-1])

kwargs = st.utils._tempogram_kwargs(ft, ffreqs)
ax.imshow(fT, **kwargs)
# Draw the estimate across the visible time range rather than a fixed 0..1000.
ax.hlines(tempo_estimation[0], xmin=xlim[0], xmax=xlim[1], color="red", label="estimated time")

plt.setp(ax, xlim=xlim, ylim=ylim)

fig.suptitle(f"Fourier Tempogram (~{np.round(tempo_estimation,2)[0]} BPM)", fontsize=16)

ax.set_xlabel("Time (s)")
ax.set_ylabel("Tempo (BPM)")

In [None]:
# Size of column 805 of fT, i.e. the length of the slice plotted next.
fT[:,805].shape

In [None]:
# Same walkthrough as for the linear axis, now on the log-axis tempogram:
# column 805 of fT against ffreqs, with the tempo estimate marked in red.
full_slice_data = fT[:, 805]
plt.plot(ffreqs, full_slice_data)
plt.vlines(tempo_estimation, ymin=0, ymax=12, color="red", alpha=0.8)
plt.xscale("log")
plt.xlabel("BPM")
plt.title("full slice (153-dimensional array)")
plt.xlim(30, 350)

In [None]:
import matplotlib.ticker as ticker


In [None]:
fig, ax = plt.subplots(1, 1)
ax.plot(ffreqs, full_slice_data)
ax.vlines(tempo_estimation, ymin=0, ymax=12, color="red", alpha=0.8)

# Log-scaled tempo axis: drop the default ticks, then place one at every 15th
# tempo bin and print it as a plain number.
ax.set_xscale("log")
ax.set_xticks([], [])
ax.set_xticks(ffreqs.astype(int)[::15])
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())

ax.set_xlabel("BPM")
ax.set_title("full slice (153-dimensional array)")

# Two 128-bin windows (shifted by 11 and by 18 bins); each rectangle's width
# is the BPM span between the first and last bin of its window.
window_11 = ffreqs[11:128 + 11]
window_18 = ffreqs[18:128 + 18]
ax.add_patch(Rectangle((30, 0), window_11[-1] - window_11[0], 12, fill=False, color="green"))
ax.add_patch(Rectangle((38, 0), window_18[-1] - window_18[0], 12, fill=False, color="green", linestyle="--"))

In [None]:
# Tempo-axis values covered by the 128-bin window that starts at bin 18.
ffreqs[18:128+18]

In [None]:
# 128-bin window starting at bin 11, drawn against the bin index (not BPM).
plt.title("11-shift slice (128 samples), covering from 30 BPM to 273 BPM")
plt.plot(full_slice_data[11:11 + 128])
plt.xlim(0, 128)

In [None]:
# 128-bin window starting at bin 18, drawn against the bin index (not BPM).
# NOTE(review): the title says 34 BPM, while the rectangle drawn for this
# window in the earlier plot is anchored at 38 BPM; confirm against ffreqs[18].
plt.title("18-shift slice (128 samples), covering from 34 BPM to 308 BPM")
plt.plot(full_slice_data[18:18 + 128])
plt.xlim(0, 128)

# end tmp

# tmp 2: hybrid tempogram of a click track with increasing tempo

In [None]:
figsize = (15, 5)
fig, ax = plt.subplots(1, 1, figsize=figsize)

# Click track whose tempo steps through 50, 100, ..., 350 BPM, 10 s per step.
sr = 22500
click_segments = [
    np.ravel(st.audio.click_track(bpm=50 * i, sr=sr, duration=10))
    for i in range(1, 8)
]
# One concatenation instead of growing the array with np.append on every
# step; the leading empty float64 array keeps the dtype np.append produced.
increasing_tempo = np.concatenate([np.empty(0), *click_segments])

x_fT, x_ft, x_ffreqs = st.audio.tempogram(x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="hybrid", theta=linear_axis)

kwargs = st.utils._tempogram_kwargs(x_ft, x_ffreqs)
ax.imshow(x_fT, **kwargs)

xlim = (x_ft[0], x_ft[-1])
ylim = (x_ffreqs[0], x_ffreqs[-1])

plt.setp(ax, xlim=xlim, ylim=ylim)

fig.suptitle("Hybrid Tempogram", fontsize=16)

ax.set_xlabel("Time (s)")
ax.set_ylabel("Tempo (BPM)")

# end tmp 2

In [None]:
# Fourier tempogram of the same clip on both tempo grids; only `theta` differs.
fourier_args = dict(x=x, sr=fs, window_size_seconds=10, t_type="fourier")
linear_fT, linear_ft, linear_ffreqs = st.audio.tempogram(theta=linear_axis, **fourier_args)
fT, ft, ffreqs = st.audio.tempogram(theta=log_axis, **fourier_args)

fig, ax = plot_comparison(
    [linear_fT, fT],
    [linear_ft, ft],
    [linear_ffreqs, ffreqs],
    ["linear", "log"],
    ["linear axis", "logarithmic axis"],
    fig_title="Fourier tempogram",
)

In [None]:
# Autocorrelation tempogram of the same clip on both tempo grids.
autocorr_args = dict(x=x, sr=fs, window_size_seconds=10, t_type="autocorrelation")
linear_aT, linear_at, linear_afreqs = st.audio.tempogram(theta=linear_axis, **autocorr_args)
aT, at, afreqs = st.audio.tempogram(theta=log_axis, **autocorr_args)

fig, ax = plot_comparison(
    [linear_aT, aT],
    [linear_at, at],
    [linear_afreqs, afreqs],
    ["linear", "log"],
    ["linear axis", "logarithmic axis"],
    fig_title="Autocorrelation tempogram",
)

In [None]:
# Hybrid tempogram of the same clip on both tempo grids. Both panels use
# t_type="hybrid"; previously the linear panel was computed as an
# autocorrelation tempogram, so the figure compared two different methods.
linear_hT, linear_ht, linear_hfreqs = st.audio.tempogram(x=x, sr=fs, window_size_seconds=10, t_type="hybrid", theta=linear_axis)
hT, ht, hfreqs = st.audio.tempogram(x=x, sr=fs, window_size_seconds=10, t_type="hybrid", theta=log_axis)

fig, ax = plot_comparison(
    T=[linear_hT, hT], 
    t=[linear_ht, ht], 
    freqs=[linear_hfreqs, hfreqs], 
    subplot_titles=["linear axis", "logarithmic axis"],
    ttypes=["linear", "log"],
    fig_title="Hybrid tempogram"
)

# Same representation for steady and changing tempo

In [None]:
# Two clips for the comparison below. Note that `x` is rebound here: from this
# cell on it holds the "brahms" clip, no longer the clip loaded at the top.
x, x_fs = librosa.load(librosa.example("brahms"))
y, y_fs = librosa.load(librosa.example("choice"))

In [None]:
# Fourier tempograms of both clips on the logarithmic tempo grid.
fourier_on_log = dict(window_size_seconds=10, t_type="fourier", theta=log_axis)
x_fT, x_ft, x_ffreqs = st.audio.tempogram(x=x, sr=x_fs, **fourier_on_log)
y_fT, y_ft, y_ffreqs = st.audio.tempogram(x=y, sr=y_fs, **fourier_on_log)

fig, ax = plot_comparison(
    [x_fT, y_fT],
    [x_ft, y_ft],
    [x_ffreqs, y_ffreqs],
    ["log", "log"],
    ["changing tempo (brahms)", "steady tempo (choice)"],
    fig_title="Fourier tempogram",
)

In [None]:
# Autocorrelation tempograms of both clips on the logarithmic tempo grid.
# (The x_f*/y_f* names are reused from the Fourier cell.)
autocorr_on_log = dict(window_size_seconds=10, t_type="autocorrelation", theta=log_axis)
x_fT, x_ft, x_ffreqs = st.audio.tempogram(x=x, sr=x_fs, **autocorr_on_log)
y_fT, y_ft, y_ffreqs = st.audio.tempogram(x=y, sr=y_fs, **autocorr_on_log)

fig, ax = plot_comparison(
    [x_fT, y_fT],
    [x_ft, y_ft],
    [x_ffreqs, y_ffreqs],
    ["log", "log"],
    ["changing tempo (brahms)", "steady tempo (choice)"],
    fig_title="Autocorrelation tempogram",
)

In [None]:
# Hybrid tempograms of both clips on the logarithmic tempo grid.
# (The x_f*/y_f* names are reused from the Fourier cell.)
hybrid_on_log = dict(window_size_seconds=10, t_type="hybrid", theta=log_axis)
x_fT, x_ft, x_ffreqs = st.audio.tempogram(x=x, sr=x_fs, **hybrid_on_log)
y_fT, y_ft, y_ffreqs = st.audio.tempogram(x=y, sr=y_fs, **hybrid_on_log)

fig, ax = plot_comparison(
    [x_fT, y_fT],
    [x_ft, y_ft],
    [x_ffreqs, y_ffreqs],
    ["log", "log"],
    ["changing tempo (brahms)", "steady tempo (choice)"],
    fig_title="Hybrid tempogram",
)


In [None]:
# Click track whose tempo steps through 50, 100, ..., 350 BPM, 10 s per step.
sr = 22500
click_segments = [
    np.ravel(st.audio.click_track(bpm=50 * i, sr=sr, duration=10))
    for i in range(1, 8)
]
# One concatenation instead of growing the array with np.append on every
# step; the leading empty float64 array keeps the dtype np.append produced.
increasing_tempo = np.concatenate([np.empty(0), *click_segments])

x_fT, x_ft, x_ffreqs = st.audio.tempogram(x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="fourier", theta=linear_axis)
y_fT, y_ft, y_ffreqs = st.audio.tempogram(x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="fourier", theta=log_axis)

fig, ax = plot_comparison(
    T=[x_fT, y_fT], 
    t=[x_ft, y_ft], 
    freqs=[x_ffreqs, y_ffreqs], 
    subplot_titles=["linear", "log"],
    ttypes=["linear", "log"],
    fig_title="fourier tempogram"
)


# Interactive view

In [None]:
import holoviews as hv 
import panel as pn
hv.extension("bokeh", logo=False)

In [None]:
# Shorter click track for the interactive views: 50, 100, ..., 350 BPM with
# 3 s per step. `click_track` lives in `st.audio`; the bare name `audio` is
# not defined at this point on a fresh kernel (and later names a Panel pane).
sr = 22500
click_segments = [
    np.ravel(st.audio.click_track(bpm=50 * i, sr=sr, duration=3))
    for i in range(1, 8)
]
# One concatenation instead of growing the array with np.append on every
# step; the leading empty float64 array keeps the dtype np.append produced.
increasing_tempo = np.concatenate([np.empty(0), *click_segments])

In [None]:
# Scale the click track to 16-bit integers for the Panel audio pane.
# NOTE(review): assumes samples lie within [-1, 1]; larger magnitudes would
# not fit in int16. Confirm against st.audio.click_track.
audio_data = np.int16(increasing_tempo * 32767)

In [None]:
# Embed an audio player for the click track.
ipd.Audio(increasing_tempo, rate=sr)

## Fourier

In [None]:
# Fourier tempogram of the click track on the logarithmic tempo grid.
# `tempogram` lives in `st.audio`; the bare name `audio` is not the steme
# module (it is undefined on a fresh kernel and later names a Panel pane).
fT, ft, ffreqs = st.audio.tempogram(x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="fourier", theta=log_axis)
st.utils.plot_tempogram(fT, ft, ffreqs)

In [None]:
# Fourier tempogram of the click track on the linear tempo grid, shown as an
# image with an audio player and a playhead line on top.
fT, ft, ffreqs = st.audio.tempogram(
    x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="fourier", theta=linear_axis
)

spec_gram = hv.Image((ft, ffreqs, fT), ["Time (s)", "Tempo (BPM)"]).opts(width=600)
audio = pn.pane.Audio(audio_data, sample_rate=sr, name='Audio', throttle=500)


def update_playhead(x, y, t):
    """Return the playhead line; a tap at `x` also seeks the audio pane to `x`."""
    if x is not None:
        audio.time = x
        return hv.VLine(x)
    # No tap: follow the player's current time.
    return hv.VLine(t)


tap_stream = hv.streams.SingleTap(transient=True)
time_play_stream = hv.streams.Params(parameters=[audio.param.time], rename={'time': 't'})
dmap_time = hv.DynamicMap(update_playhead, streams=[time_play_stream, tap_stream])
out = pn.Column(audio, spec_gram * dmap_time)

In [None]:
# Render the interactive Fourier view.
out

## Autocorrelation

In [None]:
# Autocorrelation tempogram of the click track itself, on the linear tempo
# grid, so the image matches the audio being played. Previously this view
# showed `aT` (computed from the music clip) next to the click-track audio.
click_aT, click_at, click_afreqs = st.audio.tempogram(
    x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="autocorrelation", theta=linear_axis
)

spec_gram = hv.Image((click_at, click_afreqs, click_aT), ["Time (s)", "Tempo (BPM)"]).opts(width=600)
# The click track was synthesised at `sr`, so play it back at `sr` (not `fs`).
audio = pn.pane.Audio(audio_data, sample_rate=sr, name='Audio', throttle=500)

def update_playhead(x,y,t):
    """Return the playhead line; a tap at `x` also seeks the audio pane to `x`."""
    if x is None:
        return hv.VLine(t)
    else:
        audio.time = x
        return hv.VLine(x)

tap_stream = hv.streams.SingleTap(transient=True)
time_play_stream = hv.streams.Params(parameters=[audio.param.time], rename={'time': 't'})
dmap_time = hv.DynamicMap(update_playhead, streams=[time_play_stream, tap_stream])
out = pn.Column( audio, 
               (spec_gram * dmap_time))

In [None]:
# Render the interactive autocorrelation view.
out

## Hybrid

In [None]:
# Hybrid tempogram of the click track itself, on the linear tempo grid, so
# the image matches the audio being played. Previously this view showed `hT`
# (computed from the music clip) next to the click-track audio.
click_hT, click_ht, click_hfreqs = st.audio.tempogram(
    x=increasing_tempo, sr=sr, window_size_seconds=10, t_type="hybrid", theta=linear_axis
)

spec_gram = hv.Image((click_ht, click_hfreqs, click_hT), ["Time (s)", "Tempo (BPM)"]).opts(width=600)
# The click track was synthesised at `sr`, so play it back at `sr` (not `fs`).
audio = pn.pane.Audio(audio_data, sample_rate=sr, name='Audio', throttle=500)

def update_playhead(x,y,t):
    """Return the playhead line; a tap at `x` also seeks the audio pane to `x`."""
    if x is None:
        return hv.VLine(t)
    else:
        audio.time = x
        return hv.VLine(x)

tap_stream = hv.streams.SingleTap(transient=True)
time_play_stream = hv.streams.Params(parameters=[audio.param.time], rename={'time': 't'})
dmap_time = hv.DynamicMap(update_playhead, streams=[time_play_stream, tap_stream])
out = pn.Column( audio, 
               (spec_gram * dmap_time))

In [None]:
# Render the interactive hybrid view.
out