## Objetivo:
Entender como as sub-bandas alteram a detecção de onsets e verificar se há alguma sub-banda específica que capture o instrumento responsável por carregar a informação de tempo da música.

In [None]:
import glob
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import mir_eval
import pandas as pd

import IPython.display as ipd

In [None]:
# Default figure size for every plot in this notebook.
plt.rcParams["figure.figsize"] = (15,10)

In [None]:
# Sample rate passed as `sr` to the librosa calls in this notebook.
FS = 44100
# Bin boundaries handed to `onset_strength_multi(channels=...)` to define the sub-bands.
CHANNELS = [0, 8, 32, 64, 128]

Primeiro, vamos pegar alguns áudios cuja performance não foi tão boa ao usarmos a detecção de onset sem distinção de faixas de frequência e tentar novamente separando.

In [None]:
# Recording under study and its beat annotations (CSV stored next to the WAV).
candombe_audio_path = '../datasets/candombe/csic.1995_ansina2_04.wav'
candombe, _ = librosa.load(candombe_audio_path, sr=FS)

# The CSV has no header row: first column is the timestamp, second the beat label.
x_df = pd.read_csv(
    candombe_audio_path.replace('.wav', '.csv'),
    names=["timestamp", "beat"],
)
ground_truth = x_df['timestamp'].to_numpy()

In [None]:
# Start and end of the analysed excerpt; multiplied by FS to index samples
# and compared directly against the annotation timestamps.
START = 20
STOP = 30

In [None]:
# Onset-strength envelope of each sub-band for the START..STOP excerpt.
onset_subbands = librosa.onset.onset_strength_multi(
    y=candombe[START*FS:STOP*FS], sr=FS, channels=CHANNELS
)
times = librosa.times_like(onset_subbands, sr=FS)
# Annotated beats inside the excerpt, shifted so that the excerpt starts at t = 0.
true_beats = ground_truth[(ground_truth >= START) & (ground_truth < STOP)] - START

# Onset frames detected independently on each sub-band envelope.
onset_frames = [
    librosa.onset.onset_detect(onset_envelope=envelope, sr=FS)
    for envelope in onset_subbands
]

# Default beat computation, one run per sub-band envelope.
beat_frames = [
    librosa.beat.beat_track(onset_envelope=envelope, sr=FS)[1]
    for envelope in onset_subbands
]
"""
for i in range(len(onset_subbands)):
    print(f"{'*'*10} subband {i} {'*'*10}")
    print(f"fmeasure {mir_eval.beat.f_measure(ground_truth, librosa.frames_to_time(beat_frames[i], FS))}")
    CMLc, CMLt, AMLc, AMLt = mir_eval.beat.continuity(ground_truth, librosa.frames_to_time(beat_frames[i], FS))
    print(f"CMLc = {CMLc}\nCMLt = {CMLt}\nAMLc = {AMLc}\nAMLt = {AMLt}")
"""

# One subplot per sub-band: envelope, detected onsets, detected beats and annotated beats.
num_plots = onset_subbands.shape[0]
fig, ax = plt.subplots(nrows=num_plots, sharex=True)

for band, (axis, envelope) in enumerate(zip(ax, onset_subbands)):
    peak = envelope.max()
    axis.set_title(f"subband_{band}")
    axis.plot(times, envelope, alpha=0.3, label=f'subband_{band}')
    axis.vlines(times[onset_frames[band]], 0, peak, alpha=0.3, color='r', linestyle='--', label='onsets')
    axis.vlines(times[beat_frames[band]], 0, peak, alpha=0.9, color='g', label='detected beat')
    axis.vlines(true_beats, 0, peak, color='b', alpha=0.9, label='true beat')

fig.legend()

In [None]:
song = '../datasets/candombe/csic.1995_ansina2_01.wav'

x2, _ = librosa.load(song, mono=True, sr=FS)
# Read the annotations that belong to `song`. The previous version read the CSV
# of `candombe_audio_path`, i.e. the beats of a different recording.
x2_df = pd.read_csv(song.replace('.wav', '.csv'), names=["timestamp", "beat"])
ground_truth2 = x2_df['timestamp'].values
# Annotated beats of this recording inside the excerpt, shifted so that the
# excerpt starts at t = 0 (previously taken from `ground_truth` by mistake).
true_beats = ground_truth2[(ground_truth2 >= START) & (ground_truth2 < STOP)] - START

# Onset-strength envelope of each sub-band for the START..STOP excerpt.
onset_subbands = librosa.onset.onset_strength_multi(y=x2[START*FS:STOP*FS], sr=FS, channels=CHANNELS)
times = librosa.times_like(onset_subbands, sr=FS)

# Onset frames detected independently on each sub-band envelope.
onset_frames = [
    librosa.onset.onset_detect(onset_envelope=envelope, sr=FS)
    for envelope in onset_subbands
]

# Default beat computation, one run per sub-band envelope.
beat_frames = [
    librosa.beat.beat_track(onset_envelope=envelope, sr=FS)[1]
    for envelope in onset_subbands
]

# Disabled evaluation, kept for reference. It compares against `true_beats`
# because the detected beats are relative to the start of the excerpt.
# for i, frames in enumerate(beat_frames):
#     detected = librosa.frames_to_time(frames, sr=FS)
#     print(f"{'*'*10} subband {i} {'*'*10}")
#     print(f"fmeasure {mir_eval.beat.f_measure(true_beats, detected)}")
#     CMLc, CMLt, AMLc, AMLt = mir_eval.beat.continuity(true_beats, detected)
#     print(f"CMLc = {CMLc}\nCMLt = {CMLt}\nAMLc = {AMLc}\nAMLt = {AMLt}")

# One subplot per sub-band: envelope, detected onsets, detected beats and annotated beats.
num_plots = onset_subbands.shape[0]
fig, ax = plt.subplots(nrows=num_plots, sharex=True)

for i, (axis, envelope) in enumerate(zip(ax, onset_subbands)):
    peak = envelope.max()
    axis.set_title(f"subband_{i}")
    axis.plot(times, envelope, alpha=0.3, label=f'subband_{i}')
    axis.vlines(times[onset_frames[i]], 0, peak, alpha=0.3, color='r', linestyle='--', label='onsets')
    axis.vlines(times[beat_frames[i]], 0, peak, alpha=0.9, color='g', label='detected beat')
    axis.vlines(true_beats, 0, peak, color='b', alpha=0.9, label='true beat')

fig.legend()

In [None]:
# good performance song
song = '../datasets/candombe/zavala.muniz.2014_52.wav'

x2, _ = librosa.load(song, mono=True, sr=FS)
# Read the annotations that belong to `song`. The previous version read the CSV
# of `candombe_audio_path`, i.e. the beats of a different recording.
x2_df = pd.read_csv(song.replace('.wav', '.csv'), names=["timestamp", "beat"])
ground_truth2 = x2_df['timestamp'].values

# Onset-strength envelope of each sub-band for the START..STOP excerpt.
# This cell uses its own boundaries instead of CHANNELS.
onset_subbands = librosa.onset.onset_strength_multi(y=x2[START*FS:STOP*FS], sr=FS, channels=[0, 4, 8, 64, 128])
times = librosa.times_like(onset_subbands, sr=FS)
# Annotated beats inside the excerpt, shifted so that the excerpt starts at t = 0.
true_beats = ground_truth2[(ground_truth2 >= START) & (ground_truth2 < STOP)] - START

# Onset frames detected independently on each sub-band envelope.
onset_frames = [
    librosa.onset.onset_detect(onset_envelope=envelope, sr=FS)
    for envelope in onset_subbands
]

# Default beat computation, one run per sub-band envelope.
beat_frames = [
    librosa.beat.beat_track(onset_envelope=envelope, sr=FS)[1]
    for envelope in onset_subbands
]

# One subplot per sub-band: envelope, detected onsets, detected beats and annotated beats.
num_plots = onset_subbands.shape[0]
fig, ax = plt.subplots(nrows=num_plots, sharex=True)

for i, (axis, envelope) in enumerate(zip(ax, onset_subbands)):
    peak = envelope.max()
    axis.set_title(f"subband_{i}")
    axis.plot(times, envelope, alpha=0.4, label=f'subband_{i}')
    axis.vlines(times[onset_frames[i]], 0, peak, alpha=0.4, color='r', linestyle='--', label='onsets')
    axis.vlines(times[beat_frames[i]], 0, peak, alpha=0.9, color='g', label='detected beat')
    axis.vlines(true_beats, 0, peak, color='b', alpha=0.9, label='true beat')

fig.legend()

In [None]:
# Print the beat-tracking metrics of each sub-band against the annotated beats
# of the current excerpt.
for i, frames in enumerate(beat_frames):
    # `sr` must be passed by keyword: a positional FS is rejected by current
    # librosa, and the rest of this notebook already uses `sr=FS`.
    detected_times = librosa.frames_to_time(frames, sr=FS)
    print(f"{'*'*10} subband {i} {'*'*10}")
    print(f"fmeasure {mir_eval.beat.f_measure(true_beats, detected_times)}")
    CMLc, CMLt, AMLc, AMLt = mir_eval.beat.continuity(true_beats, detected_times)
    print(f"CMLc = {CMLc}\nCMLt = {CMLt}\nAMLc = {AMLc}\nAMLt = {AMLt}")

# resultados gerais

In [None]:
# Evaluation results keyed by file path, then by sub-band name; filled by the loop below.
dataset_result = {}

In [None]:
# Evaluate beat tracking on every sub-band of every recording in the dataset.
# Paths are stored without the ".wav" suffix so the matching ".csv" annotation
# file can be derived from the same stem. Sorted so that the processing order
# (and therefore the row order of the results) is reproducible across runs.
file_path = sorted(i[:-4] for i in glob.glob('../datasets/candombe/*.wav'))

for file in file_path:
    print(f"processing {file}")
    x, fs = librosa.load(f"{file}.wav", mono=True, sr=FS)
    x_df = pd.read_csv(f"{file}.csv", names=["timestamp", "beat"])
    ground_truth = x_df['timestamp'].values

    # Onset-strength envelope of each sub-band over the whole recording.
    onset_subbands = librosa.onset.onset_strength_multi(y=x, sr=FS, channels=[0, 4, 8, 32, 128])

    # Beat tracking per sub-band. The per-sub-band onset detection that used to
    # run here was never read by the evaluation, so it has been dropped.
    beat_frames = [
        librosa.beat.beat_track(onset_envelope=envelope, sr=FS)[1]
        for envelope in onset_subbands
    ]
    librosa_timestamps = [librosa.frames_to_time(frames, sr=FS) for frames in beat_frames]

    dataset_result[file] = {}
    for i, detected in enumerate(librosa_timestamps):
        cmlc_librosa, cmlt_librosa, amlc_librosa, amlt_librosa = mir_eval.beat.continuity(ground_truth, detected)

        dataset_result[file][f"subband{i}"] = {
            "ground_truth_beats": ground_truth,
            "librosa_beats": detected,
            "f_score_librosa": mir_eval.beat.f_measure(ground_truth, detected),
            "p_score_librosa": mir_eval.beat.p_score(ground_truth, detected),
            "cmlc_librosa": cmlc_librosa,
            "cmlt_librosa": cmlt_librosa,
            "amlc_librosa": amlc_librosa,
            "amlt_librosa": amlt_librosa,
        }

In [None]:
# Flatten the nested results into one row per (file, sub-band) pair; the tuple
# keys become the two levels of the row index.
flat_results = {
    (path, band): metrics
    for path, bands in dataset_result.items()
    for band, metrics in bands.items()
}
df = pd.DataFrame.from_dict(flat_results, orient='index')

In [None]:
# Display the full results table.
df

In [None]:
# Median of each score over all files for sub-band 0. `numeric_only=True` skips
# the columns that hold beat arrays, which cannot be reduced to a median.
df.xs('subband0', level=1, drop_level=False).median(numeric_only=True)

In [None]:
# Median of each score over all files for sub-band 1. `numeric_only=True` skips
# the columns that hold beat arrays, which cannot be reduced to a median.
df.xs('subband1', level=1, drop_level=False).median(numeric_only=True)

In [None]:
# Median of each score over all files for sub-band 2. `numeric_only=True` skips
# the columns that hold beat arrays, which cannot be reduced to a median.
df.xs('subband2', level=1, drop_level=False).median(numeric_only=True)

In [None]:
# Median of each score over all files for sub-band 3. `numeric_only=True` skips
# the columns that hold beat arrays, which cannot be reduced to a median.
df.xs('subband3', level=1, drop_level=False).median(numeric_only=True)

In [None]:
# Per-file scores for sub-band 0, lowest F-measure first.
(
    df.xs('subband0', level=1, drop_level=False)
    [['f_score_librosa', 'p_score_librosa', 'cmlc_librosa', 'cmlt_librosa', 'amlc_librosa', 'amlt_librosa']]
    .sort_values('f_score_librosa', ascending=True)
)

In [None]:
# Per-file scores for sub-band 1, lowest F-measure first.
(
    df.xs('subband1', level=1, drop_level=False)
    [['f_score_librosa', 'p_score_librosa', 'cmlc_librosa', 'cmlt_librosa', 'amlc_librosa', 'amlt_librosa']]
    .sort_values('f_score_librosa', ascending=True)
)

In [None]:
# Per-file scores for sub-band 2, lowest F-measure first.
(
    df.xs('subband2', level=1, drop_level=False)
    [['f_score_librosa', 'p_score_librosa', 'cmlc_librosa', 'cmlt_librosa', 'amlc_librosa', 'amlt_librosa']]
    .sort_values('f_score_librosa', ascending=True)
)

In [None]:
# Per-file scores for sub-band 3, lowest F-measure first.
(
    df.xs('subband3', level=1, drop_level=False)
    [['f_score_librosa', 'p_score_librosa', 'cmlc_librosa', 'cmlt_librosa', 'amlc_librosa', 'amlt_librosa']]
    .sort_values('f_score_librosa', ascending=True)
)

In [None]:
# Frequencies of the FFT bins for a 2048-point FFT at sample rate FS.
freqs = librosa.fft_frequencies(sr=FS, n_fft=2048)

In [None]:
# Number of FFT bins.
freqs.shape

In [None]:
# Frequencies of the first five FFT bins.
# NOTE(review): the sub-bands are selected through `channels` of
# onset_strength_multi, which presumably indexes mel bands rather than FFT
# bins — confirm that FFT-bin frequencies are the right reference here.
subband0_freq = freqs[0:5]

In [None]:
# Highest frequency among the selected bins.
subband0_freq_max = subband0_freq[-1]

In [None]:
# Display the value.
subband0_freq_max

# áudios

In [None]:
# Load the full recording as mono at sample rate FS.
x, fs = librosa.load('../datasets/candombe/csic.1995_ansina2_04.wav', sr=FS, mono=True)

In [None]:
# Magnitude of the STFT of the full recording; np.abs discards the phase.
X = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))

In [None]:
# Sub-band onset-strength envelopes over the whole recording.
# NOTE(review): the last boundary (256) is larger than the 128 mel bands used
# elsewhere in this notebook — confirm it is intended.
onset_subbands = librosa.onset.onset_strength_multi(y=x, sr=FS, channels=[0, 4, 8, 32, 128, 256])

In [None]:
# Mel spectrogram of the full recording: 128 bands, upper limit 11025 Hz.
S = librosa.feature.melspectrogram(y=x, sr=FS, n_mels=128, fmax=11025.0)

In [None]:
# Show the mel spectrogram in dB. `S` comes from melspectrogram with its default
# settings, i.e. a power spectrogram, so it is converted with power_to_db
# (amplitude_to_db would double the dB range). `sr` and `fmax` are passed so the
# mel axis matches the settings used to compute S.
librosa.display.specshow(librosa.power_to_db(S, ref=np.max), y_axis='mel', sr=FS, fmax=11025.0)

In [None]:
# Work on a copy of the mel spectrogram and keep only its five lowest bands;
# every band from index 5 upwards is set to zero.
Y = S.copy()
Y[5:, :] = 0

In [None]:
# First five points of a 128-point mel scale between 0 and 11025 Hz.
# NOTE(review): the mel filter bank may be built from more points than n_mels,
# so these might not be the exact centres of the bands kept in Y — confirm.
librosa.mel_frequencies(n_mels=128, fmin=0.0, fmax=11025.0, htk=False)[0:5]

In [None]:
# Show the band-limited mel spectrogram in dB. `Y` is a masked copy of the power
# mel spectrogram, so it is converted with power_to_db (amplitude_to_db would
# double the dB range). `sr` and `fmax` are passed so the mel axis matches the
# settings used to compute the spectrogram.
librosa.display.specshow(librosa.power_to_db(Y, ref=np.max), y_axis='mel', sr=FS, fmax=11025.0)

In [None]:
# Audio player for the original recording.
ipd.Audio(x, rate=FS)

In [None]:
# Waveform of the first 10 * FS samples.
plt.plot(x[0:10*FS])

In [None]:
# Resynthesise audio from the band-limited mel spectrogram. `Y` holds mel bands,
# not STFT bins, so it cannot be fed to istft directly; mel_to_audio first maps
# the mel bands back to an STFT magnitude and then estimates the phase.
# The parameters mirror the ones used to compute the mel spectrogram.
subband0_audio = librosa.feature.inverse.mel_to_audio(
    Y, sr=FS, n_fft=2048, hop_length=512, fmax=11025.0
)

In [None]:
# TODO: add the ground-truth clicks to have a reference
ipd.Audio(subband0_audio, rate=FS)

In [None]:
# Waveform of the resynthesised sub-band 0 signal.
plt.plot(subband0_audio)

In [None]:
# Band-pass in the STFT domain: keep only FFT bins 5 to 8 and zero the rest.
# W is built from the complex STFT rather than from the magnitude-only X, so
# the phase is preserved and the inverse STFT can reconstruct a real signal.
# np.abs(W) still yields the magnitude wherever it is needed for display.
W = librosa.stft(x, n_fft=2048, hop_length=512)
W[0:5, :] = 0
W[9:, :] = 0

In [None]:
# Show the band-limited STFT magnitude in dB on a log-frequency axis. `sr` and
# `hop_length` are passed explicitly; without `sr` the frequency axis would be
# labelled for specshow's default sample rate instead of FS.
librosa.display.specshow(
    librosa.amplitude_to_db(np.abs(W), ref=np.max),
    y_axis='log',
    sr=FS,
    hop_length=512,
)

In [None]:
# Resynthesise the band-limited spectrogram W with the inverse STFT and play it.
# NOTE(review): if W holds magnitudes only, istft reconstructs with zero phase —
# confirm the resulting audio is what is intended.
subband1_audio = np.real(librosa.istft(W))
ipd.Audio(subband1_audio, rate=FS)