In [1]:
import numpy as np
import librosa
import IPython.display as ipd

import nn_fac.multilayer_nmf as mlnmf
import nn_fac.deep_nmf as dnmf
from nn_fac.utils.current_plot import *

import features

import model.data_manipulation
import model.autosimilarity_computation
from model import pattern_study

# %% Audio params
sr = 44100  # Sampling rate requested when loading the audio

# %% General params
plotting = False  # Switch to True if you want data to be plotted
eps = 1e-12  # Small constant added to the barwise magnitude matrix

# %% Barwise representation params
hop_length = 32  # Divided by sr when building the barwise TF matrices
subdivision = 96  # Passed as the `subdivision` argument of the barwise helpers
nb_bars = 12  # Passed as `subset_nb_bars` to the pattern study

# %% Deep NMF params
all_ranks = [32, 8]  # Ranks handed to both the multi-layer and the deep NMF
n_iter_init_deep_nmf = 100  # Iterations of the multi-layer NMF used to initialize deep NMF
n_iter = 200  # Iterations of each NMF in the multi-layer decomposition
n_iter_deep = n_iter - n_iter_init_deep_nmf  # What is left for the deep NMF loop itself

In [2]:
# Original song: read the waveform, then estimate the bars from the same file.
audio_path = 'data/Come_Together.wav'

# Mono signal resampled to `sr`; the sampling rate returned by librosa is not needed.
signal, _ = librosa.load(
    audio_path,
    sr=sr,
    mono=True,
)

# Bars are computed from the audio file path, not from `signal`.
bars = model.data_manipulation.get_bars_from_audio(audio_path)

  file_sample_rate, signal = wavfile.read(filename, mmap=True)


In [5]:
# Complex STFT of the whole song. The hop is taken from the shared `hop_length`
# setting (instead of a second hard-coded 32) so that it always agrees with the
# `hop_length/sr` frame duration handed to barwise_TF_matrix just below.
# The `spectrogram` alias is kept as in the original cell.
oversampled_spectrogram_complex = spectrogram = features.get_spectrogram(signal, sr, hop_length=hop_length, feature="stft_complex")

# Barwise TF matrices built from the two components of the complex spectrogram
# (presumably magnitude at index 0 and phase at index 1, judging by the variable
# names -- TODO confirm). `eps` is added to the first one only.
# NOTE(review): `model.barwise_input` is not imported in the visible import cell;
# this line relies on another import making it reachable as an attribute of
# `model` -- confirm it survives Restart Kernel -> Run All.
original_barwise_tf_matrix_mag = model.barwise_input.barwise_TF_matrix(oversampled_spectrogram_complex[0], bars, hop_length/sr, subdivision) + eps
original_barwise_tf_matrix_phase = model.barwise_input.barwise_TF_matrix(oversampled_spectrogram_complex[1], bars, hop_length/sr, subdivision)

# Hop length used when turning barwise matrices back into audio.
median_hop = pattern_study.get_median_hop(bars, subdivision = subdivision, sampling_rate = sr)

# Reconstruction of the unfactorized matrix with each phase-retrieval strategy.
# NOTE(review): the return values are discarded, so nothing is displayed by
# these calls themselves -- wrap them in ipd.display(...) if audio is expected.
# NOTE(review): frequency_dimension=1025 is a magic number; presumably the number
# of STFT frequency bins -- confirm against features.get_spectrogram.
print("Griffin-Lim")
pattern_study.TF_matrix_to_audio_signal(original_barwise_tf_matrix_mag, feature="stft", hop_length=median_hop, phase_retrieval="griffin_lim", barwise_tf_original_phase=original_barwise_tf_matrix_phase, frequency_dimension=1025, subdivision=subdivision)
print("Original phase")
pattern_study.TF_matrix_to_audio_signal(original_barwise_tf_matrix_mag, feature="stft", hop_length=median_hop, phase_retrieval="original_phase", barwise_tf_original_phase=original_barwise_tf_matrix_phase, frequency_dimension=1025, subdivision=subdivision)

# For the tests hereafter, define how to retrieve the phase
phase_retrieval = "original_phase"

[[2.3608123e-05 2.5555906e-05 2.7631035e-05 ... 9.2708055e-05
  9.1713664e-05 9.0160080e-05]
 [2.5762540e-05 2.7046104e-05 2.8183340e-05 ... 8.9397705e-05
  8.7602501e-05 8.5532200e-05]
 [2.7038765e-05 2.7419936e-05 2.7588430e-05 ... 7.7211305e-05
  7.7098812e-05 7.6566088e-05]
 ...
 [2.4319612e-03 2.5183933e-03 2.5971727e-03 ... 2.6041160e-03
  2.4796459e-03 2.3531762e-03]
 [1.8113722e-03 1.9522868e-03 2.1033881e-03 ... 2.5870085e-03
  2.4421250e-03 2.2901911e-03]
 [3.4693372e-04 1.7748478e-04 1.0798965e-05 ... 2.3345607e-03
  2.2041851e-03 2.0639615e-03]]
Griffin-Lim
Original phase


In [6]:
# Multi-layer NMF (beta = 1) of the barwise magnitude matrix, one NMF per rank
# in `all_ranks`; errors are returned alongside the factors.
W_multi, H_multi, errors_multi, toc_multi = mlnmf.multilayer_beta_NMF(
    original_barwise_tf_matrix_mag,
    all_ranks=all_ranks,
    beta=1,
    n_iter_max_each_nmf=n_iter,
    return_errors=True,
)

In [None]:
# Pattern study on the multi-layer NMF factors: SDR values and audio at song
# scale and pattern scale.
(
    song_sdrs_multi,
    pattern_sdrs_multi,
    subset_minmax_pattern_sdrs_multi,
    audios_song_scale_multi,
    audios_patterns_multi,
) = pattern_study.pattern_encapsulation_multinmf(
    W_multi,
    H_multi,
    phase_retrieval=phase_retrieval,
    barwise_tf_mag_original=original_barwise_tf_matrix_mag,
    barwise_tf_phase_original=original_barwise_tf_matrix_phase,
    hop_length=median_hop,
    feature="stft",
    frequency_dimension=1025,
    subdivision=subdivision,
    subset_nb_bars=nb_bars,
)

# Labels printed for the three entries of each level's pattern subset.
pattern_labels = ("max", "min", "median")

level = 0
while level < len(all_ranks):
    print(f"Level {level} of the decomposition")

    # Song-scale result for this level
    print(f"SDR song scale: {song_sdrs_multi[level]}")
    ipd.display(audios_song_scale_multi[level])

    # Pattern-scale summary: first and second entries printed as "a +- b"
    level_pattern_sdr = pattern_sdrs_multi[level]
    print(f"SDR pattern scale: {level_pattern_sdr[0]} +- {level_pattern_sdr[1]}")

    # One SDR and one audio per labelled pattern
    for label_idx in range(len(pattern_labels)):
        print(f"Pattern {pattern_labels[label_idx]} SDR: {subset_minmax_pattern_sdrs_multi[level][label_idx]}")
        ipd.display(audios_patterns_multi[level][label_idx])

    level += 1

In [None]:
# Deep KL NMF of the barwise magnitude matrix: `n_iter_init_deep_nmf` iterations
# for each initial NMF, then `n_iter_deep` iterations of the deep loop.
W_deep, H_deep, errors_deep, toc_deep = dnmf.deep_KL_NMF(
    original_barwise_tf_matrix_mag,
    all_ranks=all_ranks,
    n_iter_max_each_nmf=n_iter_init_deep_nmf,
    n_iter_max_deep_loop=n_iter_deep,
    return_errors=True,
)


In [None]:
# Pattern study on the deep NMF factors: SDR values and audio at song scale
# and pattern scale.
(
    song_sdrs_deep,
    pattern_sdrs_deep,
    subset_minmax_pattern_sdrs_deep,
    audios_song_scale_deep,
    audios_patterns_deep,
) = pattern_study.pattern_encapsulation_multinmf(
    W_deep,
    H_deep,
    phase_retrieval=phase_retrieval,
    barwise_tf_mag_original=original_barwise_tf_matrix_mag,
    barwise_tf_phase_original=original_barwise_tf_matrix_phase,
    hop_length=median_hop,
    feature="stft",
    frequency_dimension=1025,
    subdivision=subdivision,
    subset_nb_bars=nb_bars,
)

# Labels printed for the three entries of each level's pattern subset.
pattern_labels = ("max", "min", "median")

level = 0
while level < len(all_ranks):
    print(f"Level {level} of the decomposition")

    # Song-scale result for this level
    print(f"SDR song scale: {song_sdrs_deep[level]}")
    ipd.display(audios_song_scale_deep[level])

    # Pattern-scale summary: first and second entries printed as "a +- b"
    level_pattern_sdr = pattern_sdrs_deep[level]
    print(f"SDR pattern scale: {level_pattern_sdr[0]} +- {level_pattern_sdr[1]}")

    # One SDR and one audio per labelled pattern
    for label_idx in range(len(pattern_labels)):
        print(f"Pattern {pattern_labels[label_idx]} SDR: {subset_minmax_pattern_sdrs_deep[level][label_idx]}")
        ipd.display(audios_patterns_deep[level][label_idx])

    level += 1