# Funciones Musicales

In [None]:
import librosa
import math
import matplotlib.pyplot as plt
import numpy as np
import os
from os.path import join
import pandas as pd
import scipy as sp

Cuarto de tono anterior y posterior:
$\quad f_\text{anterior} = \dfrac{f_\text{actual}}{2^\frac{1}{24}}\qquad f_\text{posterior} = 2^\frac{1}{24} f_\text{actual}$

In [None]:
def quarter_tone_previous(f):
    """Return the frequency one quarter tone below ``f``.

    A quarter tone is 1/24 of an octave, so the result is ``f`` divided by
    ``2 ** (1 / 24)``.
    """
    quarter_tone_ratio = 2 ** (1.0 / 24.0)
    return f / quarter_tone_ratio

In [None]:
def quarter_tone_posterior(f):
    """Return the frequency one quarter tone above ``f``.

    A quarter tone is 1/24 of an octave, so the result is ``f`` multiplied by
    ``2 ** (1 / 24)``.
    """
    quarter_tone_ratio = 2 ** (1.0 / 24.0)
    return f * quarter_tone_ratio

# Funciones de Audio DSP
Lectura de Audio y Generación de Matriz de Frames.

Sea $s$ la señal de audio; la transformada de Fourier de tiempo corto (STFT) $S(\omega, t)$ se define como:

$$S(\omega, t) = \displaystyle \sum_{n = -\infty}^{\infty} s[n]\,w[n - t]\,e^{-j\omega n}$$

Con $w$ una ventana, que en este caso corresponde a la ventana Hanning.

$\omega = 2\pi f $

La STFT está en el dominio de frecuencias lineal; sin embargo, la escala musical es de tipo logarítmico. Para realizar esta transformación se multiplican las ventanas temporales de la STFT por unos filtros triangulares $\Delta_f$, que se definen a continuación:

En un intervalo de frecuencias de un semitono: $\Delta f_\text{semitono} = f_\text{end} - f_\text{in}$, y de la misma forma para cada cuarto de tono:

Si $ f_\text{in} \leq f \leq f_\text{c} \Rightarrow \Delta_f = \dfrac{f - f_\text{in}}{f_c - f_\text{in}}$.

Si $f_c \leq f \leq f_\text{end} \Rightarrow \Delta_f = \dfrac{f_\text{end} - f}{f_c - f_\text{in}}$ 

Para cualquier otro caso $\Delta_f = 0$.



In [None]:
def triang_filt_quarter_note(fs, n_fft, n_quarter_tones, qt_freq, f_first_note):
    """Build a bank of triangular quarter-tone filters over the STFT bins.

    Column ``jdx`` rises linearly from 0 at the band's lower edge to 1 at its
    centre ``qt_freq[jdx]`` and then falls back to 0 one quarter tone above
    the centre. The lower edge of the first band is one quarter tone below
    ``f_first_note`` and moves up by one quarter tone for every column.

    Row ``k`` is evaluated at ``k * fs / n_fft``, the centre frequency of bin
    ``k`` of a one-sided spectrum with ``n_fft / 2 + 1`` bins, so the bank can
    be multiplied directly against such a spectrum.

    Args:
        fs: Sampling rate, in the same units as the frequencies in ``qt_freq``.
        n_fft: FFT size; the bank has ``int(n_fft / 2) + 1`` rows.
        n_quarter_tones: Number of filters (columns) to build.
        qt_freq: Centre frequency of each filter, indexed by filter number.
        f_first_note: Frequency whose lower quarter tone is the lower edge of
            the first filter.

    Returns:
        ``numpy.ndarray`` of shape ``(int(n_fft / 2) + 1, n_quarter_tones)``.
    """
    n_bins = int(n_fft / 2) + 1
    triang_filt = np.zeros(shape=(n_bins, n_quarter_tones))
    # One frequency per spectrum row, starting at 0 (DC). Building it with an
    # integer bin count keeps it the same length as the filter bank.
    f_vec = np.arange(n_bins) * (float(fs) / n_fft)
    # Same ratio used by quarter_tone_previous / quarter_tone_posterior.
    quarter_tone = 2 ** (1.0 / 24.0)
    f_inic = f_first_note / quarter_tone
    for jdx in range(n_quarter_tones):
        f_c = qt_freq[jdx]
        f_fin = f_c * quarter_tone
        # Both slopes are normalised by the rising width, as in the notebook's
        # formula; the falling edge therefore starts at exactly 1 only when
        # both sides of the triangle have the same width.
        rise_width = f_c - f_inic
        rising = (f_vec >= f_inic) & (f_vec <= f_c)
        falling = (f_vec > f_c) & (f_vec <= f_fin)
        triang_filt[rising, jdx] = (f_vec[rising] - f_inic) / rise_width
        triang_filt[falling, jdx] = (f_fin - f_vec[falling]) / rise_width
        f_inic = f_inic * quarter_tone
    return triang_filt

In [None]:
import sys
# Added to the band energies before log10 so that empty bands do not give -inf.
eps = sys.float_info.epsilon


def wav_to_numpy(filename, qt_freq, show_plot=False, export=True):
    """Convert one audio file into a log-power quarter-tone spectrogram.

    The file is loaded with librosa, its STFT power spectrum is projected onto
    the triangular quarter-tone filter bank, bands 23..169 are kept and
    ``log10`` is applied. The module-level ``win_length``, ``n_fft``,
    ``n_quarter_tones``, ``f_b0``, ``quarter_tones`` and ``eps`` are read.

    Args:
        filename: Path of the audio file.
        qt_freq: Centre frequencies handed to ``triang_filt_quarter_note``.
        show_plot: When true, display the spectrogram with matplotlib.
        export: When true, save the spectrogram under ``npy/`` and its time
            vector under ``timevector/`` next to the audio file, both named
            ``<stem>_(prueba).npy``. The folders are created if missing.

    Returns:
        None.
    """
    print("Convirtiendo " + filename + " a npy.")
    f_inicial = 23  # first band kept: a quarter tone below B1
    f_final = 169 + 1  # one past the last band kept (a quarter tone above B7)
    sig, fs = librosa.load(filename)
    power_spectra = np.abs(librosa.stft(sig, win_length=win_length, n_fft=n_fft)) ** 2
    triang_filt = triang_filt_quarter_note(fs, n_fft, n_quarter_tones, qt_freq, f_first_note=f_b0)
    quarter_note_spectrogram = np.matmul(np.transpose(triang_filt), power_spectra)
    quarter_note_spectrogram = np.log10(quarter_note_spectrogram[f_inicial:f_final, :] + eps)
    n_bands = np.size(quarter_note_spectrogram, 0)
    n_frames = np.size(quarter_note_spectrogram, 1)
    duration = len(sig) / fs
    if show_plot:
        indexes = 10
        plt.rcParams['figure.figsize'] = [25, 15]
        with plt.rc_context({'axes.edgecolor':'white',  'axes.labelcolor': 'white', 'xtick.color':'white',
                                 'ytick.color':'white', 'figure.facecolor':'black'}):
            # 'lower' puts the lowest band at the bottom; imshow only accepts
            # 'upper' or 'lower' for origin.
            plt.imshow(quarter_note_spectrogram, origin='lower', aspect='auto')
            plt.xticks(np.linspace(0, n_frames, indexes), np.round(np.linspace(0, duration, indexes), 2))
            plt.yticks(range(n_bands), quarter_tones[f_inicial:f_final])
            plt.show()

    timevector_librosa = np.linspace(1. / fs, duration, num=n_frames)
    if export:
        folder, basename = os.path.split(filename)
        # splitext drops the extension whatever its length.
        stem = os.path.splitext(basename)[0] + '_(prueba)'
        for subfolder, data in (('npy', quarter_note_spectrogram),
                                ('timevector', timevector_librosa)):
            target_dir = join(folder, subfolder)
            os.makedirs(target_dir, exist_ok=True)
            np.save(join(target_dir, stem), data)

    
def wav_to_npy_dataset(db_path, qt_freq, show_plot=True, export=False):
    """Run ``wav_to_numpy`` on every ``.wav`` file of every album folder.

    Albums are the sub-directories of ``db_path``; entries ending in ``.txt``
    and anything that is not a directory are skipped. Albums and songs are
    processed in sorted order.

    Args:
        db_path: Root folder holding one sub-folder per album.
        qt_freq: Centre frequencies forwarded to ``wav_to_numpy``.
        show_plot: Forwarded to ``wav_to_numpy``; defaults to the value this
            function previously hard-coded.
        export: Forwarded to ``wav_to_numpy``; defaults to the value this
            function previously hard-coded.

    Returns:
        None.
    """
    for album in sorted(os.listdir(db_path)):
        album_dir = join(db_path, album)
        # Listing a plain file would raise, so only real folders are visited.
        if album.endswith('.txt') or not os.path.isdir(album_dir):
            continue
        for song in sorted(os.listdir(album_dir)):
            if song.endswith(".wav"):
                wav_to_numpy(join(album_dir, song), qt_freq, show_plot=show_plot, export=export)

            
# Frequency used for B0, the first entry of music_notes.
f_b0 = 30.87

# Chromatic note names from B0 up to D#8: B0, then octaves 1-7 (C..B), then
# the first four notes of octave 8.
pitch_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
music_notes = (['B0']
               + [name + str(octave) for octave in range(1, 8) for name in pitch_names]
               + ['C8', 'C#8', 'D8', 'D#8'])

# Every note contributes two bands: the note itself and the quarter tone above
# it, which is labelled with a blank so plot ticks only show note names.
quarter_tones = []
quarter_tones_frequency = []
for idx, note in enumerate(music_notes):
    actual_note = f_b0 * (2 ** (idx / 12.0))
    quarter_tones.extend((note, ' '))
    quarter_tones_frequency.extend((actual_note, quarter_tone_posterior(actual_note)))
n_quarter_tones = len(quarter_tones)

# Analysis parameters read as globals by wav_to_numpy.
n_quarter_tones = 178
win_length = 4 * 2048
n_fft = win_length
overlap = int(win_length * 0.5)
concatenated_frames = 15
l_supervector = concatenated_frames * n_quarter_tones
f_inic = quarter_tone_previous(f_b0)

# Reference song and its previously stored time vector.
filename = join(os.getcwd(), 'Database', 'Carole King - Tapestry', "01 - Carole King - I Feel The Earth Move.wav")
timevector_old = np.load(join(os.getcwd(), 'Database', 'Carole King - Tapestry', 'timevector',
                              "01 - Carole King - I Feel The Earth Move_timevector.npy"))

# Process every album found under ./Database.
db_path = os.getcwd() + '/Database'
wav_to_npy_dataset(db_path, quarter_tones_frequency)



# Diccionario de Acordes

In [1]:
def chord_dict(chords_list):
    """Map chord labels to 12-element binary pitch-class templates.

    Labels are ``'<root>:maj'`` or ``'<root>:min'`` for the twelve roots
    ``C, C#, ..., B`` plus ``'N'``, whose template is all zeros. Position 0 of
    a template is C and position 11 is B; a 1 marks a pitch class that belongs
    to the triad.

    Args:
        chords_list: Iterable of chord labels.

    Returns:
        ``numpy.ndarray`` with one template row per label, in input order.
    """
    roots = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # Semitone offsets from the root for each triad quality.
    triads = (('maj', (0, 4, 7)), ('min', (0, 3, 7)))

    labels = []
    templates = []
    for root_index, root in enumerate(roots):
        for quality, offsets in triads:
            members = {(root_index + offset) % 12 for offset in offsets}
            labels.append(root + ':' + quality)
            templates.append([1 if pitch in members else 0 for pitch in range(12)])
    labels.append('N')
    templates.append([0] * 12)

    lookup = pd.Series(templates, index=labels)
    return np.array([lookup[chord] for chord in chords_list])

# Sala de Pruebas

In [None]:
import librosa
import math
import matplotlib.pyplot as plt
import numpy as np
import os
from os.path import join
import pandas as pd
import scipy as sp

# Analysis parameters for the test bench.
f_b0 = 30.87
f_inic = quarter_tone_previous(f_b0)
win_length = 4 * 2048
overlap = int(win_length * 0.5)
n_fft = win_length
concatenated_frames = 15

# Everything needed for the triangular filters: every note contributes two
# bands, the note itself and the quarter tone above it (labelled with a blank).
quarter_tones = []
quarter_tones_frequency = []
music_notes = ['B0', 'C1', 'C#1', 'D1', 'D#1', 'E1', 'F1', 'F#1', 'G1', 'G#1', 'A1', 'A#1',
              'B1', 'C2', 'C#2', 'D2', 'D#2', 'E2', 'F2', 'F#2', 'G2', 'G#2', 'A2', 'A#2',
              'B2', 'C3', 'C#3', 'D3', 'D#3', 'E3', 'F3', 'F#3', 'G3', 'G#3', 'A3', 'A#3',
              'B3', 'C4', 'C#4', 'D4', 'D#4', 'E4', 'F4', 'F#4', 'G4', 'G#4', 'A4', 'A#4',
              'B4', 'C5', 'C#5', 'D5', 'D#5', 'E5', 'F5', 'F#5', 'G5', 'G#5', 'A5', 'A#5',
              'B5', 'C6', 'C#6', 'D6', 'D#6', 'E6', 'F6', 'F#6', 'G6', 'G#6', 'A6', 'A#6',
              'B6', 'C7', 'C#7', 'D7', 'D#7', 'E7', 'F7', 'F#7', 'G7', 'G#7', 'A7', 'A#7',
              'B7', 'C8', 'C#8', 'D8', 'D#8']
for idx, note in enumerate(music_notes):
    actual_note = f_b0 * (2 ** (idx / 12.0))
    quarter_tones.append(note)
    quarter_tones.append(' ')
    quarter_tones_frequency.append(actual_note)
    quarter_tones_frequency.append(quarter_tone_posterior(actual_note))
n_quarter_tones = len(quarter_tones)
print(n_quarter_tones)
# Computed only after n_quarter_tones is known for this cell.
l_supervector = concatenated_frames * n_quarter_tones

# Power spectrum of the reference song and the matching filter bank.
filename = join(os.getcwd(), 'Database', 'Carole King - Tapestry', "01 - Carole King - I Feel The Earth Move.wav")
sig, fs = librosa.load(filename)
power_spectra = np.abs(librosa.stft(sig, win_length=win_length, n_fft=n_fft)) ** 2
# triang_filt_quarter_note requires the centre frequencies as its fourth argument.
triang_filt = triang_filt_quarter_note(fs, n_fft, n_quarter_tones, quarter_tones_frequency,
                                       f_first_note=f_b0)

In [None]:
import matplotlib.animation as animation
plt.rcParams['figure.figsize'] = [20, 5]
plot_range = 1975  # number of spectrum bins shown on the x axis
indexes = 10       # number of x ticks
freq = 170         # column of the filter bank to plot

# Centre frequency of every spectrum bin (bin k sits at k * fs / n_fft); used
# only to label the x axis in the same units as fs.
f_vec = np.arange(int(n_fft / 2) + 1) * (float(fs) / n_fft)

with plt.rc_context({'axes.edgecolor':'white',  'axes.labelcolor': 'white', 'xtick.color':'white',
                         'ytick.color':'white', 'figure.facecolor':'black'}):
    # Plot one filter of the bank built in the previous cell.
    plt.plot(triang_filt[:plot_range, freq])
    plt.xticks(np.linspace(0, plot_range, indexes), np.round(f_vec[np.int32(np.linspace(0, plot_range, indexes))]))

In [None]:
# Sanity check: the filter bank must have as many rows as the spectrum.
print(np.size(triang_filt, 0), np.size(triang_filt, 1))
print(np.size(power_spectra, 0), np.size(power_spectra, 1))
print(int(n_fft/ 2))
# Log-power quarter-tone spectrogram. Machine epsilon is added before log10,
# as wav_to_numpy does, so bands with zero energy do not become -inf.
quarter_note_spectrogram = np.log10(
    np.matmul(np.transpose(triang_filt), power_spectra) + np.finfo(float).eps)

In [None]:
cont = 0
# List every band index with its label and centre frequency.
for n, f in enumerate(quarter_tones_frequency):
    print(n, quarter_tones[n], f)

# New first band: index 23 (about 60 Hz), a quarter tone below B1.
# NOTE(review): the original note also mentions a final frequency of 3951 Hz
# without an index; the range plotted here is `octavas` octaves (24 bands
# each) starting at f_inicial.
f_inicial = 23
octavas = 2
f_final = f_inicial + 24 * octavas
indexes = 10
plt.rcParams['figure.figsize'] = [25, 15]
# quarter_note_spectrogram is already log10-scaled where it is computed, so it
# is plotted as is; taking log10 again would turn negative values into NaN.
spectrogram_slice = quarter_note_spectrogram[f_inicial:f_final, :]
with plt.rc_context({'axes.edgecolor':'white',  'axes.labelcolor': 'white', 'xtick.color':'white',
                         'ytick.color':'white', 'figure.facecolor':'black'}):
    # 'lower' puts the lowest band at the bottom; imshow only accepts 'upper'
    # or 'lower' for origin.
    plt.imshow(spectrogram_slice, origin='lower', aspect='auto')
    plt.xticks(np.linspace(0, np.size(quarter_note_spectrogram, 1), indexes), np.round(np.linspace(0, len(sig) / fs, indexes),2))
    plt.yticks(range(np.size(spectrogram_slice, 0)),  quarter_tones[f_inicial:f_final])