In [1]:
%%javascript
// Replace the output area's `_should_scroll` hook with one that always answers
// false, whatever line count it is given.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead
// of being placed in a scrollable box -- confirm against the notebook front end.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [2]:
import os

from tqdm import tqdm

import numpy as np
import pandas as pd

import librosa
import librosa.display

from audiovocana.conf import *
from audiovocana.dataset import get_dataset
from audiovocana.preprocessing import (
    get_dataframe,
    get_audio_path
)
import audiovocana.ffmpeg_utils.ffmpeg_utils as ffmpeg

import matplotlib.pyplot as plt
import seaborn as sns

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


~~~~~~ AUDIOVOCANA SETTINGS ~~~~~~
AUDIOPARAMS 
 {'sr': 250000, 'mono': True}
STFTPARAMS 
 {'n_fft': 8192, 'hop_length': 512, 'win_length': 8192, 'center': True, 'window': 'hann'}
SPECTRALCENTROIDPARAMS 
 {'sr': 250000, 'n_fft': 8192, 'hop_length': 512, 'freq': None}
SPECTRALBANDWIDTHPARAMS 
 {'sr': 250000, 'n_fft': 8192, 'hop_length': 512, 'freq': None, 'centroid': None, 'norm': True, 'p': 2}
SPECTRALFLATNESSPARAMS 
 {'n_fft': 8192, 'hop_length': 512, 'amin': 1e-10, 'power': 2.0}
ZEROCRPARAMS 
 {'frame_length': 2048, 'hop_length': 512, 'center': True}
MELPARAMS 
 {'sr': 250000, 'n_fft': 8192, 'n_mels': 128, 'fmin': 0, 'fmax': 125000.0, 'htk': True}
MFCCPAMARS 
 {'sr': 250000, 'n_mfcc': 13, 'dct_type': 2, 'norm': 'ortho', 'htk': True}
mel fiterbank shape = (128, 4097)
Minimun waveform length accepted is 4104 PCM points.
Minimun audio duration accepted is 16.416 miliseconds.
STFT time resolution = 32.768 ms.
STFT frequency resolution = 30.5101293629485 Hz.


In [3]:
# STFT settings used for the spectrogram figures produced in this notebook.
n_fft = 256
# Hop of a quarter window, i.e. 75 % overlap between consecutive frames.
# Derived from n_fft so the two values cannot drift apart.
hop_length = n_fft // 4

# Keyword arguments forwarded to librosa.display.specshow.
FFTPLOTPARAMS = {
    # for x axis settings
    'sr': SR,
    'hop_length': hop_length,
    'x_axis': 'time',
    # for y axis settings
    'fmax': FMAX,
    'y_axis': 'linear',
    # color (other colormaps tried: 'PuBu_r', 'RdBu')
    'cmap': 'gray',
}

# Keyword arguments forwarded to librosa.stft.
FFTPARAMS = {
    'n_fft': n_fft,
    'hop_length': hop_length,
    'win_length': n_fft,
    'center': True,
    'window': 'hann'
}

# Number of frequency bins in a one-sided STFT of size n_fft.
# Integer division keeps this an int (129) rather than a float (129.0), so it
# can be used directly as a shape or an index.
NBFFTBINS = 1 + FFTPARAMS['n_fft'] // 2

In [4]:
plt.style.use('ggplot')

# Shared styling constants for the plots of this notebook.
MARKER = '.'
MARKERSIZE = 4
LINESTYLE = '-'
FONTSIZE = 12
FIGSIZE = (12, 8)

# Line-plot keyword arguments built from the constants above.
kwargs = {'marker': MARKER, 'linestyle': 'solid', 'linewidth': 1, 'markersize': MARKERSIZE}

# Subset of the dataset this notebook works on.
FILTERS = {'vocalization': 1, 'year': 17}
# Folder tag built from FILTERS ("vocalization:1_year:17" for the values above).
# Deriving it keeps the results path in sync with the filters instead of
# repeating their values in a string literal.
FILTERS_TAG = "_".join(f"{key}:{value}" for key, value in FILTERS.items())

# NOTE(review): absolute, machine-specific locations -- adjust when running elsewhere.
CSV_PATH = '/home/utilisateur/Desktop/palomars/data/full/dataset.csv'
CACHE_FOLDER = '/home/utilisateur/Desktop/palomars/cache/full_dataset'
AUDIO_FOLDER = "/media/utilisateur/LACIE SHARE/paloma-USV-data/audio"
RESULTS_FOLDER = (
    "/home/utilisateur/Desktop/palomars/usv-experiments/full-dataset/results/"
    f"{FILTERS_TAG}/clustering"
)

In [5]:
# Load the event table described by the dataset CSV.
df = get_dataframe(csv_path=CSV_PATH)

# Preview the first five events.
df.head(n=5)

Reading csv from /home/utilisateur/Desktop/palomars/data/full/dataset.csv.
Found 4217 events from 46 different experiments and 132 different recordings


Unnamed: 0,t0,t1,duration,event,postnatalday,vocalization,nest,year,audio_path,experiment,recording,mother
0,12.5711,12.6146,0.0435,1,1,1,E3,19,/media/utilisateur/LACIE SHARE/paloma-USV-data...,19N3EP1,62,E
1,12.7518,12.8204,0.0686,2,1,2,E3,19,/media/utilisateur/LACIE SHARE/paloma-USV-data...,19N3EP1,62,E
2,13.0119,13.0539,0.042,3,1,2,E3,19,/media/utilisateur/LACIE SHARE/paloma-USV-data...,19N3EP1,62,E
3,13.1061,13.1425,0.0364,4,1,2,E3,19,/media/utilisateur/LACIE SHARE/paloma-USV-data...,19N3EP1,62,E
4,13.6437,13.6627,0.019,5,1,2,E3,19,/media/utilisateur/LACIE SHARE/paloma-USV-data...,19N3EP1,62,E


**Retrieve Clustering**

In [6]:
def _between_quotes(text):
    """Return the part of `text` located between its first two single quotes."""
    return text.split("'")[1]


# Cluster assignments saved by the clustering step.
assigns_path = os.path.join(RESULTS_FOLDER, "cluster_assigns/mean_stft-ward-euclidean.csv")
tmp = pd.read_csv(assigns_path)

# `nest` and `mother` are stored wrapped in single quotes; keep only the inner
# text so the values compare equal to the same columns of `df`.
tmp = tmp.assign(
    nest=tmp["nest"].apply(_between_quotes),
    mother=tmp["mother"].apply(_between_quotes),
)

# Columns that together identify one event in both tables.
MERGE_COLUMNS = ['mother', 'vocalization', 'year', 'nest', 'postnatalday', 'event', 'recording']

# Inner join: only events present in both the assignments and the dataset remain.
clustering = tmp.merge(df, on=MERGE_COLUMNS, how='inner')

In [7]:
# Materialise every merged event as its own Series so the list can be sliced
# by position; the dataframe index label is not needed.
rows = [record for _, record in clustering.iterrows()]

**Compute spectrogram**

In [8]:
# Export one spectrogram image per event and file a copy of it, for every
# clustering granularity clust2 ... clust8, under the folder of the cluster the
# event was assigned to.

# Events before this position are skipped.
# NOTE(review): presumably resumes an earlier, partial export -- set to 0 for a full run.
START_INDEX = 500
# Seconds of audio kept before the event onset.
CONTEXT_SECONDS = 0.02
# Values of n for which a `clust{n}` assignment column is read.
CLUSTER_COUNTS = range(2, 9)

for row in tqdm(rows[START_INDEX:]):

    # The excerpt starts CONTEXT_SECONDS before the onset and stops at the end
    # of the event (the extra duration only compensates the earlier start).
    start_second = row.t0 - CONTEXT_SECONDS
    duration_second = row.duration + CONTEXT_SECONDS
    if start_second < 0:
        # An event closer than CONTEXT_SECONDS to the beginning of the file:
        # start at 0 and shorten the excerpt by the part that was cut off.
        duration_second += start_second
        start_second = 0.0

    y, sr = ffmpeg.load_audio_file(
        audio_filename=row.audio_path,
        start_second=start_second,
        duration_second=duration_second
    )

    # Magnitude (not power) spectrogram.
    D = np.abs(librosa.stft(y.flatten(), **FFTPARAMS))

    fig = plt.figure(figsize=(10, 6))
    try:
        # D holds amplitudes, so it must go through amplitude_to_db;
        # power_to_db on amplitudes would halve every dB value.
        librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max), **FFTPLOTPARAMS)
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Experiment {row.experiment} - Recording {row.recording} - Event {row.event}')
        plt.tight_layout()

        figname = f"{row.experiment}_r{row.recording}_e{row.event}.png"
        for n in CLUSTER_COUNTS:
            out_dir = os.path.join(RESULTS_FOLDER, "fft_images", f"clust{n}", str(row[f'clust{n}']))
            # savefig does not create missing folders.
            os.makedirs(out_dir, exist_ok=True)
            fig.savefig(os.path.join(out_dir, figname))
    finally:
        # Always release the figure, even when an iteration fails, so that
        # figures do not pile up in memory.
        plt.close(fig)

100%|██████████| 437/437 [06:30<00:00,  1.12it/s]


In [9]:
# Completion marker printed once every cell above has run.
print("FIN ! ")

FIN ! 
