# Imports

In [24]:
import tensorflow as tf
import tensorflow_io as tfio

In [25]:
from IPython.display import Audio

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact

In [27]:
%load_ext autoreload 
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
from birds.data.spectrogram import convert_audio_file_to_audio_tensor, convert_to_tensor, resample_audio_tensor, split_tensor, harmonize_tensor_shape
from birds.data.spectrogram import generate_spectrogram, generate_mel_spectrogram, generate_db_scale_mel_spectrogram

# Dataset

In [57]:
# Root folder of the raw data; the training clips live in a sub-folder of it.
RAW_DATA_PATH = '../raw_data/'
TRAIN_SET_PATH = RAW_DATA_PATH + 'data_30s/train/'

In [58]:
# Load the per-recording metadata table stored at the root of the raw data folder.
metadata_train = pd.read_csv(RAW_DATA_PATH + 'metadata_train.csv')

In [59]:
# Preview the first three rows of the metadata table.
metadata_train.head(3)

Unnamed: 0,Recording_ID,Genus,Specific_epithet,Subspecies,English_name,Recordist,Country,Locality,Latitude,Longitude,...,Other_species24,Other_species25,Other_species26,Other_species27,Other_species28,Other_species29,Other_species30,Species,Path,Target
0,447407,Sonus,naturalis,,Soundscape,José Carlos Sires,Spain,"Córdoba, Córdoba, Andalucía",37.9413,-4.8958,...,,,,,,,,Sonus naturalis,Sonus-naturalis-447407_tens.ogg,0
1,387437,Sonus,naturalis,,Soundscape,José Carlos Sires,Spain,"el planerón, belchite, zaragoza, aragón",41.2784,-0.7328,...,,,,,,,,Sonus naturalis,Sonus-naturalis-387437_tens.ogg,0
2,383228,Sonus,naturalis,,Soundscape,José Carlos Sires,Spain,"río guadalmellato, córdoba, andalucía",38.0306,-4.6698,...,,,,,,,,Sonus naturalis,Sonus-naturalis-383228_tens.ogg,0


In [60]:
# Lookup table with one row per distinct (Species, English_name, Target) combination,
# re-indexed from 0 so rows can be addressed positionally.
species_table = (
    metadata_train.copy()
    .loc[:, ['Species', 'English_name', 'Target']]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [61]:
# Preview the first three rows of the species lookup table.
species_table.head(3)

Unnamed: 0,Species,English_name,Target
0,Sonus naturalis,Soundscape,0
1,Fringilla coelebs,Common Chaffinch,1
2,Parus major,Great Tit,2


In [62]:
# Load the training labels file stored alongside the training clips.
y_train = pd.read_csv(TRAIN_SET_PATH + 'y_train.csv')

In [63]:
# Preview the first three rows of the training labels (file name and target id).
y_train.head(3)

Unnamed: 0,Path,Target
0,Sonus-naturalis-447407_tens.ogg,0
1,Sonus-naturalis-387437_tens.ogg,0
2,Sonus-naturalis-383228_tens.ogg,0


# Testing functions

In [64]:
# Pick one labelled training row to exercise the helper functions below.
EXAMPLE_ROW = 100

label_example = y_train.loc[EXAMPLE_ROW, 'Target']
path_example = TRAIN_SET_PATH + y_train.loc[EXAMPLE_ROW, 'Path']

print(path_example, label_example, sep="\n")

../raw_data/data_30s/train/Turdus-merula-537177_tens.ogg
3


In [65]:
# Load the example file and convert it to a tensor plus its sample rate.
audiotensor = convert_audio_file_to_audio_tensor(path_example)
tftensor, rate = convert_to_tensor(audiotensor)

# Sanity check: the number of samples is compared with `rate * 30`.
n_samples = tftensor.shape[0]
print(f"""tftensor.shape: {n_samples}
rate: {rate}
rate * 30 sec: {rate * 30}""")

tftensor.shape: 1323000
rate: 44100
rate * 30 sec: 1323000


In [66]:
# Resample the example tensor, then express its length in seconds at 16 000 samples/s.
tftensor_16k = resample_audio_tensor(tftensor, rate)

n_samples_16k = tftensor_16k.shape[0]
duration_16k = int(n_samples_16k / 16_000)
print(f"""tftensor_16k.shape: {n_samples_16k}
/16k: {duration_16k} sec.""")

tftensor_16k.shape: 480000
/16k: 30 sec.


In [67]:
# Split the resampled tensor, then express the resulting length in seconds at 16 000 samples/s.
tftensor_16k_10s = split_tensor(tftensor_16k)

n_samples_10s = tftensor_16k_10s.shape[0]
duration_10s = int(n_samples_10s / 16_000)
print(f"""tftensor_16k_10s.shape: {n_samples_10s}
/16k: {duration_10s} sec.""")

tftensor_16k_10s.shape: 160000
/16k: 10 sec.


In [68]:
# Display the split tensor: it still carries two columns per sample (shape (160000, 2) below).
tftensor_16k_10s

<tf.Tensor: shape=(160000, 2), dtype=float32, numpy=
array([[-2.2390836e-11,  3.1039865e-10],
       [ 9.8437855e-11, -7.9635520e-10],
       [-1.8880344e-09,  7.8098378e-12],
       ...,
       [-1.4356500e-02, -1.6975790e-02],
       [ 1.1641389e-02,  8.5713174e-03],
       [ 2.5572339e-02,  2.7551372e-02]], dtype=float32)>

In [69]:
# Harmonize the shape of the split tensor.
# NOTE(review): the variable name suggests a mono result, but the output is not inspected here - confirm.
tftensor_16k_10s_mono = harmonize_tensor_shape(tftensor_16k_10s)

In [73]:
# Run the spectrogram helper on the example file; it returns five values.
(
    spectrogram,
    harmonizedtensor,
    label,
    input_rate,
    output_rate,
) = generate_spectrogram(path_example, label_example)

In [74]:
# Run the mel-spectrogram helper on the example file.
# The four trailing names are rebound with this call's values.
(
    mel_spectrogram,
    harmonizedtensor,
    label,
    input_rate,
    output_rate,
) = generate_mel_spectrogram(path_example, label_example)

In [75]:
# Run the dB-scale mel-spectrogram helper on the example file.
# The four trailing names are rebound with this call's values.
(
    db_scale_mel_spectrogram,
    harmonizedtensor,
    label,
    input_rate,
    output_rate,
) = generate_db_scale_mel_spectrogram(path_example, label_example)

# Spectrograms

In [None]:
@interact(target=range(50), from_record=range(0, 33, 3))
def print_from_target(target, from_record):
    """Plot the log-spectrograms of up to three consecutive recordings of one target.

    Parameters
    ----------
    target : int
        Value matched against the ``Target`` column of ``y_train``.
    from_record : int
        Zero-based position, among the rows of that target, of the first
        recording to plot.

    Notes
    -----
    Reads the notebook-level ``y_train``, ``species_table`` and
    ``TRAIN_SET_PATH``. Nothing is returned; the figure is drawn as a side
    effect. When the target has no recording at ``from_record`` a message is
    printed instead of a figure.
    """
    n_panels = 3

    # Positional slice: `.loc[a:a + 3]` is end-inclusive and selected four rows.
    target_paths = y_train.loc[y_train['Target'] == target, 'Path'].reset_index(drop=True)
    files_names = list(target_paths.iloc[from_record:from_record + n_panels])

    if not files_names:
        print(f"No recording for target {target} from record n°{from_record}.")
        return

    fig, axes = plt.subplots(1, n_panels, figsize=(20, 5), constrained_layout=True, squeeze=False)
    # Axes are hidden everywhere, so panels without a recording simply stay blank.
    for ax in axes[0]:
        ax.axis('off')

    # Iterate over the files actually available rather than a fixed range(3),
    # which raised IndexError near the end of a target's recordings.
    for j, file_name in enumerate(files_names):
        file_path = TRAIN_SET_PATH + file_name
        record_name = file_name.split("_tens.ogg")[0]

        # `full_spectro_generation` is not imported in this notebook; use the imported
        # helper, whose first returned value is the spectrogram.
        # NOTE(review): the old call passed split=True - confirm generate_spectrogram
        # applies the same splitting.
        spectrogram = generate_spectrogram(file_path, target)[0]

        ax = axes[0, j]
        ax.imshow(tf.math.log(spectrogram).numpy())
        ax.set_title(
            f"Record n°{from_record + j} - {record_name}\n[spectr.] - shape: {spectrogram.shape}",
            fontsize=12,
            loc='left',
        )

    # Single lookup of the species row instead of filtering the table twice.
    species_row = species_table.loc[species_table['Target'] == target].iloc[0]
    species_name = species_row['Species']
    english_name = species_row['English_name']

    fig.suptitle(f"Target {target}: {species_name.upper()} ~ {english_name}\n", fontsize=20)


In [None]:
@interact(target=range(50), record=range(33))
def display_audio_from_target(target, record):
    """Return an audio player for one recording of the selected target.

    ``record`` is the zero-based position of the recording among the rows of
    ``y_train`` whose ``Target`` equals ``target``; an out-of-range position
    raises ``IndexError``.
    """
    is_target = y_train['Target'] == target
    target_paths = y_train.loc[is_target, 'Path']

    return Audio(TRAIN_SET_PATH + target_paths.iloc[record])

In [None]:
@interact(target=range(50), from_record=range(0, 33, 3))
def print_from_target(target, from_record):
    """Plot log-spectrograms (top row) and log-mel-spectrograms (bottom row)
    of up to three consecutive recordings of one target.

    Parameters
    ----------
    target : int
        Value matched against the ``Target`` column of ``y_train``.
    from_record : int
        Zero-based position, among the rows of that target, of the first
        recording to plot.

    Notes
    -----
    Reads the notebook-level ``y_train``, ``species_table`` and
    ``TRAIN_SET_PATH``. Nothing is returned; the figure is drawn as a side
    effect. When the target has no recording at ``from_record`` a message is
    printed instead of a figure.

    This definition reuses the name of the spectrogram-only viewer defined in
    an earlier cell and therefore rebinds it.
    """
    n_panels = 3

    # Positional slice: `.loc[a:a + 3]` is end-inclusive and selected four rows.
    target_paths = y_train.loc[y_train['Target'] == target, 'Path'].reset_index(drop=True)
    files_names = list(target_paths.iloc[from_record:from_record + n_panels])

    if not files_names:
        print(f"No recording for target {target} from record n°{from_record}.")
        return

    fig, axes = plt.subplots(2, n_panels, figsize=(20, 9), constrained_layout=True, squeeze=False)
    # Hide the columns for which no recording is available.
    for ax in axes[:, len(files_names):].ravel():
        ax.set_visible(False)

    # Iterate over the files actually available rather than a fixed range(3),
    # which raised IndexError near the end of a target's recordings.
    for j, file_name in enumerate(files_names):
        file_path = TRAIN_SET_PATH + file_name
        record_name = file_name.split("_tens.ogg")[0]

        # `full_spectro_generation` is not imported in this notebook, and
        # `generate_mel_spectrogram` is called elsewhere with (path, label) and returns
        # several values. Use both imported helpers the same way and keep the first value.
        # NOTE(review): the old call passed split=True - confirm the helpers apply the
        # same splitting.
        spectrogram = generate_spectrogram(file_path, target)[0]
        mel_spectrogram = generate_mel_spectrogram(file_path, target)[0]

        spectro_ax = axes[0, j]
        spectro_image = spectro_ax.imshow(tf.math.log(spectrogram).numpy())
        spectro_ax.set_title(
            f"Record n°{from_record + j} - {record_name}\n[spectr.] - shape: {spectrogram.shape}",
            fontsize=12,
            loc='left',
        )
        fig.colorbar(spectro_image, ax=spectro_ax)

        mel_ax = axes[1, j]
        mel_image = mel_ax.imshow(tf.math.log(mel_spectrogram).numpy())
        mel_ax.set_title(f"[mel spectr.] - shape: {mel_spectrogram.shape}", fontsize=12, loc='left')
        fig.colorbar(mel_image, ax=mel_ax)

    # Single lookup of the species row instead of filtering the table twice.
    species_row = species_table.loc[species_table['Target'] == target].iloc[0]
    species_name = species_row['Species']
    english_name = species_row['English_name']

    fig.suptitle(f"Target {target}: {species_name.upper()} ~ {english_name}\n", fontsize=20)