# Classificazione specie
A partire dal dataset dei vocalizzi di Watkins si procede a creare un dataset per la classificazione delle specie di mammiferi marini più popolose. <br>
Come primo step, tutti gli audio vengono ritagliati in segmenti di lunghezza pari a 2 secondi; gli audio più corti vengono eliminati.<br>
Per gli audio la cui durata in secondi è dispari si può scegliere se scartare il secondo in eccesso oppure effettuare, in generale, il ritaglio con una sliding window con hop di 1 secondo.

In [18]:
import os
import pandas as pd
from pathlib import Path
import soundfile as sf
from soundfile import LibsndfileError
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

import tensorflow as tf

# Base folder that contains both the metadata and the audio dataset sub-folders
data_path = '../dataset_creation'
# Sub-folder of data_path that holds 'metadata.csv'
md_path = 'metadata'
# Sub-folder of data_path with one directory of .wav files per species
ds_path = 'dataset'
# Folder where the generated spectrogram images are written
spec_path = 'spectrograms'

Nel file 'metadata.csv' sono presenti tutte le durate dei singoli file audio, quindi vado a contare quanti ritagli da 2 secondi riesco ad ottenere per ogni specie con la sliding window con hop di 1 secondo.

In [2]:
# Load the per-file metadata table
metadata_csv = Path(data_path, md_path, 'metadata.csv')
md_ds = pd.read_csv(metadata_csv)

def time_norm(row):
    """Store the whole-second duration of a row's audio file in ``row['CS:']``.

    The file is located at ``data_path/ds_path/<species>/<filename>.wav`` and
    its duration is computed as ``frames / samplerate`` truncated to an int.

    Parameters
    ----------
    row : pandas.Series
        A metadata row providing at least ``'species'`` and ``'filename'``.

    Returns
    -------
    pandas.Series
        The same row with ``'CS:'`` set to the duration in seconds, or to
        ``None`` when the file cannot be opened by libsndfile.
    """
    wav_path = Path(data_path, ds_path, row['species'], row['filename'] + '.wav')
    try:
        # The context manager guarantees the handle is released even if the
        # duration computation raises.
        with sf.SoundFile(wav_path) as f:
            row['CS:'] = int(f.frames / f.samplerate)
    except LibsndfileError:
        print('corrupted file:', row['filename'])
        row['CS:'] = None
    return row

# Recompute the duration of every file and discard the rows whose file could not be read
md_ds = md_ds.apply(time_norm, axis=1).dropna(subset=['CS:'])
# Drop the recordings whose whole-second duration is 1 or less
long_md_ds = md_ds[md_ds['CS:'].gt(1)]

corrupted file: 8800501J
corrupted file: 8800501K
corrupted file: 8800501L
corrupted file: 8800501M
corrupted file: 8800501N
corrupted file: 8800501O
corrupted file: 8800501P
corrupted file: 8800501Q
corrupted file: 8800501R
corrupted file: 8800501S
corrupted file: 8800501T
corrupted file: 8800501U
corrupted file: 8800501V
corrupted file: 8800501W
corrupted file: 8800501X


In [3]:
# Columns that are not needed for the per-species duration totals
unused_columns = ['filename', 'CU:', 'SR:']
tot_audio_df = md_ds.drop(columns=unused_columns)
long_audio_df = long_md_ds.drop(columns=unused_columns)

# Total seconds per species, counting only the recordings kept by the length filter
df = long_audio_df.groupby('species').agg(filtered_audio_len=('CS:', 'sum'))

# Total seconds per species over all readable recordings, next to the filtered
# total and the amount lost by filtering
sum_audio_len = (
    tot_audio_df.groupby('species')
    .agg(all_audio_len=('CS:', 'sum'))
    .assign(filtered_audio_len=df['filtered_audio_len'])
    .assign(difference=lambda totals: totals['all_audio_len'] - totals['filtered_audio_len'])
)
print(sum_audio_len.sort_values(by='difference', ascending=False))

                                      all_audio_len  filtered_audio_len  \
species                                                                   
Killer Whale                                 4631.0              3473.0   
Long-Finned Pilot Whale                      2538.0              2164.0   
Common Dolphin                               1878.0              1525.0   
Striped Dolphin                               969.0               649.0   
Pantropical Spotted Dolphin                  2554.0              2277.0   
Short-Finned (Pacific) Pilot Whale           1086.0               810.0   
Spinner Dolphin                              2159.0              1946.0   
Northern Right Whale                          744.0               534.0   
Atlantic Spotted Dolphin                      402.0               194.0   
Humpback Whale                               6857.0              6656.0   
Fin, Finback Whale                          14209.0             14040.0   
Sperm Whale              

In [4]:
# Keep only the species whose 'filtered_audio_len' (per-species sum of the
# whole-second durations in 'CS:') is greater than 1000
populous_mask = sum_audio_len['filtered_audio_len'] > 1000
populous_species = sum_audio_len[populous_mask].index
filtered_df = long_md_ds[long_md_ds['species'].isin(populous_species)]
print(filtered_df['species'].unique())

['Walrus' 'Bowhead Whale' 'Fin, Finback Whale' 'Sperm Whale'
 'Common Dolphin' 'Humpback Whale' 'Killer Whale'
 'Long-Finned Pilot Whale' 'Pantropical Spotted Dolphin' 'Spinner Dolphin'
 'Weddell Seal' 'White-sided Dolphin']


## Creazione spettrogrammi

In [5]:
# STFT settings: Hann window length and hop between windows, both in samples
FRAME_SIZE = 1024
HOP_SIZE = 512

# Root folder for the spectrogram images, created if it is missing
OUTPUT_DIR = Path(spec_path)
if not os.path.exists(spec_path):
    os.makedirs(spec_path)

In [6]:
def chunk_data(data, sample_rate, chunk_size, hop):
    """Split a signal into fixed-length, possibly overlapping chunks.

    Parameters
    ----------
    data : sequence
        The samples to split; only ``len()`` and slicing are used.
    sample_rate : int or float
        Samples per second.
    chunk_size : int or float
        Chunk length in seconds.
    hop : int or float
        Distance in seconds between the starts of consecutive chunks.
        Fractional values are accepted.

    Returns
    -------
    list
        Slices of ``data``, each exactly ``int(sample_rate * chunk_size)``
        samples long. A trailing remainder shorter than a full chunk is
        dropped, so input shorter than one chunk yields an empty list.

    Raises
    ------
    ValueError
        If the chunk length or the hop is not at least one sample.
    """
    # Both durations are converted to whole sample counts.
    chunk_samples = int(sample_rate * chunk_size)
    hop_samples = int(sample_rate * hop)
    if chunk_samples <= 0 or hop_samples <= 0:
        raise ValueError(
            f'chunk_size and hop must span at least one sample '
            f'(got {chunk_samples} and {hop_samples} samples)'
        )

    # Last start offset from which a full chunk still fits; negative when the
    # signal is shorter than one chunk, which makes the range below empty.
    last_start = len(data) - chunk_samples
    return [data[start:start + chunk_samples]
            for start in range(0, last_start + 1, hop_samples)]

def save_chunk(ax, fig, data, species, file_name):
    """Draw one spectrogram chunk on ``ax`` and save the figure as a PNG.

    The image is written to ``OUTPUT_DIR/<species>/<file_name>.png``; the
    species folder is created when missing.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on; cleared before drawing.
    fig : matplotlib figure
        Figure that owns ``ax``; saved and then closed via ``plt.close``.
    data : 2-D array-like
        Values rendered with ``imshow`` (magma colormap, origin at the bottom).
    species : str
        Name of the sub-folder of ``OUTPUT_DIR`` to write into.
    file_name : str
        Output file name without the ``.png`` extension.
    """
    # Start from empty, axis-free axes so nothing from a previous chunk remains.
    ax.clear()
    ax.axis('off')
    ax.imshow(data, aspect='auto', origin='lower', cmap='magma')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    category_dir = OUTPUT_DIR / species
    category_dir.mkdir(parents=True, exist_ok=True)

    fig.savefig(category_dir / (file_name + ".png"), bbox_inches='tight', transparent=True)
    # NOTE(review): the figure is closed on every call although it is passed
    # in by the caller; a caller that reuses one figure across several calls
    # presumably relies on the non-interactive backend keeping a closed figure
    # drawable — confirm before switching backend.
    plt.close(fig)

In [7]:
# Length in seconds of each audio chunk turned into a spectrogram
CHUNK_SIZE = 2
def preprocessing(full_data, sample, species, file_name, win, hop=1):
    """Split one recording into chunks and save a log-power spectrogram of each.

    Every chunk is written through ``save_chunk`` as
    ``OUTPUT_DIR/<species>/<file_name>-<chunk index>.png``. Chunks whose image
    already exists are skipped, so an interrupted run can be resumed.

    Parameters
    ----------
    full_data : array-like
        Samples of the whole recording.
        NOTE(review): assumes a mono, 1-D signal — confirm for multi-channel files.
    sample : int
        Sample rate of ``full_data`` in Hz.
    species : str
        Species name, used as the output sub-folder.
    file_name : str
        Base name (no extension) of the source recording.
    win : array-like
        STFT window.
    hop : int, optional
        Distance in seconds between the starts of consecutive chunks.
    """
    # Render off-screen: figures are only saved, never displayed.
    matplotlib.use('Agg')

    # The audio signal is split into chunks.
    chunked_data = chunk_data(full_data, sample, CHUNK_SIZE, hop)
    if len(chunked_data) == 0:
        # Recording shorter than one chunk: nothing to render.
        return

    # The transform is the same for every chunk, so it is built once. `fs` is
    # the sampling rate of the signal; it only scales the time/frequency axes
    # of the transform object, not the STFT values.
    SFT = signal.ShortTimeFFT(win, hop=HOP_SIZE, fs=sample)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        for chunk_num, data in enumerate(chunked_data):
            composed_filename = file_name + '-' + str(chunk_num)
            # Skip chunks that were already rendered by a previous run.
            if (OUTPUT_DIR / species / (composed_filename + ".png")).exists():
                print(f'Already exists: {composed_filename}', end='\r')
                continue

            # Power spectrogram from the STFT magnitudes.
            s_x = SFT.stft(data)
            spectrogram = np.abs(s_x)**2

            # Log scale; the small offset keeps log() finite on zero bins.
            log_spectrogram = np.log(spectrogram + 1e-10)

            # Save the spectrogram chunk as a PNG image.
            save_chunk(ax, fig, log_spectrogram, species, composed_filename)
    finally:
        # Release the figure even if a chunk fails.
        plt.close(fig)

In [8]:
def species_spec(dirs, species_list, path=data_path, ds_path=ds_path):
    """Generate spectrograms for every .wav file of the selected species.

    Parameters
    ----------
    dirs : iterable of str
        Species directory names found under ``path/ds_path``.
    species_list : sequence of str
        Species to process; directories not listed here are skipped.
    path : str, optional
        Base data folder.
    ds_path : str, optional
        Sub-folder of ``path`` containing one directory per species.

    Files that libsndfile cannot read are skipped. Progress is printed to
    standard output.
    """
    # Hann window shared by all files.
    hann_win = signal.windows.hann(FRAME_SIZE)
    total_species = len(species_list)
    species_count = 0
    for curr_dir in dirs:
        if curr_dir not in species_list:
            continue
        species_count += 1
        print(f'Processing directory: {curr_dir}: {species_count}/{total_species}')
        species_dir = Path(path, ds_path, curr_dir)
        curr_files = os.listdir(species_dir)
        # enumerate keeps the counter in step with the listing even when a
        # file is skipped.
        for i, file in enumerate(curr_files, start=1):
            print(f'Processing {i}/{len(curr_files)} files in {curr_dir}', end='\r')
            if not file.endswith('.wav'):
                continue
            try:
                x, sr = sf.read(species_dir / file)
            except LibsndfileError:
                continue
            # Strip only the final extension so that names containing dots
            # stay distinct.
            preprocessing(x, sr, curr_dir, Path(file).stem, hann_win)
        if curr_files:
            # Terminate the carriage-return progress line so that it does not
            # bleed into the next directory message.
            print()

            

In [None]:
# Spectrograms of the populous species
popular_species_list = filtered_df['species'].unique()
dataset_root = Path(data_path, ds_path)
tree = os.walk(dataset_root)
root, dirs, files = next(tree)

# NOTE(review): the generation call below is disabled — presumably because
# these spectrograms were already written to disk; confirm before re-enabling.
#species_spec(dirs, popular_species_list)

In [10]:
# Count the images available in each first-level folder of spec_path
tree = os.walk(spec_path)
root, dirs, files = next(tree)
num_files_list = []
for species_dir in dirs:
    num_files_list.append(len(os.listdir(Path(spec_path, species_dir))))
num_files_series = pd.Series(num_files_list, index=dirs)
print(num_files_series.sort_values(ascending=False))

Sperm Whale                    42634
Fin, Finback Whale             13272
Humpback Whale                  6279
Bowhead Whale                   2555
Killer Whale                    2386
Spinner Dolphin                 1757
Long-Finned Pilot Whale         1684
Pantropical Spotted Dolphin     1679
Weddell Seal                    1352
Common Dolphin                  1175
White-sided Dolphin              857
Walrus                           683
dtype: int64


In [19]:
# Species directories available in the audio dataset
tree = os.walk(Path(data_path, ds_path))
root, dirs, files = next(tree)

# Every species in the metadata that is not in the populous group
all_species = md_ds['species'].unique()
unpopular_species_list = [sp for sp in all_species if sp not in popular_species_list]

species_spec(dirs, unpopular_species_list)

Processing directory: Atlantic Spotted Dolphin: 1/42
Processing directory: Bearded Seal: 2/42tted Dolphin
Processing directory: Beluga, White Whale: 3/42
Processing directory: Bottlenose Dolphin: 4/42e
Processing directory: Boutu, Amazon River Dolphin: 5/42
Processing directory: Clymene Dolphin: 6/42er Dolphin
Processing directory: Commerson's Dolphin: 7/42
Processing directory: Dall's Porpoise: 8/42
Processing directory: Dusky Dolphin: 9/42
Processing directory: False Killer Whale: 10/42
Processing directory: Finless Porpoise: 11/42e
Processing directory: Fraser's Dolphin: 12/42
Processing directory: Grampus, Risso's Dolphin: 13/42
Processing directory: Gray Seal: 14/42isso's Dolphin
Processing directory: Gray Whale: 15/42
Processing directory: Harbor Porpoise: 16/42
Processing directory: Harbour Seal: 17/42
Processing directory: Harp Seal: 18/42
Processing directory: Heaviside's Dolphin: 19/42
Processing directory: Hooded Seal: 20/42lphin
Processing directory: Irawaddy Dolphin: 21/42