In [2]:
import os
import pandas as pd
import soundfile as sf
from soundfile import LibsndfileError
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from ds_creation.download_utils import download_dataset

# folder for metadata
md_path = os.path.join('data_cache', 'metadata')
if not os.path.exists(md_path):
    os.makedirs(md_path)

# folders for data
ds_dir = 'data'
audio_dir = 'audio'
spec_dir = 'mammals_calls_grey'

data_path = os.path.join(ds_dir, audio_dir)
if not os.path.exists(data_path):
    os.makedirs(data_path)
    
spec_path = os.path.join(ds_dir, spec_dir)
if not os.path.exists(spec_path):
    os.makedirs(spec_path)

npy_path = os.path.join(ds_dir, 'npy_files')
if not os.path.exists(npy_path):
    os.makedirs(npy_path)   

DOWNLOAD_FILES = False
url = 'https://whoicf2.whoi.edu/science/B/whalesounds/fullCuts.cfm'

## Download dataset

In [None]:
if DOWNLOAD_FILES:
    download_dataset(data_path, url, 'getSpecies', 'pickYear')

In [2]:
# Create metadata file with species and file counts
folder_list = []
for folder in os.listdir(data_path):
    folder_path = os.path.join(data_path, folder)
    print(f'Processing species {folder}')
    if os.path.isdir(folder_path):
        for file in os.listdir(folder_path):
            if file.endswith('.wav'):
                file_path = os.path.join(folder_path, file)
                try:
                    f = sf.SoundFile(file_path)
                    duration = int(f.frames / f.samplerate)
                    f.close()
                except LibsndfileError as e:
                    print(f'Error reading {file_path}: {e}')
                    continue

                folder_list.append({'species': folder, 'filename': file, 'duration_sec': duration})


species_df = pd.DataFrame(folder_list)
species_df.to_csv(os.path.join(md_path, 'files_len.csv'), index=False)

Processing species Finless Porpoise
Processing species West Indian Manatee
Processing species Southern Right Whale
Processing species Gray Whale
Processing species Harbor Porpoise
Processing species Long-Finned Pilot Whale
Processing species New Zealand Fur Seal
Processing species Long Beaked (Pacific) Common Dolphin
Processing species Fraser's Dolphin
Processing species Killer Whale
Processing species Bowhead Whale
Error reading data/audio/Bowhead Whale/8800501P.wav: Error opening 'data/audio/Bowhead Whale/8800501P.wav': Format not recognised.
Error reading data/audio/Bowhead Whale/8800501O.wav: Error opening 'data/audio/Bowhead Whale/8800501O.wav': Format not recognised.
Error reading data/audio/Bowhead Whale/8800501U.wav: Error opening 'data/audio/Bowhead Whale/8800501U.wav': Format not recognised.
Error reading data/audio/Bowhead Whale/8800501K.wav: Error opening 'data/audio/Bowhead Whale/8800501K.wav': Format not recognised.
Error reading data/audio/Bowhead Whale/8800501S.wav: Err

Nel file 'files_len.csv' sono presenti tutte le durate dei singoli file audio in secondi arrotondati per difetto, quindi vado a contare quanti ritagli da 2 secondi riesco ad ottenere per ogni specie con la sliding window con hop di 1 secondo.

In [3]:
md_ds = pd.read_csv(os.path.join(md_path, 'files_len.csv'))
md_ds = md_ds.dropna(subset=['duration_sec'])
long_md_ds = md_ds[md_ds['duration_sec'] > 1]

In [4]:
# dataframe with all audio and dataframe with audio longer than 1 second
tot_audio_df = md_ds.drop(columns=['filename'])
long_audio_df = long_md_ds.drop(columns=['filename'])

# sum of audio lengths by species
sum_audio_len = tot_audio_df.groupby('species').agg(all_audio_len = ('duration_sec', 'sum'))
sum_long_audio_len = long_audio_df.groupby('species').agg(filtered_audio_len = ('duration_sec', 'sum'))

# merge dataframes and calculate the number of seconds lost by filtering out audio shorter than 2 seconds
sum_audio_len['filtered_audio_len'] = sum_long_audio_len['filtered_audio_len']
sum_audio_len['difference'] = sum_audio_len['all_audio_len'] - sum_audio_len['filtered_audio_len']

print(sum_audio_len.sort_values(by='difference', ascending=False))

                                      all_audio_len  filtered_audio_len  \
species                                                                   
Killer Whale                                   4631              3473.0   
Long-Finned Pilot Whale                        2737              2344.0   
Common Dolphin                                 1878              1525.0   
Striped Dolphin                                 955               635.0   
Pantropical Spotted Dolphin                    2570              2291.0   
Short-Finned (Pacific) Pilot Whale             3072              2796.0   
Northern Right Whale                            744               534.0   
Atlantic Spotted Dolphin                        402               194.0   
Humpback Whale                                 6857              6656.0   
Spinner Dolphin                                2128              1937.0   
Fin, Finback Whale                            13795             13626.0   
False Killer Whale       

## Creazione spettrogrammi

In [5]:
FRAME_SIZE = 1024
HOP_SIZE = 512
OUTPUT_DIR = spec_path
CHUNK_SIZE = 2

In [10]:
def chunk_data(data: np.array, sample_rate: int, chunk_size: int, hop: int) -> list:
    """
    Chunk audio data into smaller segments.
    Args:
        data (np.array): Audio data array.
        sample_rate (int): Sample rate of the audio data.
        chunk_size (int): Size of each chunk in seconds.
        hop (int): Hop size in seconds.
    Returns:
        list: List of audio chunks.
    """
    # get number of samples per chunk
    chunk_samples = int(sample_rate * chunk_size)

    # get chunks with specified hop size
    chunks = [data[i:i + chunk_samples] for i in range(0, len(data), int(hop * sample_rate)) if i + chunk_samples <= len(data)]
    return chunks

def save_chunk(plot_struct: dict, data: np.array, output_file: str, cmap='magma'):
    """
    Save a spectrogram chunk as an image file.
    Args:
        plot_struct (dict): Dictionary containing matplotlib figure and axis (keys: 'fig', 'ax').
        data (np.array): Spectrogram data array.
        output_file (str): Path to save the output image file.
        cmap (str): Colormap to use for the spectrogram.
    """
    if not os.path.exists(os.path.dirname(output_file)):
        os.makedirs(os.path.dirname(output_file))
    ax = plot_struct['ax']
    fig = plot_struct['fig']
    ax.clear()
    ax.axis('off')
    im = ax.imshow(data, aspect='auto', origin='lower', cmap=cmap)

    fig.savefig(output_file, bbox_inches='tight', transparent=True)
    plt.close(fig)

In [11]:
def preprocessing(full_data: np.array, sample: int, species_dir: dict, file_name: str, sft_config: dict, chunk_config: dict, cmap: str):
    """
    Preprocess audio data to generate and save spectrogram chunks as images and numeric data.
    Args:
        full_data (np.array): Full audio data array.
        sample (int): Sample rate of the audio data.
        species_dir (dict): Directory to save the spectrograms and numeric data (keys: 'spec', 'num').
        file_name (str): Base name for the output files.
        sft_config (dict): Configuration for Short-Time Fourier Transform (keys: 'win', 'hop', 'fs').
        chunk_config (dict): Configuration for chunking the audio data (keys: 'size', 'hop').
        cmap (str): Colormap to use for the spectrogram.
    """
    matplotlib.use('Agg')
    fig, ax = plt.subplots(figsize=(10, 4))

    # chunk the data
    chunked_data = chunk_data(full_data, sample, chunk_config['size'], chunk_config['hop'])
    chunk_num = 0
    for data in chunked_data:
        composed_filename = file_name+'-'+str(chunk_num)
        output_file = os.path.join(species_dir['spec'], composed_filename + ".png")

        # STF calculation
        SFT = signal.ShortTimeFFT(sft_config['win'], sft_config['hop'], sft_config['fs'])
        s_x = SFT.stft(data)

        spectrogram = np.abs(s_x)**2

        # log scaling
        log_spectrogram = np.log(spectrogram + 1e-10)

        # saving spectrogram numeric data as .npy file
        if not os.path.exists(species_dir['num']):
            os.makedirs(species_dir['num'])
        np.save(os.path.join(species_dir['num'], file_name + "-" + str(chunk_num) + ".npy"), log_spectrogram)

        # saving spectrogram image as PNG
        if not os.path.exists(output_file):
            save_chunk({'fig': fig, 'ax': ax}, log_spectrogram, output_file, cmap)

        chunk_num += 1
    
    plt.close(fig)

In [12]:
def species_spec(dirs, species_list, output_dir, ds_path, stf_config, chunk_config, cmap):
    # definizione della finestra di Hann
    hann_win = signal.windows.hann(stf_config['frame_win'])
    j = 0
    for curr_dir in dirs:
        if curr_dir not in species_list:
            continue
        print(f'Processing directory: {curr_dir}: {j+1}/{len(species_list)}')
        curr_files = os.listdir(os.path.join(ds_path, curr_dir))
        i = 0
        for file in curr_files:
            print(f'Processing {i+1}/{len(curr_files)} files in {curr_dir}', end='\r')
            if file.endswith('.wav'):
                try:
                    x, sr = sf.read(os.path.join(ds_path, curr_dir, file))
                except LibsndfileError:
                    continue
                spec_curr_dir = os.path.join(output_dir['spec'], curr_dir)
                num_curr_dir = os.path.join(output_dir['num'], curr_dir)
                stf_config['win'] = hann_win
                preprocessing(x, sr, {'spec': spec_curr_dir, 'num': num_curr_dir}, file.split('.')[0], stf_config, chunk_config, cmap)
            
            i += 1
        j += 1

            

In [13]:
tree = os.walk(os.path.join(data_path))
root, dirs, files = next(tree)
unpopular_species_list = md_ds['species'].unique()

stf_config = {'frame_win': FRAME_SIZE, 'hop': HOP_SIZE, 'fs': FRAME_SIZE}
spec_config = {'size': CHUNK_SIZE, 'hop': CHUNK_SIZE}

output_dir = {'spec': spec_path, 'num': npy_path}

species_spec(dirs, unpopular_species_list, output_dir, data_path, stf_config, spec_config, 'gray')

Processing directory: Finless Porpoise: 1/55
Processing directory: West Indian Manatee: 2/55
Processing directory: Southern Right Whale: 3/55
Processing directory: Gray Whale: 4/55ht Whale
Processing directory: Harbor Porpoise: 5/55
Processing directory: Long-Finned Pilot Whale: 6/55
Processing directory: New Zealand Fur Seal: 7/55Whale
Processing directory: Long Beaked (Pacific) Common Dolphin: 8/55
Processing directory: Fraser's Dolphin: 9/55ic) Common Dolphin
Processing directory: Killer Whale: 10/55hin
Processing directory: Bowhead Whale: 11/55
Processing directory: Commerson's Dolphin: 12/55
Processing directory: Ross Seal: 13/55lphin
Processing directory: Fin, Finback Whale: 14/55
Processing directory: False Killer Whale: 15/55
Processing directory: Weddell Seal: 16/55Whale
Processing directory: Sperm Whale: 17/55
Processing directory: Sea Otter: 18/55ale
Processing directory: Harp Seal: 19/55
Processing directory: Short-Finned (Pacific) Pilot Whale: 20/55
Processing directory: J