# Converting an audio dataset using a NAS

This notebook is an example of how to convert an audio dataset using a NAS (Neuromorphic Auditory Sensor) deployed on the Node board, using the OKAERTool. The OKAERTool is a Python package that provides a simple interface to interact with the OKAERTool board. The OKAERTool board is a low-cost, open-source hardware platform that can be used to deploy and test AER-based systems on the edge.

The chosen dataset is "WABAD: A World Annotated Bird Acoustic Dataset for Passive Acoustic Monitoring" (available at https://zenodo.org/records/14191524). It consists of several folders, each containing both recordings and annotations. There is also a CSV file describing the characteristics of every sound: folder, file, bird species, start and end time, highest and lowest frequency, country, continent and environment.

## Setting the NAS

The OKAERTool is plugged into the Node board, where a stereo 64-channel NAS is deployed. The script below initializes the OKAERTool and creates the pyNAVIS `MainSettings` object that will be used later.

In [1]:
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
# sys.path.insert(1, '../src')

import okaertool as okt
from pyNAVIS import *
import time

# Open the OKAERTool handle and initialise it; the cells below use `okaer`.
okaer = okt.Okaertool()
okaer.init()

# pyNAVIS settings object passed to every plot, save and load call below.
settings = MainSettings(
    num_channels=64,
    mono_stereo=1,
    on_off_both=1,
    address_size=4,
    ts_tick=0.01,
    bin_size=10000,
)

04/28/25 10:17:58 AM - INFO : No bit file loaded. Ensure that the FPGA is already programmed
04/28/25 10:17:58 AM - INFO : okaertool initialized as idle


## Importing the dataset

The function below loads the CSV file provided with the dataset and filters the rows that still need to be converted. In the same function, two new columns called "Spike file" and "Errors" are added to the original table if they are not already present: the first one stores the name of the corresponding AEDAT file for each audio segment, and also ensures that each segment is played and converted only once, while the second one is used to record errors.

In [None]:
import os
import threading
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydub import AudioSegment
from pydub.playback import play
from queue import Queue

def load_and_filter_csv(csv_path='/home/asia/NAS/dataset_sonidos/pooled annotations MOD.csv', country='Spain'):
    """Load the annotation CSV and select the rows that still have to be converted.

    The 'Spike file' and 'Errors' bookkeeping columns are created when missing.
    A row is pending when its 'Country' equals *country* and both bookkeeping
    columns are empty.

    Args:
        csv_path: Path of the ';'-separated, ISO-8859-1 encoded annotation file.
        country: Value of the 'Country' column to keep.

    Returns:
        ``(df, df_def, csv_path)`` where ``df`` is the full table and ``df_def``
        a copy of the pending rows, or ``(None, None, csv_path)`` when nothing
        is pending.
    """
    df = pd.read_csv(csv_path, sep=';', encoding='ISO-8859-1')

    for column in ('Spike file', 'Errors'):
        if column not in df.columns:
            df[column] = None
        # A column that is still completely empty is read back as float64;
        # keep it as object so file names / error texts can be stored in it
        # later without an incompatible-dtype assignment.
        df[column] = df[column].astype(object)

    pending = (df['Country'] == country) & df['Spike file'].isna() & df['Errors'].isna()
    df_def = df[pending].copy()
    if df_def.empty:
        print("\nThere are no files available.")
        return None, None, csv_path

    return df, df_def, csv_path

## Processing the audio

Below are the four main functions of this project. The last one, `process_row`, works on a single row of the dataframe. It extracts the desired segment from the audio file (using the first function below) and plays it while monitoring the NAS output at the same time (using the second and third functions below, each in its own thread). The NAS processes the segment and produces a series of spikes, which are then saved in a new AEDAT file.

In [3]:
def find_segment(df, ind, row):
    """Cut the annotated segment described by *row* out of its recording.

    ``df`` and ``ind`` are accepted for signature compatibility but not used.

    Returns:
        ``(folder, file, segment, begin_ms, end_ms, duration_s)``, or ``None``
        (after printing a message) when the recording is not on disk.
    """
    site = row['Recording site']
    wav_name = row['Recording']
    # The CSV columns are in seconds; the slice below is done in milliseconds.
    start_ms = float(row['Begin Time (s)']) * 1000
    stop_ms = float(row['End Time (s)']) * 1000

    source = os.path.join('/home/asia/NAS/dataset_sonidos', site, 'Recordings', wav_name)
    if os.path.isfile(source):
        clip = AudioSegment.from_file(source)[start_ms:stop_ms]
        seconds = stop_ms / 1000 - start_ms / 1000
        return site, wav_name, clip, start_ms, stop_ms, seconds

    print(f"\nThe following file doesn't exist: {source}")
    return None

def play_audio(segment):
    """Play *segment* with pydub's ``play``; used as the audio thread target in ``process_row``."""
    play(segment)
    
def monitor_spikes(total_duration, result_queue):
    """Monitor the board's 'node_out' input and pass the result back through a queue.

    Args:
        total_duration: Value forwarded as ``duration`` to ``okaer.monitor``.
        result_queue: Queue that receives the object returned by ``okaer.monitor``.
    """
    captured = okaer.monitor(inputs=['node_out'], duration=total_duration)
    # Report how many spikes each returned entry holds.
    for position, stream in enumerate(captured):
        print(f"Input {position}:", stream.get_num_spikes())
    result_queue.put(captured)
    
def process_row(ind, row, df, csv_path, caller):
    """Play one annotated segment, record the spikes and save them as AEDAT.

    The outcome is written into ``df`` (column 'Spike file' on success,
    'Errors' on failure) and ``df`` is saved to ``csv_path`` straight away, so
    an interrupted run can resume where it stopped.

    Args:
        ind: Index label of the row inside ``df``.
        row: Annotation row; 'Recording site', 'Recording', 'Begin Time (s)'
            and 'End Time (s)' are read from it.
        df: Full annotation table, updated in place.
        csv_path: File to which ``df`` is written back.
        caller: 'single' additionally shows the plots and returns the path.

    Returns:
        The AEDAT path without extension when ``caller == 'single'`` and the
        conversion succeeded; ``None`` otherwise.
    """
    # Known before the lookup so the error report works even if the lookup fails.
    recording_file = row['Recording']

    try:
        # Done inside the try block so that a missing recording is recorded
        # in 'Errors' instead of aborting a whole batch run.
        found = find_segment(df, ind, row)
        if found is None:
            raise FileNotFoundError("audio file not found")
        recording_folder, recording_file, segment, begin_time, end_time, total_duration = found

        print(f"\nConverting: {recording_file} | from {begin_time/1000}s to {end_time/1000}s")

        okaer.reset_board()

        spike_queue = Queue()

        audio_thread = threading.Thread(target=play_audio, args=(segment,))
        monitor_thread = threading.Thread(target=monitor_spikes, args=(total_duration, spike_queue))

        # Playback and monitoring run side by side.
        audio_thread.start()
        monitor_thread.start()

        audio_thread.join()
        monitor_thread.join()

        # If the monitor thread died it never filled the queue; a plain get()
        # would then block forever.
        if spike_queue.empty():
            raise RuntimeError("the monitoring thread finished without returning any spikes")
        spikes = spike_queue.get()

        # NOTE(review): entry 2 of the monitor result is hard-coded although only
        # 'node_out' is requested - confirm this is the entry holding the NAS output.
        spike_file = SpikesFile(addresses=spikes[2].addresses, timestamps=spikes[2].timestamps)
        # NOTE(review): the figure is closed right away; presumably this only
        # checks that the spikes can be plotted - confirm before removing.
        Plots.spikegram(spike_file, settings)
        plt.close('all')

        if caller == 'single':
            Plots.spikegram(spike_file, settings)
            Plots.sonogram(spike_file, settings)
            Plots.histogram(spike_file, settings)
            Plots.average_activity(spike_file, settings)
            Plots.difference_between_LR(spike_file, settings)

        file_name, file_ext = os.path.splitext(recording_file)
        # NOTE(review): ind+2 presumably maps the 0-based index to the CSV line
        # number (header + 1-based rows) - confirm.
        converted_file = f"{file_name}_id{ind+2}"

        spike_folder = os.path.join('/home/asia/NAS/dataset_sonidos', recording_folder, 'Spikes')
        if not os.path.isdir(spike_folder):
            os.makedirs(spike_folder, exist_ok=True)
            print(f"\nCreating the new folder: {spike_folder}")

        spike_path = os.path.join(spike_folder, converted_file)
        Savers.save_AEDAT(spike_file, spike_path, settings, verbose=True)
        print(f"Name: {converted_file}.aedat")
        df.loc[ind, 'Spike file'] = f"{converted_file}.aedat"
        df.to_csv(csv_path, sep=';', encoding='ISO-8859-1', index=False)

        if caller == 'single':
            return spike_path

    except Exception as e:
        # Deliberately broad: any failure is logged against the row so the
        # remaining rows can still be processed.
        print(f"An error occurred while elaborating the following file: {recording_file} | index: {ind+2} | {e}")
        df.loc[ind, 'Errors'] = f"Error: {e}"
        df.to_csv(csv_path, sep=';', encoding='ISO-8859-1', index=False)

    return None

## Reproducing, converting and checking a single audio

### Reproducing 

Unplug the NAS and execute the following script to listen to the audio segment that is going to be converted. 

In [4]:
def listen_single():
    """Play the first pending audio segment so it can be checked by ear.

    Does nothing when no row is pending or when the recording is missing.
    """
    df, df_def, csv_path = load_and_filter_csv()
    if df_def is None:
        return

    ind = df_def.index[0]
    row = df_def.iloc[0]

    found = find_segment(df, ind, row)
    if found is None:
        # find_segment returns None (after printing a message) when the
        # recording is not on disk; unpacking it would raise a TypeError.
        return
    _, recording_file, segment, begin_time, end_time, _ = found

    species = row['Species']

    print(f"\nReproducing: {recording_file} | from {begin_time/1000}s to {end_time/1000}s")
    print(f"Specie: {species}")
    play(segment)

listen_single()


Reproducing: HONDO_20230504_064200.wav | from 49.62359399s to 50.20605453s
Specie: Gallinula chloropus


### Converting and checking 

Plug in the NAS before proceeding. The script below converts a single audio and shows some plots before and after saving the AEDAT file. 

In [None]:
def main_single():
    """Convert the first pending segment, then reload the saved AEDAT file and plot it.

    The plots of the reloaded file allow a visual comparison with the plots
    that ``process_row`` shows before saving.
    """
    caller = 'single'
    df, df_def, csv_path = load_and_filter_csv()
    if df_def is None:
        return

    ind = df_def.index[0]
    row = df_def.iloc[0]

    spike_path = process_row(ind, row, df, csv_path, caller)
    if spike_path is None:
        # process_row returns None when the conversion failed, so there is
        # no file to reload (the path would otherwise become "None.aedat").
        return

    aedat_saved = Loaders.loadAEDAT(f"{spike_path}.aedat", settings)
    # NOTE(review): when the loader returns a list, entry 2 is used - confirm
    # this matches the entry written by process_row.
    aedat_file = aedat_saved if not isinstance(aedat_saved, list) else aedat_saved[2]
    aedat_file.addresses = aedat_file.addresses.astype(np.int32)

    Plots.spikegram(aedat_file, settings)
    Plots.sonogram(aedat_file, settings)
    Plots.histogram(aedat_file, settings)
    Plots.average_activity(aedat_file, settings)
    Plots.difference_between_LR(aedat_file, settings)

main_single()

## Converting the dataset

The function below converts the whole dataset, without the separate listening step and without displaying the plots.

In [None]:
def main_multiple():
    """Run ``process_row`` in 'multiple' mode on every pending row, in order."""
    df, pending, csv_path = load_and_filter_csv()
    if pending is not None:
        for ind, row in pending.iterrows():
            process_row(ind, row, df, csv_path, 'multiple')

main_multiple()