# Generating spectrograms from dataset

reference: https://github.com/JoshWheeler08/DolphinAcoustics-Classifier

This Jupyter notebook covers everything related to generating spectrograms: functions that find your input files, and functions that generate a spectrogram from an input waveform and save it to a specified output file path.

This code is fully OS-independent and file-system-independent, so it works whether you are running Windows, macOS, or Linux. Do make sure you have set up a virtual environment correctly before continuing.

In [None]:
# if you installed all the dependencies already, ignore this block of code
!pip3 install -r requirements.txt 

In [2]:
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import shutil
import numpy as np

**The following code block is a Configurations/Settings block for the processing part of this file.**

In [2]:
# ---- Input and output locations ----
# pathlib is operating-system independent: it builds a POSIX path or a Windows
# path depending on where the notebook runs.
CLIPS_PATH = Path("wav-data").resolve()  # where the input wav clips of dolphin sounds/whistles are stored
SAVE_IMAGE_PATH = Path("img-data")  # where the generated spectrogram images are saved

# ---- Species classes present in the input data ----
SPECIES = ["common", "bottlenose", "melon-headed"]

# ---- Spectrogram parameters ----
# Adjust as you wish; the values below currently match the defaults in the
# signature of save_spectrogram_image.
SAMPLING_RATE = 48000  # gemma's improved sampling rate
FFT_NUM = 512  # fft number (passed on as n_fft)
DPI = 96  # dots per inch of your screen; the notes further down explain why it matters
MAX_FREQ = 22000  # upper frequency bound used when cropping the spectrogram
MIN_FREQ = 3000  # lower frequency bound used when cropping the spectrogram
IMAGE_SIZE = (413, 202)  # (x, y)
# Function or value used as the reference point by librosa.amplitude_to_db, see
# https://librosa.org/doc/main/generated/librosa.amplitude_to_db.html
REF = np.max

## The following code-block declares and defines functions for generating and saving spectrograms and finding clips in the file system.

NB: matplotlib works with physical dimensions (inches), not directly with pixels. So if you want to show or save an image with a specific size in pixels, you need to find out what DPI your screen uses.

The following link allows you to detect the dpi of your screen:
https://www.infobyip.com/detectmonitordpi.php


In [3]:
def save_spectrogram_image(
    input_path,
    output_path,
    image_name,
    sampling_rate=48000,
    n_fft=512,
    dpi=96,
    max_freq=22000,
    min_freq=3000,
    img_size=(413, 202),
    ref=np.max,
):
    """
    Generate a spectrogram from one recording and save it as a PNG image.

    The recording at "input_path" is loaded, transformed with a short-time
    Fourier transform, cropped to the band between min_freq and max_freq,
    converted to decibels and drawn without axes in an inverted grayscale
    colour map. The image is written to "output_path/<image_name>.png".

    Parameters:
        input_path: path of the wav file to load.
        output_path: existing directory the image is written into.
        image_name: file name without extension; it is converted with str().
        sampling_rate: rate the recording is loaded at by librosa.load.
        n_fft: FFT window size handed to librosa.stft.
        dpi: dots per inch of your screen; together with img_size it fixes the
            figure size in inches so the saved image has img_size pixels.
        max_freq, min_freq: bounds used for cropping the spectrogram rows, read
            Dzhemma's report for a deeper understanding.
        img_size: desired (x, y) size of the saved image.
        ref: function/value used as the reference for fixing the zero in
            librosa.amplitude_to_db.

    If ref is changed, you probably want to change the clim argument below as
    well, as it is currently designed for Ydb produced with the default ref.

    The figure is always closed, even when drawing or saving fails, so a bad
    clip in a long batch run does not leave open figures behind.
    """
    # Frequency step between neighbouring spectrogram rows, used to translate
    # the frequency bounds into row indices.
    f_step = sampling_rate / n_fft
    min_bin = int(min_freq / f_step)
    max_bin = int(max_freq / f_step)

    # Load the wav file, apply the short-time Fourier transform and crop the 2d
    # array vertically (frequency axis), keeping rows min_bin up to max_bin.
    y, sr = librosa.load(input_path, sr=sampling_rate)
    Y = librosa.stft(y, n_fft=n_fft)
    Y = Y[min_bin:max_bin, :]

    # Change the amplitude from a linear scale to a decibel (logarithmic) scale.
    Ydb = librosa.amplitude_to_db(abs(Y), ref=ref)

    # Figure size is given in inches, hence the division by dpi.
    fig = plt.figure(
        frameon=False, figsize=(img_size[0] / dpi, img_size[1] / dpi), dpi=dpi
    )
    try:
        # One axes object filling the whole figure, with axis decorations off.
        ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
        ax.set_axis_off()
        fig.add_axes(ax)

        # Draw on the axes created above explicitly rather than relying on
        # whichever axes happens to be matplotlib's current one.
        librosa.display.specshow(
            Ydb,
            cmap="gray_r",
            sr=sr,
            x_axis="time",
            y_axis="hz",
            clim=[np.mean(Ydb), 0],
            ax=ax,
        )

        # Save image at "output_path/image_name.png"
        fig.savefig(os.path.join(output_path, str(image_name) + ".png"))
    finally:
        plt.close(fig)

def get_all_wavfiles(root_path):
    """
    Lazily yield the path of every wav file found below root_path.

    The directory tree is traversed with os.walk, so sub-folders at any depth
    are included. Each yielded value is the walked folder joined with the file
    name; only paths ending in ".wav" (case-sensitive) are produced. Because
    this is a generator, it can be iterated over directly in a loop.
    """
    for folder, _subfolders, file_names in os.walk(root_path):
        candidates = (os.path.join(folder, name) for name in file_names)
        for candidate in candidates:
            if not candidate.endswith(".wav"):
                continue
            yield candidate

def create_storage_for_images(directory_to_store_images):
    """
    Provide a fresh, empty directory for storing images.

    Anything already present at the given path is removed with shutil.rmtree
    first; the directory (including missing parents) is then created with
    os.makedirs.
    """
    target = directory_to_store_images
    stale_output_present = os.path.exists(target)
    if stale_output_present:
        # Start from a clean slate so images of an earlier run cannot mix in.
        shutil.rmtree(target)
    os.makedirs(target)


## Executable code that loops through all the input wav-files and saves generated spectrograms

In [4]:
# The running count doubles as the image name, which makes it easy to check
# afterwards whether any clip was left out.
count = 0
# The input directory must contain one folder per entry of SPECIES, named
# exactly like that entry.
for species_name in SPECIES:
    species_input_dir = os.path.join(CLIPS_PATH, species_name)
    species_output_dir = os.path.join(SAVE_IMAGE_PATH, species_name)
    create_storage_for_images(species_output_dir)
    for wav_path in get_all_wavfiles(species_input_dir):
        # defensive code: only files with the .wav ending are converted
        if wav_path.endswith(".wav"):
            save_spectrogram_image(
                input_path=wav_path,
                output_path=species_output_dir,
                image_name=count,
                sampling_rate=SAMPLING_RATE,
                n_fft=FFT_NUM,
                dpi=DPI,
                max_freq=MAX_FREQ,
                min_freq=MIN_FREQ,
                img_size=IMAGE_SIZE,
                ref=REF,
            )
            count += 1

## Split the dataset of the generated spectrograms into training and testing folders.
This automatic method of splitting data sets can help you generate datasets ready for training in the whistle_classifier file. 

In [5]:
def get_files_from_folder(path):
    """
    Return the names of all entries directly inside path as a numpy array.

    get_all_wavfiles is not reused here because it is a generator; an array of
    all the entries is needed so the amount of data can be counted.
    """
    return np.array(os.listdir(path))

def generate_training_and_testing_datasets(path_to_data, path_to_test_data, train_test_ratio=0.6):
    """
    Split the generated spectrograms into a training part and a testing part.

    Every immediate sub-directory of path_to_data is treated as one species.
    For each of them a directory with the same name is created below
    path_to_test_data, and a share of its entries is moved there; whatever is
    left in path_to_data is the training set.

    Parameters:
        path_to_data: directory where all the spectrogram images are stored,
            one sub-directory per species.
        path_to_test_data: directory that receives the test images.
        train_test_ratio: fraction of each species that stays in path_to_data
            for training; round(count * (1 - train_test_ratio)) entries are
            moved to the test directory.

    Raises:
        ValueError: if train_test_ratio is not within [0, 1].
        FileNotFoundError: if path_to_data is not an existing directory.

    Species and entries are processed in sorted name order, so the same input
    always yields the same split regardless of the order in which the file
    system happens to list directory contents.
    """
    if not 0 <= train_test_ratio <= 1:
        raise ValueError(
            f"train_test_ratio must be between 0 and 1, got {train_test_ratio!r}"
        )
    if not os.path.isdir(path_to_data):
        raise FileNotFoundError(f"data directory not found: {path_to_data}")

    species_dirs = sorted(
        entry
        for entry in os.listdir(path_to_data)
        if os.path.isdir(os.path.join(path_to_data, entry))
    )

    for species_dir in species_dirs:
        path_to_original = os.path.join(path_to_data, species_dir)
        path_to_save = os.path.join(path_to_test_data, species_dir)
        os.makedirs(path_to_save, exist_ok=True)

        # List once: the same listing gives both the amount of data and the
        # entries that are moved.
        entries = sorted(os.listdir(path_to_original))
        test_count = round(len(entries) * (1 - train_test_ratio))

        for entry in entries[:test_count]:
            shutil.move(
                os.path.join(path_to_original, entry),
                os.path.join(path_to_save, entry),
            )

Executable code that performs the train/test split.

In [None]:
# Destination of the test images and the fraction of every species kept for training.
TEST_DATA = Path("test-data")
TRAIN_TEST_RATIO = 0.7
generate_training_and_testing_datasets(
    path_to_data=SAVE_IMAGE_PATH,
    path_to_test_data=TEST_DATA,
    train_test_ratio=TRAIN_TEST_RATIO,
)