# Generating spectrograms from dataset

In [25]:
import os
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
import shutil
import numpy as np

Configuration settings for the processing

In [26]:
# Configure paths here
CLIPS_PATH = Path("test-wav-clips").resolve() # Path module is machine independent, can create either a PosixPath or a WindowsPath
SAVE_IMAGE_PATH = Path("test-imgs-with-folders")
SPECIES = ["common", "bottlenose", "melon-headed"]
# Configure parameters as you wish, currently they are equal to the default values in the function signature of save_spectrogram_image
SAMPLING_RATE = 48000  # gemma's improved sampling rate
FFT_NUM = 512  # fft number
DPI = 96  # dots per inch of your screen
MAX_FREQ = 22000
MIN_FREQ = 3000
IMAGE_SIZE = (413, 202)

CLIPS_PATH

PosixPath('/Users/matteohe/Desktop/DA/ml-project/test-wav-clips')

Functions for generating and saving spectrograms and finding clips in the file system.

NB: matplotlib only works with real dimensions and not directly with pixels. So if you want to show or save an image of certain pixel you need to find out what dpi your screen uses:

The following link allows you to detect the dpi of your screen:
https://www.infobyip.com/detectmonitordpi.php


In [32]:
def save_spectrogram_image(
    input_path,
    output_path,
    image_name,
    sampling_rate=48000,
    n_fft=512,
    dpi=96,  # this should be dpi of your own screen
    max_freq=22000,  # for cropping
    min_freq=3000,  # for cropping
    img_size=(413, 202),
):
    """
    This function takes in the above parameters and
    generates a spectrogram from a given sample recording and
    saves the spectrogram image
    """
    f_step = sampling_rate / n_fft
    min_bin = int(min_freq / f_step)
    max_bin = int(max_freq / f_step)

    # Generate image
    x, sr = librosa.load(input_path, sr=sampling_rate)
    X = librosa.stft(x, n_fft=n_fft)  # Apply fourier transform
    X = X[ min_bin:max_bin, :]  # Crop image vertically (frequency axis) from min_bin to max_bin

    # TODO change refs
    Xdb = librosa.amplitude_to_db(
        abs(X), ref=np.max
    )  # Convert amplitude spectrogram to dB-scaled spec
    fig = plt.figure(
        frameon=False, figsize=(img_size[0] / dpi, img_size[1] / dpi), dpi=dpi
    )  # Reduce image
    
    ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
    ax.set_axis_off()
    fig.add_axes(ax)
    librosa.display.specshow(Xdb, cmap="gray_r", sr=sr, x_axis="time", y_axis="hz", clim=[np.mean(Xdb),0])

    # Save image
    fig.savefig(os.path.join(output_path, str(image_name) + ".png"))
    plt.close(fig)

# file-system independent function for getting all the wav files from the given root directory
def get_all_wavfiles(root_path):
    for root, dirs, files in os.walk(root_path):
        for file_name in files:
            path = os.path.join(root, file_name)
            if path.endswith(".wav"):
                yield(path)

def create_storage_for_images(directory_to_store_images):
    """Create storage for images"""
    if os.path.exists(directory_to_store_images):
        shutil.rmtree(directory_to_store_images)
    os.makedirs(directory_to_store_images)


In [12]:
# executable code
create_storage_for_images(SAVE_IMAGE_PATH)
counter = 1

for clip_path in get_all_wavfiles(CLIPS_PATH):
    if not clip_path.endswith(".wav"):  # defensive code
        continue
    # to use default values in function signature use the next line
    # save_spectrogram_image(clip_path, SAVE_IMAGE_PATH, str(counter)) 
    print("saving...")
    save_spectrogram_image(
        clip_path,
        SAVE_IMAGE_PATH,
        counter,
        SAMPLING_RATE,
        FFT_NUM,
        DPI, # this should be dpi of your own screen
        MAX_FREQ, # for cropping
        MIN_FREQ, # for cropping
        IMAGE_SIZE,
    )
    print("saved!")
    counter += 1

print("All images have been created")

saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
saved!
saving...
save

In [33]:
count = 0
for specie in SPECIES:
    curr_input_dir = os.path.join(CLIPS_PATH, specie)
    curr_output_dir = os.path.join(SAVE_IMAGE_PATH, specie)
    print(curr_input_dir, curr_output_dir)
    create_storage_for_images(curr_output_dir)
    for clip_path in get_all_wavfiles(curr_input_dir):
        if not clip_path.endswith(".wav"):  # defensive code
            continue
        
        print("saving...", clip_path)
        save_spectrogram_image(
            clip_path,
            curr_output_dir,
            count,
            SAMPLING_RATE,
            FFT_NUM,
            DPI, # this should be dpi of your own screen
            MAX_FREQ, # for cropping
            MIN_FREQ, # for cropping
            IMAGE_SIZE,
        )
        print("saved!")
        count += 1

/Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common test-imgs-with-folders/common
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation598.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation229.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation573.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation215.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation201.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation567.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation188.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation639.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wav-clips/common/Annotation163.wav
saved!
saving... /Users/matteohe/Desktop/DA/ml-project/test-wa

## Resources and commands:

- `find ./train/bottlenose/ -maxdepth 1 -type f | head -n5 | xargs -d '\n' rm -f --`
- `find ./train/bottlenose/ -maxdepth 1 -type f | head -n5 | xargs cp -t ./test/bottlenose/`

- https://www.tensorflow.org/tutorials/images/cnn
- https://www.pyimagesearch.com/2018/12/31/keras-conv2d-and-convolutional-layers/
- https://vijayabhaskar96.medium.com/-tutorial-image-classification-with-keras-flow-from-directory-and-generators-95f75ebe5720 