# Notebook 2: Plot spectrograms

## Train, validate, test split
The data is split into training (70%), validation (20%) and test (10%) sets for training and evaluating the model. The main hypothesis is that each call corresponds to a breeding stage. The number of items per breeding stage is imbalanced; the split takes this into account by passing the 'breeding stage' column as the `stratify` parameter, so each subset keeps roughly the same class proportions.

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the audio metadata table.
df = pd.read_csv('./data/Accipiter_gentilis.csv')

# Stage 1: 70% of the rows go to training. `df_test` temporarily holds the
# remaining 30%; both parts are stratified on the breeding stage.
df_train, df_test = train_test_split(
    df,
    train_size=0.7,
    stratify=df['breeding stage'],
    random_state=0,
)
# Stage 2: 66% of that remainder becomes the validation set and whatever is
# left over is the final test set (again stratified on the breeding stage).
df_validate, df_test = train_test_split(
    df_test,
    train_size=0.66,
    stratify=df_test['breeding stage'],
    random_state=0,
)
# Report the size of each subset.
print('The training dataset has {} entries, the validation dataset {}, the testing dataset {}.'.format(
    *map(len, (df_train, df_validate, df_test))))

The training dataset has 129 entries, the validation dataset 36, the testing dataset 20.


## Plotting
A spectrogram is plotted for each audio file. Spectrograms are stored in separate folders for the train, validate and test data sets.

In [2]:
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from os.path import exists

def decode(mp3_audio_path, wav_audio_path):
    """Decode an mp3 file to a normalized mono wav file using SoX.

    Runs ``sox <mp3> <wav> remix - norm -1``: ``remix -`` mixes all input
    channels down to a single (mono) channel and ``norm -1`` normalizes the
    audio to -1 dB.

    Args:
        mp3_audio_path: Path of the mp3 file to read.
        wav_audio_path: Path of the wav file to write.

    Raises:
        RuntimeError: If the ``sox`` executable cannot be found, or if it
            exits with a non-zero status.
    """
    command = ['sox', mp3_audio_path, wav_audio_path, 'remix', '-', 'norm', '-1']
    try:
        resp = subprocess.call(command)
    except FileNotFoundError as err:
        # Without this, a missing SoX installation surfaces as a bare
        # "No such file or directory" that reads like a missing audio file.
        raise RuntimeError(
            "Call to external converter failed: the 'sox' executable was not found."
        ) from err
    if resp != 0:
        raise RuntimeError("Call to external converter failed with error code %d." % (resp))

def plot_spec(audio_path, img_path):
    """Plot a simple spectrogram of an audio file and save it as an image.

    The audio is loaded with librosa, transformed with a short-time Fourier
    transform, and the magnitudes are converted to decibels before being
    drawn with the inverted gray colormap ('gray_r') on a 14x5 figure.

    Args:
        audio_path: Path of the audio file to load.
        img_path: Path the rendered spectrogram image is saved to.
    """
    # load data from file into numpy.array
    data, sample_rate = librosa.load(audio_path)
    # do fourier transform and convert the magnitudes to decibels
    data_ft = librosa.stft(data)
    data_db = librosa.amplitude_to_db(abs(data_ft))
    fig = plt.figure(figsize=(14, 5))
    try:
        librosa.display.specshow(data_db, cmap='gray_r', sr=sample_rate)
        fig.savefig(img_path)
    finally:
        # Close the figure even when drawing or saving fails; this function
        # is called once per audio file, so leaked figures would pile up.
        plt.close(fig)

def plot_set(df_set, set_name):
    """Plot a set and store the spectrograms in the corresponding dir.

    For every entry in the 'Catalogue number' column, the mp3 at
    ``./data/audio/<number>.mp3`` is decoded to ``./data/audio/<number>.wav``
    and its spectrogram is written to ``./data/spec/<set_name>/<number>.png``.
    Entries whose image already exists are skipped, so the function can be
    re-run without redoing finished work.

    Args:
        df_set: Table providing a 'Catalogue number' column.
        set_name: Name of the sub-folder of ``./data/spec`` to write into
            (e.g. 'train', 'validate' or 'test').
    """
    # Function-scope import: the file only imports `exists` from os.path.
    import os

    # Make sure the output folder exists before any image is written into it.
    os.makedirs('./data/spec/%s' % set_name, exist_ok=True)

    for cat_num in df_set['Catalogue number']:
        mp3_audio_path = './data/audio/%s.mp3' % cat_num
        wav_audio_path = './data/audio/%s.wav' % cat_num
        img_path = './data/spec/%s/%s.png' % (set_name, cat_num)
        if exists(img_path):
            # spectrogram already rendered on a previous run
            continue
        decode(mp3_audio_path, wav_audio_path)
        plot_spec(wav_audio_path, img_path)

In [3]:
# Render the spectrograms of every subset into its own sub-folder.
plot_set(df_set=df_train, set_name='train')
plot_set(df_set=df_test, set_name='test')
plot_set(df_set=df_validate, set_name='validate')