In [9]:
import os
import pandas as pd
import numpy as np
from librosa.core import resample, to_mono
import wavio
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank


In [10]:
def plot_signals(signals):
    """Draw up to ten signals as line plots on a 2x5 grid of subplots.

    Parameters
    ----------
    signals : dict
        Mapping of panel title -> data handed to ``Axes.plot``. Entries are
        drawn in the mapping's iteration order, filling the grid row by row.
        Entries beyond the tenth are ignored; if there are fewer than ten,
        the unused panels are hidden.

    Returns
    -------
    None
        The figure is created via ``plt.subplots`` and not returned.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, sharex=False,
                             sharey=True, figsize=(20,5))
    fig.suptitle('Time Series', size=16)
    # Materialise the items once instead of rebuilding the key and value
    # lists for every single panel.
    entries = list(signals.items())
    # axes.flat walks the 2x5 grid row by row, i.e. the same order as the
    # nested row/column loops would.
    for idx, ax in enumerate(axes.flat):
        if idx >= len(entries):
            # Fewer entries than panels: blank the panel instead of
            # failing with IndexError.
            ax.set_visible(False)
            continue
        title, samples = entries[idx]
        ax.set_title(title)
        ax.plot(samples)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [11]:
def plot_fft(fft):
    """Draw up to ten spectra as line plots on a 2x5 grid of subplots.

    Parameters
    ----------
    fft : dict
        Mapping of panel title -> indexable pair whose element ``[0]`` is
        plotted on the y-axis and element ``[1]`` on the x-axis (the code
        names them ``Y`` and ``freq``). Entries are drawn in the mapping's
        iteration order, filling the grid row by row. Entries beyond the
        tenth are ignored; if there are fewer than ten, the unused panels
        are hidden.

    Returns
    -------
    None
        The figure is created via ``plt.subplots`` and not returned.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, sharex=False,
                             sharey=True, figsize=(20,5))
    fig.suptitle('Fourier Transforms', size=16)
    # Materialise the items once instead of rebuilding the key and value
    # lists for every single panel.
    entries = list(fft.items())
    # axes.flat walks the 2x5 grid row by row.
    for idx, ax in enumerate(axes.flat):
        if idx >= len(entries):
            # Fewer entries than panels: blank the panel instead of
            # failing with IndexError.
            ax.set_visible(False)
            continue
        title, data = entries[idx]
        # Index explicitly (rather than tuple-unpack) so containers with
        # more than two elements keep working.
        Y, freq = data[0], data[1]
        ax.set_title(title)
        ax.plot(freq, Y)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [12]:
def plot_fbank(fbank):
    """Draw up to ten filter-bank arrays as heat maps on a 2x5 grid.

    Parameters
    ----------
    fbank : dict
        Mapping of panel title -> image-like data handed to ``Axes.imshow``.
        Entries are drawn in the mapping's iteration order, filling the grid
        row by row. Entries beyond the tenth are ignored; if there are fewer
        than ten, the unused panels are hidden.

    Returns
    -------
    None
        The figure is created via ``plt.subplots`` and not returned.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, sharex=False,
                             sharey=True, figsize=(20,5))
    fig.suptitle('Filter Bank Coefficients', size=16)
    # Materialise the items once instead of rebuilding the key and value
    # lists for every single panel.
    entries = list(fbank.items())
    # axes.flat walks the 2x5 grid row by row.
    for idx, ax in enumerate(axes.flat):
        if idx >= len(entries):
            # Fewer entries than panels: blank the panel instead of
            # failing with IndexError.
            ax.set_visible(False)
            continue
        title, coeffs = entries[idx]
        ax.set_title(title)
        ax.imshow(coeffs, cmap='hot', interpolation='nearest')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [13]:
def plot_mfccs(mfccs):
    """Draw up to ten MFCC arrays as heat maps on a 2x5 grid of subplots.

    Parameters
    ----------
    mfccs : dict
        Mapping of panel title -> image-like data handed to ``Axes.imshow``.
        Entries are drawn in the mapping's iteration order, filling the grid
        row by row. Entries beyond the tenth are ignored; if there are fewer
        than ten, the unused panels are hidden.

    Returns
    -------
    None
        The figure is created via ``plt.subplots`` and not returned.
    """
    fig, axes = plt.subplots(nrows=2, ncols=5, sharex=False,
                             sharey=True, figsize=(20,5))
    fig.suptitle('Mel Frequency Cepstrum Coefficients', size=16)
    # Materialise the items once instead of rebuilding the key and value
    # lists for every single panel.
    entries = list(mfccs.items())
    # axes.flat walks the 2x5 grid row by row.
    for idx, ax in enumerate(axes.flat):
        if idx >= len(entries):
            # Fewer entries than panels: blank the panel instead of
            # failing with IndexError.
            ax.set_visible(False)
            continue
        title, coeffs = entries[idx]
        ax.set_title(title)
        ax.imshow(coeffs, cmap='hot', interpolation='nearest')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [35]:
# Dataset layout: <rootFolder>/bird_calls.csv is the table of file names and
# labels; <rootFolder>/wav_files/ is the directory the .wav files are read from.
rootFolder = "bird_data"
wavFolder = os.path.join(rootFolder, "wav_files")
bird_calls_df = pd.read_csv(
    filepath_or_buffer=os.path.join(rootFolder, "bird_calls.csv"),
)

In [36]:
# Peek at the first five rows of the freshly loaded table.
bird_calls_df.head(n=5)

Unnamed: 0,fname,label
0,American_Goldfinch_XC114342.wav,American_Goldfinch
1,American_Goldfinch_XC124312.wav,American_Goldfinch
2,American_Goldfinch_XC133564.wav,American_Goldfinch
3,American_Goldfinch_XC133565.wav,American_Goldfinch
4,American_Goldfinch_XC141469.wav,American_Goldfinch


In [37]:
# Index the table by file name so rows can be addressed by `fname`.
# Guarded so the cell can be re-run: once `fname` is the index it is no longer
# a column, and an unconditional set_index would raise KeyError.
if bird_calls_df.index.name != 'fname':
    bird_calls_df = bird_calls_df.set_index('fname')

In [39]:
# Add a `length` column: the duration of each recording in seconds, computed
# as (number of samples along the first axis) / (sample rate).
# The values are collected first and assigned in one step, so a file that fails
# to read cannot leave a half-filled column behind, and an empty table still
# gets a float `length` column.
wav_lengths = []
for f in bird_calls_df.index:
    rate, signal = wavfile.read(os.path.join(wavFolder, f))
    wav_lengths.append(signal.shape[0] / rate)
bird_calls_df["length"] = np.asarray(wav_lengths, dtype=float)

In [40]:
# Show the first five rows; the table is now indexed by `fname` and carries
# the per-file `length` column.
bird_calls_df.head(n=5)

Unnamed: 0_level_0,label,length
fname,Unnamed: 1_level_1,Unnamed: 2_level_1
American_Goldfinch_XC114342.wav,American_Goldfinch,136.968375
American_Goldfinch_XC124312.wav,American_Goldfinch,52.92
American_Goldfinch_XC133564.wav,American_Goldfinch,50.599184
American_Goldfinch_XC133565.wav,American_Goldfinch,55.222857
American_Goldfinch_XC141469.wav,American_Goldfinch,54.058934


In [41]:
# Distinct values of the `label` column, in the sorted order np.unique returns.
bird_classes = [*np.unique(bird_calls_df["label"])]

In [47]:
# Mean of the `length` column per label. Despite the name, this is a per-class
# average, not a count of recordings per class.
bird_classes_distribution = (
    bird_calls_df
    .groupby(by=['label'])['length']
    .mean()
)

In [48]:
# Display the per-label mean of the `length` column computed above.
bird_classes_distribution

label
American_Goldfinch        52.322228
American_Robin            75.790837
Barn_Swallow             132.134248
Blue-grey_Gnatcatcher     60.431434
Blue_Jay                  49.986092
Carolina_Chickadee        56.634403
Carolina_Wren             50.195712
Cedar_Waxwing             39.937296
Northern_Cardinal         66.524785
Ruby-crowned_Kinglet      53.225308
Name: length, dtype: float64