# British Birdsong Classification Using CNN on the Spectrogram Images

This project is aiming at identifying the genus of a bird from 66 different genuses using a CNN model on the spectrogram images of the bird songs.

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import soundfile as sf
import re
import librosa
import librosa.display
from tqdm import tqdm
import sklearn
import keras
import keras.layers as layers
from keras.models import Sequential

Using TensorFlow backend.


## 1. Load Data

In [2]:
path = './british-birdsong-dataset/songs/'
files= os.listdir(path)
nfile = len(files);
meta = pd.read_csv('./british-birdsong-dataset/birdsong_metadata.csv')

## 2. Constructing Datasets

In [3]:
def index(filename_):
    '''returns the index of the filename in metadata'''
    return np.where(meta.file_id == np.int(re.findall('\d+', filename_)[0]))[0][0]

In [4]:
def boostrap_sample(song_, num, rs, tclip):
    '''sample num clips from song_ '''
    spcs = []
    
    for j in range(num):
        pstart = np.random.randint(len(song_) - tclip*rs)
        pend = pstart + tclip*rs
        clip = song_[pstart:pend]
        
        # construct spectrograms for clips
        X = librosa.stft(clip, n_fft=100, hop_length=5000)
        # Xdb = librosa.amplitude_to_db(abs(X))
        
        spcs.append(abs(X))
    
    spcs = np.array(spcs)
        
    return spcs

In [5]:
time_clip = 20;
npick = 100;
train_set = [];
valid_set = [];
test_set = [];
genuses_train = [];
genuses_valid = [];
genuses_test = [];
count_cnames = {}; 
cnames_train = [];
cnames_valid = [];
cnames_test = [];

In [6]:
# specs = np.zeros((npick*251, 51, 89), dtype = np.float32)
specs_train = []
specs_test = []
genuses_train = []
genuses_test = []
count_cnames = []

count = 0

for k in tqdm(range(nfile)):
    
    filename = files[k]
    song, Fs = sf.read(path+filename)
    T = len(song)/Fs
    
    if T < time_clip:
        continue
        
    genus = meta.genus[index(filename)]
    cname = meta.english_cname[index(filename)];
    count_cnames.append(cname)

    if count_cnames.count(cname) < 3:
        genuses_train.extend([genus]*npick);
        specs_train.extend(boostrap_sample(song, npick, Fs, time_clip))
    else:
        genuses_test.extend([genus]*npick);
        specs_test.extend(boostrap_sample(song, npick, Fs, time_clip))
    
    count += 1

100%|██████████| 264/264 [01:18<00:00,  3.29it/s]


In [7]:
specs_train = np.array(specs_train)
specs_test = np.array(specs_test)

## 3.  Transform datasets

In [8]:
# rescale images to [0,1]

data_train = np.reshape(specs_train, (len(specs_train), -1))
data_test = np.reshape(specs_test, (len(specs_test), -1))

data_train = sklearn.preprocessing.minmax_scale(data_train, axis=1)
data_test = sklearn.preprocessing.minmax_scale(data_test, axis=1)

specs_train_rescale = np.reshape(data_train, (len(specs_train), 51, 177,1))
specs_test_rescale = np.reshape(data_test, (len(specs_test), 51, 177,1))

In [9]:
# change elements of labels to number vectors

list_genus = np.unique(genuses_train)

num_labels_train = []
num_labels_test = []

for genus in genuses_train:
    num_labels_train.append(np.where(genus==list_genus)[0][0])
    
for genus in genuses_test:
    num_labels_test.append(np.where(genus==list_genus)[0][0])
    
labels_train = np.zeros((len(genuses_train), len(list_genus)))
labels_test = np.zeros((len(genuses_test), len(list_genus)))

for j in range(len(labels_train)):
    labels_train[j, num_labels_train[j]] = 1
    
for j in range(len(labels_test)):
    labels_test[j, num_labels_test[j]] = 1

In [10]:
# divide training validation and test

x_train = specs_train_rescale[0:np.int(0.8*len(specs_train))]
x_valid = specs_train_rescale[np.int(0.8*len(specs_train)):]
x_test = specs_test_rescale

y_train = labels_train[0:np.int(0.8*len(specs_train))]
y_valid = labels_train[np.int(0.8*len(specs_train)):]
y_test = labels_test

## 4. Train Network

In [11]:
model = keras.Sequential()

model.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(51,177,1)))
model.add(layers.MaxPooling2D())

model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Dropout(0.4))

model.add(layers.Flatten())

model.add(layers.Dense(units=120, activation='relu'))

model.add(layers.Dropout(0.4))

model.add(layers.Dense(units=84, activation='relu'))

model.add(layers.Dense(units=66, activation = 'softmax'))

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 49, 175, 6)        60        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 87, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 22, 85, 16)        880       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 11, 42, 16)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 11, 42, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 7392)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 120)               887160    
__________

In [13]:
model.compile(loss='mse', optimizer='adam')

In [14]:
model.fit(x_train, y_train, validation_data=(x_valid, y_valid), epochs = 30, batch_size=128)

Train on 13120 samples, validate on 3280 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f2a60b7fef0>

In [15]:
predict_ = model.predict(x_test)
predict = np.zeros((len(predict_[:,0]), len(predict_[0])))

In [16]:
accuracy = 0
for j in range(len(predict_[:,0])):
    predict[j,:] = predict_[j] == np.max(predict_[j])
    if (np.sum(predict[j] == y_test[j]) == len(predict[j])) == 1 :
        accuracy += 1
accuracy /= len(predict[:,0])

In [17]:
accuracy

0.35696428571428573