# British Birdsong Classification Using CNN on the Spectrogram Images

This project is aiming at identifying the genus of a bird from 66 different genuses using a CNN model on the spectrogram images of the bird songs.

In [13]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import soundfile as sf
import re
import librosa
import librosa.display
from tqdm import tqdm
import sklearn
import keras
import keras.layers as layers
from keras.models import Sequential

## 1. Load Data

In [2]:
path = './british-birdsong-dataset/songs/'
files= os.listdir(path)
nfile = len(files);
meta = pd.read_csv('./british-birdsong-dataset/birdsong_metadata.csv')

## 2. Constructing Datasets

In [3]:
def index(filename_):
    '''returns the index of the filename in metadata'''
    return np.where(meta.file_id == np.int(re.findall('\d+', filename_)[0]))[0][0]

In [4]:
def boostrap_sample(song_, num, rs, tclip):
    '''sample num clips from song_ '''
    spcs = []
    
    for j in range(num):
        pstart = np.random.randint(len(song_) - tclip*rs)
        pend = pstart + tclip*rs
        clip = song_[pstart:pend]
        
        # construct spectrograms for clips
        X = librosa.stft(clip, n_fft=100, hop_length=5000)
        # Xdb = librosa.amplitude_to_db(abs(X))
        
        spcs.append(abs(X))
    
    spcs = np.array(spcs)
        
    return spcs

In [5]:
time_clip = 10;
npick = 100;
train_set = [];
valid_set = [];
test_set = [];
genuses_train = [];
genuses_valid = [];
genuses_test = [];
count_cnames = {}; 
cnames_train = [];
cnames_valid = [];
cnames_test = [];

In [6]:
specs = np.zeros((npick*nfile, 51, 89), dtype = np.float32)
genuses = []
cnames = []

count = 0

for k in tqdm(range(nfile)):
    filename = files[k]
    song, Fs = sf.read(path+filename)
    T = len(song)/Fs
    
    genus = meta.genus[index(filename)]
    cname = meta.english_cname[index(filename)];
    genuses.extend([genus]*npick)
    cnames.extend([cname]*npick)
    
    if T < time_clip:
        continue
    
#     # build dictionary for bird names
#     if cname in count_cnames:
#         count_cnames[cname] += 1
#     else:
#         count_cnames[cname] = 1
    
    specs[k*npick:k*npick+npick,:,:] = boostrap_sample(song, npick, Fs, time_clip)

100%|██████████| 264/264 [00:36<00:00,  8.21it/s]


## 3. Divide Training/Validation/Test

In [7]:
# remove black images

inds = []

for j in range(len(specs)):
    if sum(sum(specs[j]==0))==51*89:
        inds.append(j)

specs = np.delete(specs, inds, axis=0)
genuses = np.delete(np.array(genuses), inds)
cnames =np.delete(np.array(cnames), inds)

In [9]:
# shuffle the datasets in the same way

inds = np.arange(0, len(specs), 1)
inds = np.random.permutation(inds)

specs = specs[inds, :, :]
genuses = genuses[inds]
cnames = cnames[inds]

In [23]:
# rescale images to [0,1]

data_mat = np.reshape(specs, (len(specs), -1))
data_mat = sklearn.preprocessing.minmax_scale(data_mat, axis=1)
specs_rescale = np.reshape(data_mat, (len(specs), 51, 89,1))

In [24]:
# divide training validation and test

x_train = specs_rescale[0:np.int(0.6*len(specs))]
x_valid = specs_rescale[np.int(0.6*len(specs)):np.int(0.8*len(specs))]
x_test = specs_rescale[np.int(0.8*len(specs)):]

y_train = genuses[0:np.int(0.6*len(specs))]
y_valid = genuses[np.int(0.6*len(specs)):np.int(0.8*len(specs))]
y_test = genuses[np.int(0.8*len(specs)):]

## 4. Train Network

In [22]:
model = keras.Sequential()

model.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(51,89,1)))
model.add(layers.MaxPooling2D())

model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(layers.MaxPooling2D())

model.add(layers.Flatten())

model.add(layers.Dense(units=120, activation='relu'))

model.add(layers.Dense(units=84, activation='relu'))

model.add(layers.Dense(units=66, activation = 'softmax'))

In [29]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 49, 87, 6)         60        
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 24, 43, 6)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 22, 41, 16)        880       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 11, 20, 16)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 3520)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 120)               422520    
_________________________________________________________________
dense_5 (Dense)              (None, 84)                10164     
__________

In [27]:
model.compile(loss='mse', optimizer='adam')

In [28]:
model.fit(x_train, y_train, validation_data=(x_valid, y_valid), epochs = 30, batch_size=128)

ValueError: Error when checking target: expected dense_6 to have shape (66,) but got array with shape (1,)