In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Model
import efficientnet.keras as efn 
import librosa
import librosa.display as display
import os
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image
from sklearn.utils import class_weight
import warnings
from tqdm import tqdm
#from tensorflow.keras import backend

from kapre.time_frequency import Melspectrogram
from kapre.utils import Normalization2D
from kapre.augmentation import AdditiveNoise
from kapre.time_frequency import Spectrogram

#from python_speech_features import mfcc
#from mutagen.mp3 import MP3
#from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

#augmenter = Compose([
#    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.02, p=0.5)
#])

%matplotlib inline

#!rm -r train_data
#!rm -r val_data
#!rm -r models
#!mkdir models

# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")

# Folder holding the raw training audio recordings.
SOUND_DIR = "data/birdsong-recognition/train_audio/"

Using TensorFlow backend.


In [2]:
# Root folders of the spectrogram images; each sub-folder name is used as a class label.
train_folder = "melspectrogram/train_data"
val_folder = "melspectrogram/val_data"

# Network input shape (height, width, channels) and mini-batch size.
IM_SIZE = (224,224,3,)
BATCH_SIZE = 32

# Sanity check: train and validation must contain the same class folders.
# os.listdir returns entries in arbitrary order, so both listings are sorted
# before comparing; otherwise identical folder sets could compare unequal.
a = sorted(os.listdir(train_folder))
b = sorted(os.listdir(val_folder))
print(a == b)

# Sorted class names; the position in this list is the class index used downstream.
BIRDS = list(a)
print(len(BIRDS))

True
264


In [3]:
# Both generators only multiply pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(preprocessing_function=None,
                                   rescale=1/255)

# Passing classes=BIRDS pins the label index of every class to its position in BIRDS.
train_batches = train_datagen.flow_from_directory(train_folder,
                                                  classes=BIRDS,
                                                  target_size=IM_SIZE[0:2],
                                                  class_mode='categorical',
                                                  shuffle=True, batch_size=BATCH_SIZE)

val_datagen = ImageDataGenerator(preprocessing_function=None, rescale=1/255)

# shuffle=False keeps validation batches in a fixed order.
val_batches = val_datagen.flow_from_directory(val_folder,
                                              classes=BIRDS,
                                              target_size=IM_SIZE[0:2],
                                              class_mode='categorical',
                                              shuffle=False, batch_size=BATCH_SIZE)

# Per-class weights that compensate for class imbalance in the training set.
# `classes` and `y` are passed by keyword: recent scikit-learn releases make them
# keyword-only, and the keyword form also works on older releases.
# The returned array is ordered like np.unique(train_batches.classes).
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(train_batches.classes),
                                                  y=train_batches.classes)

Found 88763 images belonging to 264 classes.
Found 22159 images belonging to 264 classes.


In [7]:
# Backbone: EfficientNet-B3 initialised with ImageNet weights, without its classifier head.
# NOTE(review): `efn` comes from `efficientnet.keras` (standalone Keras) while the head
# below is built from `tf.keras` layers; the package also ships `efficientnet.tfkeras`
# for use with tf.keras - confirm which variant is intended before upgrading TF/Keras.
net = efn.EfficientNetB3(include_top=False, weights="imagenet", input_tensor=None, input_shape=IM_SIZE)
# The backbone stays trainable, so the whole network is fine-tuned.
#net.trainable = False

# Alternative backbone (disabled): InceptionV3
#net = tf.keras.applications.inception_v3.InceptionV3(weights='imagenet', include_top=False)

# Alternative head (disabled): concatenate global average and global max pooling
#x1 = tf.keras.layers.GlobalAveragePooling2D()(x)
#x2 = tf.keras.layers.GlobalMaxPool2D()(x)
#x = tf.keras.layers.Concatenate(axis=1)([x1, x2])

# Classification head: global average pooling -> dropout -> softmax.
# GlobalAveragePooling2D already returns a 2-D (batch, channels) tensor, so the
# Flatten layer that used to follow it was a no-op and has been removed.
x = tf.keras.layers.GlobalAveragePooling2D()(net.output)
x = tf.keras.layers.Dropout(0.5)(x)

# One softmax unit per entry in BIRDS.
output_layer = tf.keras.layers.Dense(len(BIRDS), activation='softmax', name='softmax')(x)
net_final = tf.keras.Model(inputs=net.input, outputs=output_layer)

# Adam with its default settings; categorical cross-entropy matches the one-hot
# labels produced by class_mode='categorical'.
net_final.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
# Print the layer-by-layer architecture and parameter counts of the assembled model.
net_final.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, 112, 112, 40) 1080        input_2[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, 112, 112, 40) 160         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activation (Activation)    (None, 112, 112, 40) 0           stem_bn[0][0]                    
____________________________________________________________________________________________

In [9]:
# Make sure the checkpoint folder exists before training starts; the `!mkdir models`
# line in the setup cell is commented out, so saving could otherwise fail on a
# missing directory.
checkpoint_path = 'models/efficientNetB3_checkpoint.h5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Keep only the weights of the epoch with the best (lowest) validation loss.
ModelCheck = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                monitor='val_loss', verbose=0,
                                                save_best_only=True, save_weights_only=True, mode='auto')

# Stop once validation loss has not improved for 10 consecutive epochs.
ES = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, verbose=0)

# reset_states() only clears the state of stateful layers; it does not
# re-initialise the model weights.
net_final.reset_states()
# len(iterator) is ceil(n_samples / batch_size), i.e. exactly one pass over the data.
# The previous `int(n / BATCH_SIZE) + 1` scheduled one extra step whenever the sample
# count was an exact multiple of the batch size.
# NOTE(review): fit_generator is deprecated in newer TensorFlow releases in favour of
# fit(); kept here because the installed version is not visible from this notebook.
net_final.fit_generator(train_batches,
                        validation_data=val_batches,
                        steps_per_epoch=len(train_batches),
                        validation_steps=len(val_batches),
                        epochs=50,
                        callbacks=[ModelCheck, ES],
                        class_weight={i:class_weights[i] for i in range(len(BIRDS))})

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


<tensorflow.python.keras.callbacks.History at 0x7fb520d25ac8>

In [10]:
# Restore the checkpointed weights (best validation loss seen during training).
net_final.load_weights('models/efficientNetB3_checkpoint.h5')
print("Accuracy on val data")
# evaluate() returns [loss, accuracy] for this model; index 1 is the accuracy.
# len(val_batches) is ceil(n_samples / batch_size), so every validation image is
# scored exactly once; the previous `n // BATCH_SIZE + 1` ran one extra batch
# whenever the sample count was an exact multiple of the batch size.
net_final.evaluate(val_batches, steps=len(val_batches))[1]

Accuracy on val data


0.797599196434021