In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import efficientnet.keras as efn 
import librosa
import librosa.display as display
import os
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image
from sklearn.utils import class_weight
import warnings
from tqdm import tqdm
#from tensorflow.keras import backend

from kapre.time_frequency import Melspectrogram
from kapre.utils import Normalization2D
from kapre.augmentation import AdditiveNoise
from kapre.time_frequency import Spectrogram

import multiprocessing as mp

%matplotlib inline

#!rm -r train_data
#!rm -r val_data
#!rm -r models
#!mkdir models

# suppress warnings
warnings.filterwarnings("ignore")

# fix for RTX2080 CTRNN memory alloc issues
SOUND_DIR = "/project/data/birdsong-recognition/train_audio/"
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

Using TensorFlow backend.


In [2]:
# function for creating Mel Spectrogram
def createMelSpectrogram(input_path, fileName, output_path, saveOrShow=0):
    print("creating for",fileName)
    if (os.path.isdir(os.path.join(input_path, fileName))):
        return
    
    # load sound signal
    signal, sr = librosa.load(os.path.join(input_path, fileName), duration=10)
    
    # create Mel Spectrogram
    S = Melspectrogram(n_dft=1024, 
                       n_hop=256,
                       input_shape=(1, signal.shape[0]),
                       padding='same', sr=sr, n_mels=224, fmin=1400, fmax=sr/2,
                       power_melgram=2.0, return_decibel_melgram=True,
                       trainable_fb=False, trainable_kernel=False)(signal.reshape(1, 1, -1)).numpy()
    
    S = S.reshape(S.shape[1], S.shape[2])
    
    ##### NOTE CHANGE TO FILENAME INDEX TO CORRECTLY PROCESS AUGMENT
    ##### FILES THAT CONTAIN A. PREFIXES AND OTHER DOTS BEFORE ".mp3"
    if saveOrShow == 0:   
        print("writing out", os.path.join(output_path, fileName[:-4] + ".png"))
        matplotlib.image.imsave(os.path.join(output_path, fileName[:-4] + ".png"), S)
    else:
        #plt.imshow(S)
        #plt.show()
        display.specshow(S, sr=sr)
        plt.show()

## Create Mel-Spectrogram for training

In [3]:
# select top 20 birds for training
BIRDS = os.listdir("data/birdsong-recognition/train_audio/")[0:20]

In [4]:
# list of birds
#BIRDS = ["aldfly", "ameavo", "amebit", "amecro", "amegfi",
#         "amekes", "amepip", "amered", "amerob", "amewig"]

suffix = "gn015"

train_folder = "data/train_data_" + suffix + "/"
val_folder = "data/val_data_" + suffix + "/"

if not os.path.exists(train_folder): os.mkdir(train_folder)
if not os.path.exists(val_folder): os.mkdir(val_folder)

In [5]:
def bird_to_mel(bird):
    print(bird, "\n")
    INPUT_DIR = os.path.join("data/birdsong-recognition/train_audio/", bird)
    TRAIN_DIR = os.path.join(train_folder, bird)
    VAL_DIR = os.path.join(val_folder, bird)
    
    # create folders
    if not(os.path.exists(TRAIN_DIR)) and not(os.path.exists(VAL_DIR)): 
        
        os.mkdir(TRAIN_DIR)
        os.mkdir(VAL_DIR)

        
        # split into train and val set
        for f in os.listdir(INPUT_DIR):
            
            rand = np.random.randint(0, 10)

            if rand <= 7: 
                createMelSpectrogram(INPUT_DIR, f, TRAIN_DIR)
            else:
                createMelSpectrogram(INPUT_DIR, f, VAL_DIR)        

In [6]:
### This takes long time to run ###

## Needed to manualy rename the train and validation data directories
## to force it to be recreated with the full data set

# create train and val spectrogram
np.random.seed(1234)

threads = int((mp.cpu_count() /2) - 1)  # for hyperthreading cores
# threads = int(mp.cpu_count() -1)      # for non-hyperthreading cores
# threads = 48

# Handle single-core machines
if (threads < 1): threads = 1
print(f"Launching {threads} threads")
pool = mp.Pool(threads)

pool.map(bird_to_mel, BIRDS)



Launching 31 threads
ameredyelwarsemsanhorlarvigswablugrb1 cedwax sposanbelkin1gockinbrthum olsfly
commer whbnut wilsni1moudovgryfly  pasfly
annhumwewpew    


 
      

 








 
























[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [7]:
IM_SIZE = (224,224,3)
BATCH_SIZE = 8

In [8]:
train_datagen = ImageDataGenerator(preprocessing_function=None,
                                   rescale=1/255,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.1,
                                   fill_mode='nearest')

train_batches = train_datagen.flow_from_directory(train_folder,
                                                  classes=BIRDS, 
                                                  target_size=IM_SIZE[0:2], 
                                                  class_mode='categorical', 
                                                  shuffle=True, batch_size=BATCH_SIZE)

val_datagen = ImageDataGenerator(preprocessing_function=None, rescale=1/255)

val_batches = train_datagen.flow_from_directory(val_folder,
                                                  classes=BIRDS, 
                                                  target_size=IM_SIZE[0:2], 
                                                  class_mode='categorical', 
                                                  shuffle=False, batch_size=1)

class_weights = class_weight.compute_class_weight('balanced', 
                                                  np.unique(train_batches.classes), 
                                                  train_batches.classes)

Found 5333 images belonging to 20 classes.
Found 1355 images belonging to 20 classes.


In [9]:
net = efn.EfficientNetB3(include_top=False, weights="imagenet", input_tensor=None, input_shape=IM_SIZE)
#net.trainable = False

x = net.output

#x1 = tf.keras.layers.GlobalAveragePooling2D()(x)
#x2 = tf.keras.layers.GlobalMaxPool2D()(x)
#x = tf.keras.layers.Concatenate(axis=1)([x1, x2])

x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(0.5)(x)

output_layer = tf.keras.layers.Dense(len(BIRDS), activation='softmax', name='softmax')(x)
net_final = tf.keras.Model(inputs=net.input, outputs=output_layer)

net_final.compile(optimizer=tf.keras.optimizers.Adam(), 
                  loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
net_final.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
stem_conv (Conv2D)              (None, 112, 112, 40) 1080        input_1[0][0]                    
__________________________________________________________________________________________________
stem_bn (BatchNormalization)    (None, 112, 112, 40) 160         stem_conv[0][0]                  
__________________________________________________________________________________________________
stem_activation (Activation)    (None, 112, 112, 40) 0           stem_bn[0][0]                    
______________________________________________________________________________________________

In [11]:
##### %%time
ModelCheck = tf.keras.callbacks.ModelCheckpoint('models/efficientnet_checkpoint.h5', monitor='val_loss', verbose=0, 
                             save_best_only=True, save_weights_only=True, mode='auto')

net_final.reset_states()
net_final.fit_generator(train_batches, 
                        validation_data=val_batches,
                        steps_per_epoch = int(len(train_batches.classes)/BATCH_SIZE)+1,
                        validation_steps=len(val_batches.classes),
                        epochs=60, 
                        callbacks=[ModelCheck],
                       class_weight={i:class_weights[i] for i in range(len(BIRDS))})

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60


Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x7f54e44467b8>

In [12]:
net_final.load_weights('models/efficientnet_checkpoint.h5')
print("Accuracy on val data")
net_final.evaluate(val_batches, steps=len(val_batches.classes))[1]

Accuracy on val data


0.8826568126678467

In [13]:
# net_final.save("models/net_final_augmented_data_1.h5")

In [14]:
# net_final.save_weights("models/net_final_augmented_data_1.ckpt")

In [15]:
# tf.saved_model.save(net_final, "models/net_final_augmented_data_1.pb")