In [None]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [None]:
# Mount Google Drive inside the Colab VM; the project folder used by the
# following cells lives below '/content/gdrive'.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## Imports and global parameters

In [None]:
!pip install noisereduce

Collecting noisereduce
  Downloading https://files.pythonhosted.org/packages/04/d0/4e50cac3daaa1522a3730ec22750ca86f6c221a480e65c8d6b0ab18a21ed/noisereduce-1.1.0.tar.gz
Building wheels for collected packages: noisereduce
  Building wheel for noisereduce (setup.py) ... [?25l[?25hdone
  Created wheel for noisereduce: filename=noisereduce-1.1.0-cp37-none-any.whl size=7610 sha256=d9f8c6966c0a58b459a1a06d3acd8c7249bb4e6dc662a7eb3a3da3a9fc30793c
  Stored in directory: /root/.cache/pip/wheels/a6/2c/70/f9ccb41280dcfbe3eaeb7930f913dd85394617f3d3136f29cc
Successfully built noisereduce
Installing collected packages: noisereduce
Successfully installed noisereduce-1.1.0


In [None]:
import os
import re
import sys
import warnings

# Project folder on the mounted Google Drive. `path` keeps its trailing slash
# because later cells build file names by plain string concatenation.
path = '/content/gdrive/My Drive/birdclef-2021/'

# Make modules stored in the project folder (e.g. `utils`) importable.
sys.path.append('/content/gdrive/My Drive/birdclef-2021')

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import pickle

import librosa
import noisereduce
import librosa.display
import IPython.display as ipd

from sklearn.utils import shuffle
from PIL import Image

import torch
from torchvision import transforms

import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras.applications import ResNet152V2

import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model, load_model
#from keras.initializers import glorot_uniform
from tensorflow.keras.layers import Input, Dropout, Add, Dense
from tensorflow.keras.layers import AveragePooling2D, Reshape, Activation
from tensorflow.keras.layers import BatchNormalization, Flatten, Conv1D

import utils

# Global vars
RANDOM_SEED = 1337        # passed as random_state to every shuffle
SAMPLE_RATE = 32000       # forwarded to utils.get_spectrograms (presumably Hz)
SIGNAL_LENGTH = 5         # seconds
SPEC_SHAPE = (48, 128)    # height x width
INPUT_SHAPE = (*SPEC_SHAPE, 3)   # spectrogram shape plus three channels
FMIN = 500                # forwarded to utils.get_spectrograms (presumably Hz)
FMAX = 12500              # forwarded to utils.get_spectrograms (presumably Hz)
MAX_AUDIO_FILES = 1500    # upper bound on the number of audio files used

# Working directories, all located below the Drive project folder `path`
model_dir = soundscape_dir = path + 'basic_test/'
input_dir = path + 'basic_test/birds/'
output_dir = path + 'basic_test/melspectrogram_dataset/'

Hello Kaare
utils


## Get data

### Select data
Find all bird species with at least 200 high-quality recordings (rating of 4 or higher) in the training metadata. The matching rows are saved as `TRAIN` and the sorted species codes as `LABELS`.

In [None]:
# Load metadata file
train = pd.read_csv(path + 'train_metadata.csv',)

# Limit the number of training samples and classes
# First, only use high quality samples (rating of 4 or higher)
train = train.query('rating>=4')

# Second, assume that birds with the most training samples are also the most common
# A species needs at least 200 recordings with a rating of 4 or higher to be considered common
#
# Take labels and counts from the SAME groupby result so every count is paired
# with its own species. Zipping `unique()` (order of appearance) with the
# groupby counts (sorted by label) is only correct when the CSV happens to be
# sorted by primary_label.
recordings_per_species = train.groupby('primary_label')['primary_label'].count()
birds_count = dict(zip(recordings_per_species.index, recordings_per_species.values))
most_represented_birds = [key for key, value in birds_count.items() if value >= 200]

TRAIN = train.query('primary_label in @most_represented_birds')
LABELS = sorted(TRAIN.primary_label.unique())

# Let's see how many species and samples we have left
print('NUMBER OF SPECIES IN TRAIN DATA:', len(LABELS))
print('NUMBER OF SAMPLES IN TRAIN DATA:', len(TRAIN))
print('LABELS:', most_represented_birds)

NUMBER OF SPECIES IN TRAIN DATA: 27
NUMBER OF SAMPLES IN TRAIN DATA: 8548
LABELS: ['amerob', 'barswa', 'bewwre', 'blujay', 'bncfly', 'carwre', 'compau', 'comrav', 'comyel', 'eursta', 'gbwwre1', 'grekis', 'houspa', 'houwre', 'mallar3', 'norcar', 'normoc', 'redcro', 'rewbla', 'roahaw', 'rubpep1', 'rucspa1', 'sonspa', 'spotow', 'wbwwre1', 'wesmea', 'yeofly1']


In [None]:
# Randomly permute the selected rows (seeded, so reproducible) and keep only
# the first MAX_AUDIO_FILES of them
TRAIN = shuffle(TRAIN, random_state=RANDOM_SEED).head(MAX_AUDIO_FILES)

print('FINAL NUMBER OF AUDIO FILES IN TRAINING DATA:', len(TRAIN))

FINAL NUMBER OF AUDIO FILES IN TRAINING DATA: 1500


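### Load spectrograms
Run `utils.get_spectrograms` on every selected audio file, then shuffle the returned spectrogram file paths and pickle them so they can be reloaded later.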

In [None]:
# Walk over the selected recordings and collect what utils.get_spectrograms
# returns for each of them (later cells treat the items as image file paths)
samples = []
for _, recording in tqdm(TRAIN.iterrows(), total=len(TRAIN)):
    if recording.primary_label not in most_represented_birds:
        continue

    audio_file_path = os.path.join(input_dir, recording.primary_label, recording.filename)
    samples.extend(
        utils.get_spectrograms(audio_file_path, recording.primary_label, output_dir,
                               SAMPLE_RATE, SIGNAL_LENGTH, SPEC_SHAPE, FMIN, FMAX)
    )

# Seeded shuffle so spectrograms of different recordings/species are interleaved
TRAIN_SPECS = shuffle(samples, random_state=RANDOM_SEED)
print('SUCCESSFULLY EXTRACTED {} SPECTROGRAMS'.format(len(TRAIN_SPECS)))

# Persist the list so the slow extraction above does not have to be repeated
# (the file holds pickle data despite its .txt extension)
with open(path + "samples_paths.txt", "wb") as samples_file:
    pickle.dump(TRAIN_SPECS, samples_file)

100%|██████████| 1500/1500 [38:44<00:00,  1.55s/it]


SUCCESSFULLY EXTRACTED 4157 SPECTROGRAMS


### Load data
Load the spectrograms, normalize each one to the range 0–1, and stack them into a single array of 4154 sound bites of 48 (mel resolution) x 128 (time resolution) pixels. The labels are 4154 one-hot vectors of length 27 (the number of bird species).

In [None]:
# Reload the list written by the extraction cell.
# NOTE: pickle.load executes arbitrary code from the file; only load files
# this notebook produced itself.
with open(path + "samples_paths.txt", "rb") as samples_file:
    TRAIN_SPECS = pickle.load(samples_file)

In [None]:
# Parse all samples and add spectrograms into train data, primary_labels into label data
#
# Changes compared with the earlier version of this cell:
#  * the loop variable is `spec_path`, not `path`, so the global Drive `path`
#    is no longer overwritten (re-running earlier cells used to break);
#  * arrays are collected in lists and stacked once at the end instead of
#    calling np.vstack on the growing result in every iteration (quadratic);
#  * the image file is closed as soon as its pixels have been copied.
spec_batches, label_rows = [], []
for spec_path in tqdm(TRAIN_SPECS):
    # Open image and convert to numpy array
    with Image.open(spec_path) as spec_image:
        spec = np.array(spec_image, dtype='float32')

    # Normalize between 0.0 and 1.0
    # and exclude samples with nan (e.g. a constant image gives 0/0)
    spec -= spec.min()
    spec /= spec.max()
    if not spec.max() == 1.0 or not spec.min() == 0.0:
        continue

    # Add channel axis (last) and batch axis (first)
    spec = np.expand_dims(spec, -1)
    spec = np.expand_dims(spec, 0)
    spec_batches.append(spec)

    # One-hot label; the species code is the name of the file's parent folder
    target = np.zeros((len(LABELS)), dtype='float32')
    bird = spec_path.replace(os.sep, '/').split('/')[-2]
    target[LABELS.index(bird)] = 1.0
    label_rows.append(target)

# Stack once. With no usable sample the result stays an empty list, as before.
train_specs = np.vstack(spec_batches) if spec_batches else []
train_labels = np.vstack(label_rows) if label_rows else []

 36%|███▌      | 1485/4157 [10:02<17:08,  2.60it/s]

### Repeat in channel dimension and preprocess

In [None]:
# The spectrograms were min-max normalised to [0, 1], but the ResNet
# preprocess_input is documented for pixel values in [0, 255] (it subtracts
# the ImageNet channel means without rescaling). Scale back to [0, 255] first,
# then tile the single channel to three channels and preprocess.
train_input = tf.keras.applications.resnet.preprocess_input(np.repeat(train_specs * 255.0, 3, axis=3))

## Create model

In [None]:
# Print the version of the CUDA compiler (nvcc) available in this runtime
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [None]:
# tf.device() only has an effect when used as a context manager
# (`with tf.device(...):`); calling it bare, as this cell used to, does nothing.
# TensorFlow already places operations on a visible GPU by default, so simply
# list the GPUs TensorFlow can see as a confirmation.
tf.config.list_physical_devices('GPU')

### Load base model

In [None]:
# ImageNet-pretrained ResNet50 without its classification top, fed with
# INPUT_SHAPE tensors. The commented lines are alternative (V2) backbones.
baseModel = ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(INPUT_SHAPE)))
#baseModel = ResNet101V2(weights='imagenet', include_top=False, input_tensor=Input(shape=(INPUT_SHAPE)))
#baseModel = ResNet152V2(weights='imagenet', include_top=False, input_tensor=Input(shape=(INPUT_SHAPE)))

# The spectrograms were min-max normalised to [0, 1], but preprocess_input is
# documented for pixel values in [0, 255] (ImageNet mean subtraction without
# rescaling). Scale back to [0, 255] before tiling to three channels.
train_input = tf.keras.applications.resnet.preprocess_input(np.repeat(train_specs * 255.0, 3, axis=3))
# Use the matching resnet_v2 preprocessing when one of the V2 backbones is selected:
#train_input = tf.keras.applications.resnet_v2.preprocess_input(np.repeat(train_specs * 255.0, 3, axis=3))

In [None]:
# Print the layer-by-layer summary of the pretrained base model
baseModel.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 128, 3)] 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 54, 134, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 24, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 24, 64, 64)   256         conv1_conv[0][0]                 
___________________________________________________________________________________________

### Create a new head

In [None]:
# Classification head stacked on top of the base model's output feature map.
# pool_size=(2, 4) is presumably chosen to cover the whole final feature map
# for this input size -- confirm against baseModel.summary().
headModel = AveragePooling2D(pool_size=(2, 4))(baseModel.output)
headModel = Flatten(name="flatten")(headModel)

# Three shrinking fully connected layers, each followed by 50% dropout
for hidden_units in (256, 128, 64):
    headModel = Dense(hidden_units, activation="relu")(headModel)
    headModel = Dropout(0.5)(headModel)

# One softmax output per bird species
headModel = Dense(len(LABELS), activation="softmax")(headModel)

### Add to model

In [None]:
# Join the pretrained base and the new head into one trainable model
model = Model(inputs=baseModel.input, outputs=headModel)

# Reset trainability: every layer of the base model is fine-tuned
for layer in baseModel.layers:
    layer.trainable = True

# Optional: freeze part of the base model instead (disabled)
#for layer in baseModel.layers[12:-24]:
#    layer.trainable = False

# Print the summary once, after trainability has been settled
# (it used to be printed twice, duplicating a very long output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 128, 3)] 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 54, 134, 3)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 24, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 24, 64, 64)   256         conv1_conv[0][0]                 
______________________________________________________________________________________________

In [None]:
# List every base-model layer with its 1-based position and trainable flag;
# n_layers ends up holding the total number of layers (0 if there are none)
n_layers = 0
for n_layers, layer in enumerate(baseModel.layers, start=1):
    print(n_layers, layer.name)
    print(layer.trainable)

1 input_1
True
2 conv1_pad
True
3 conv1_conv
True
4 conv1_bn
True
5 conv1_relu
True
6 pool1_pad
True
7 pool1_pool
True
8 conv2_block1_1_conv
True
9 conv2_block1_1_bn
True
10 conv2_block1_1_relu
True
11 conv2_block1_2_conv
True
12 conv2_block1_2_bn
True
13 conv2_block1_2_relu
True
14 conv2_block1_0_conv
True
15 conv2_block1_3_conv
True
16 conv2_block1_0_bn
True
17 conv2_block1_3_bn
True
18 conv2_block1_add
True
19 conv2_block1_out
True
20 conv2_block2_1_conv
True
21 conv2_block2_1_bn
True
22 conv2_block2_1_relu
True
23 conv2_block2_2_conv
True
24 conv2_block2_2_bn
True
25 conv2_block2_2_relu
True
26 conv2_block2_3_conv
True
27 conv2_block2_3_bn
True
28 conv2_block2_add
True
29 conv2_block2_out
True
30 conv2_block3_1_conv
True
31 conv2_block3_1_bn
True
32 conv2_block3_1_relu
True
33 conv2_block3_2_conv
True
34 conv2_block3_2_bn
True
35 conv2_block3_2_relu
True
36 conv2_block3_3_conv
True
37 conv2_block3_3_bn
True
38 conv2_block3_add
True
39 conv2_block3_out
True
40 conv3_block1_1_conv
Tr

In [None]:
# Adam with an initial learning rate of 0.001 (ReduceLROnPlateau may lower it
# during training) and categorical cross-entropy with light label smoothing.
# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.01),
              metrics=['accuracy'])

In [None]:
# Callbacks: halve the learning rate when val_loss stops improving, save the
# model with the best val_loss, and optionally stop early.
#
# This cell used to assign `callbacks` twice; the second list (without
# EarlyStopping) silently replaced the first. That effective configuration is
# kept as the default. Set USE_EARLY_STOPPING = True to enable early stopping.
USE_EARLY_STOPPING = False

callbacks = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                  patience=2,
                                                  verbose=1,
                                                  factor=0.5),
             tf.keras.callbacks.ModelCheckpoint(filepath=model_dir + 'best_model.h5',
                                                monitor='val_loss',
                                                verbose=0,
                                                save_best_only=True)]

if USE_EARLY_STOPPING:
    # Inserted between the two callbacks above, i.e. the order of the first list
    callbacks.insert(1, tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                         verbose=1,
                                                         patience=5))

## Train model

In [None]:
# Train the ResNet50-based model for up to 20 epochs in mini-batches of 32,
# holding out 20% of the samples for validation
model.fit(
    x=train_input,
    y=train_labels,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20

Epoch 00016: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 17/20
Epoch 18/20

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 19/20
Epoch 20/20

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.


<tensorflow.python.keras.callbacks.History at 0x7fb65bac4d10>

## Homemade

In [None]:
def identity_block(X, f, filters, stage, block):
    """Three-convolution residual block whose shortcut is the unchanged input.

    Main path: 1x1 conv -> BN -> ReLU -> fxf conv ('same') -> BN -> ReLU ->
    1x1 conv -> BN. The input is then added back and a final ReLU is applied.

    Arguments:
    X -- input tensor (BatchNormalization uses axis=3, so channels are
         presumably last; for the Add the input presumably needs filters[2]
         channels -- confirm against callers)
    f -- kernel size of the middle convolution
    filters -- three filter counts, one per convolution of the main path
    stage, block -- only used to build the layer names

    Returns:
    X -- output tensor of the block
    """
    # Shared name stems, e.g. 'res2b_branch' / 'bn2b_branch'
    name_stem = str(stage) + block + '_branch'
    conv_name_base = 'res' + name_stem
    bn_name_base = 'bn' + name_stem

    F1, F2, F3 = filters

    # Keep the input for the skip connection
    X_shortcut = X

    # (name suffix, filters, kernel size, padding, ReLU afterwards?)
    main_path = (
        ('2a', F1, (1, 1), 'valid', True),
        ('2b', F2, (f, f), 'same', True),
        ('2c', F3, (1, 1), 'valid', False),
    )
    for suffix, n_filters, kernel, pad, relu_after in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=pad,
                   name=conv_name_base + suffix,
                   kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_name_base + suffix)(X)
        if relu_after:
            X = Activation('relu')(X)

    # Skip connection followed by the final non-linearity
    X = Add()([X, X_shortcut])
    return Activation('relu')(X)

In [None]:
def convolutional_block(X, f, filters, stage, block, s = 2):
    """Three-convolution residual block with a convolutional shortcut.

    Main path: 1x1 conv (stride s) -> BN -> ReLU -> fxf conv ('same') -> BN ->
    ReLU -> 1x1 conv -> BN. The shortcut applies its own 1x1 conv (stride s,
    filters[2] filters) plus BN to the input before both paths are added and
    passed through a final ReLU.

    Arguments:
    X -- input tensor (BatchNormalization uses axis=3, so channels are
         presumably last -- confirm against callers)
    f -- kernel size of the middle convolution
    filters -- three filter counts, one per convolution of the main path
    stage, block -- only used to build the layer names
    s -- stride of the first main-path convolution and of the shortcut convolution

    Returns:
    X -- output tensor of the block
    """
    F1, F2, F3 = filters

    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, suffix, **conv_options):
        # One named convolution followed by its named batch normalisation
        tensor = Conv2D(n_filters, name=conv_name_base + suffix,
                        kernel_initializer=glorot_uniform(seed=0), **conv_options)(tensor)
        return BatchNormalization(axis=3, name=bn_name_base + suffix)(tensor)

    block_input = X

    # Main path
    X = conv_bn(X, F1, '2a', kernel_size=(1, 1), strides=(s, s))
    X = Activation('relu')(X)

    X = conv_bn(X, F2, '2b', kernel_size=(f, f), strides=(1, 1), padding='same')
    X = Activation('relu')(X)

    X = conv_bn(X, F3, '2c', kernel_size=(1, 1), strides=(1, 1), padding='valid')

    # Shortcut path: strided 1x1 convolution with the main path's final filter count
    X_shortcut = conv_bn(block_input, F3, '1', kernel_size=(1, 1), strides=(s, s))

    # Merge both paths and apply the final non-linearity
    X = Add()([X, X_shortcut])
    return Activation('relu')(X)

In [None]:
def ResNet18(input_shape = (INPUT_SHAPE), classes = len(LABELS)):
    """Build the hand-written residual network used as an alternative model.

    Layout: zero-padding + 7x7 stride-2 convolution stem with max pooling,
    four stages of one convolutional_block followed by one identity_block,
    2x2 average pooling, and a dense head (two 256-unit ReLU layers with 50%
    dropout each, then a softmax over `classes`).

    NOTE(review): the returned model is named 'ResNet50' although the function
    is called ResNet18 -- kept unchanged here, confirm which name is intended.

    Arguments:
    input_shape -- shape of one input sample, passed to Input()
    classes -- number of units of the softmax output layer

    Returns:
    model -- the assembled (uncompiled) Model
    """
    X_input = Input(input_shape)

    # Stem: zero-padding, 7x7 stride-2 convolution, BN, ReLU, 3x3 stride-2 max pooling
    X = ZeroPadding2D((3, 3))(X_input)
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1',
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # (stage number, filters of the three convolutions, stride of the first block)
    stage_plan = (
        (2, [64, 64, 256], 1),
        (3, [128, 128, 512], 2),
        (4, [256, 256, 1024], 2),
        (5, [512, 512, 2048], 2),
    )
    for stage_no, stage_filters, stride in stage_plan:
        X = convolutional_block(X, f=3, filters=stage_filters, stage=stage_no, block='a', s=stride)
        X = identity_block(X, 3, stage_filters, stage=stage_no, block='b')

    # Pool and flatten the final feature map
    X = AveragePooling2D((2, 2), name='avg_pool')(X)
    X = Flatten()(X)

    # Two identical fully connected layers with dropout
    for _ in range(2):
        X = Dense(256, activation="relu")(X)
        X = Dropout(0.5)(X)

    # Output layer
    X = Dense(classes, activation='softmax', name='fc' + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    return Model(inputs=X_input, outputs=X, name='ResNet50')

In [None]:
# NOTE(review): TensorFlow has already been used earlier in the notebook, so
# setting CUDA_VISIBLE_DEVICES here presumably has no effect -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
import numpy as np
# Import every Keras name from tensorflow.keras, like the imports at the top of
# the notebook. These names used to come from the standalone `keras` package
# while Dropout (used by ResNet18) comes from tensorflow.keras, so one model
# mixed layers from two packages.
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from IPython.display import SVG
from tensorflow.keras.utils import model_to_dot
from tensorflow.keras import backend as K
import tensorflow as tf



# Build the hand-written network; note that this rebinds `model`, replacing
# the ResNet50-based model created earlier in the notebook
model = ResNet18(input_shape = INPUT_SHAPE, classes = len(LABELS))

In [None]:
# Compile the hand-written network: default Adam optimizer, plain categorical
# cross-entropy, accuracy reported during training
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Use fresh callback instances and a separate checkpoint file for this model.
# Reusing the `callbacks` list of the ResNet50-based model would overwrite its
# 'best_model.h5' with a different architecture, and that ModelCheckpoint
# instance would still compare against the best val_loss of the earlier run.
homemade_callbacks = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                           patience=2,
                                                           verbose=1,
                                                           factor=0.5),
                      tf.keras.callbacks.ModelCheckpoint(filepath=model_dir + 'best_model_homemade.h5',
                                                         monitor='val_loss',
                                                         verbose=0,
                                                         save_best_only=True)]

# Train the hand-written network for up to 50 epochs, holding out 20% of the
# samples for validation
model.fit(train_input,
          train_labels,
          batch_size=32,
          validation_split=0.2,
          callbacks=homemade_callbacks,
          epochs=50)