In [1]:
#!pip install torchlibrosa

In [2]:
import numpy as np
import pandas as pd
import json
from sklearn import preprocessing
import os
from tqdm import tqdm
import librosa
import tensorflow as tf
import matplotlib.pyplot as plt

  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)


In [3]:

# Data:
TRAIN_DIR = './train_audio/'
IMAGES_DIR = './images/'
SAMPLE_RATE = 32000
VAL_SIZE = 0.2

# Data processing:
N_FFT = 2048
HOP_LEN = 512
WIN_FUNC = 'hann'
N_MELS = 224
F_MIN = 0
F_MAX = SAMPLE_RATE / 2

# Learning process:
NAME_MODEL_0 = "model_resnet50_inst.h5"
NAME_MODEL_0_PIC = 'model_resnet50_pic.png'
NAME_MODEL_0_CHECKPOINT = 'model_resnet50_cp.ckpt'
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
BATCH_SIZE = 32
N_CHANNELS = 3
EPOCHS = 100
CALL_BACKS = [tf.keras.callbacks.ModelCheckpoint(
    filepath=NAME_MODEL_0_CHECKPOINT,
    save_weights_only=True,
    verbose=0
)]

train_metadata = pd.read_csv('./train_metadata.csv')
train_metadata.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,time,url,filename
0,afrsil1,[],"['call', 'flight call']",12.391,-1.493,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,2.5,08:00,https://www.xeno-canto.org/125458,afrsil1/XC125458.ogg
1,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],19.8801,-155.7254,Euodice cantans,African Silverbill,Dan Lane,Creative Commons Attribution-NonCommercial-Sha...,3.5,08:30,https://www.xeno-canto.org/175522,afrsil1/XC175522.ogg
2,afrsil1,[],"['call', 'song']",16.2901,-16.0321,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:30,https://www.xeno-canto.org/177993,afrsil1/XC177993.ogg
3,afrsil1,[],"['alarm call', 'call']",17.0922,54.2958,Euodice cantans,African Silverbill,Oscar Campbell,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:00,https://www.xeno-canto.org/205893,afrsil1/XC205893.ogg
4,afrsil1,[],['flight call'],21.4581,-157.7252,Euodice cantans,African Silverbill,Ross Gallardy,Creative Commons Attribution-NonCommercial-Sha...,3.0,16:30,https://www.xeno-canto.org/207431,afrsil1/XC207431.ogg


In [4]:
# Load work classes:
with open('./scored_birds.json', 'r') as f:
    valid_classes = json.load(f)

primary_labels = train_metadata.primary_label

# Encode labels:
encoder = preprocessing.LabelEncoder()
labels = encoder.fit_transform(primary_labels)
labels = np.uint8(labels)

NUM_CLASSES = len(np.unique(labels))

In [5]:
# Make a dataset containing the training spectrograms
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    batch_size=BATCH_SIZE,
    validation_split=VAL_SIZE,
    directory=IMAGES_DIR,
    shuffle=True,
    color_mode='rgb',
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    subset="training",
    label_mode='categorical',
    seed=42
)

# Make a dataset containing the validation spectrogram
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    batch_size=BATCH_SIZE,
    validation_split=VAL_SIZE,
    directory=IMAGES_DIR,
    shuffle=True,
    color_mode='rgb',
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    subset="validation",
    label_mode='categorical',
    seed=42
)

Found 144843 files belonging to 152 classes.
Using 115875 files for training.
Found 144843 files belonging to 152 classes.
Using 28968 files for validation.


In [6]:
# Function to prepare our datasets for modelling
def prepare(ds, augment=False):
    # Define our one transformation
    rescale = tf.keras.Sequential([tf.keras.layers.experimental.preprocessing.Rescaling(1./255)])
    flip_and_rotate = tf.keras.Sequential([
        tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.2)
    ])
    
    # Apply rescale to both datasets and augmentation only to training
    ds = ds.map(lambda x, y: (rescale(x, training=True), y))
    if augment: ds = ds.map(lambda x, y: (flip_and_rotate(x, training=True), y))
    return ds

train_dataset = prepare(train_dataset, augment=False)
valid_dataset = prepare(valid_dataset, augment=False)

In [7]:
from  tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from  tensorflow.keras.applications.resnet50 import ResNet50
from keras.layers import Input
from keras.models import Model


def getModel():
    resnet50_input = Input(shape = (224, 224, 3), name = 'Image_input')




    #Get back the convolutional part of a Resnet network trained on ImageNet
    model_resnet50_conv = ResNet50(weights= 'imagenet', include_top=False, input_shape= (224,224,3))

   #Use the generated model 


    output_resnet50_conv= model_resnet50_conv(resnet50_input)

    #Add the fully-connected layers 

    x = Flatten(name='flatten')(output_resnet50_conv)
    x = Dense(152, activation='softmax', name='predictions')(x)

    resnet50_pretrained = Model(resnet50_input,x)
    # vgg16_pretrained.summary()

    # Compile CNN model
    sgd = tf.keras.optimizers.SGD(lr = 0.001)
    resnet50_pretrained.compile(loss='categorical_crossentropy',optimizer = sgd,metrics=['accuracy'])

    return resnet50_pretrained

In [8]:

def plotMetrics(history):
    metrics = list()
    for key, value in history.history.items():
        metrics.append(key)
        
    for i in range(int(len(metrics) / 2)):
        plt.figure(figsize=(24, 6))
        plt.plot(history.history[metrics[i]], c ="darkblue")
        plt.plot(history.history[metrics[i + int(len(metrics) / 2)]], c ="crimson")
        plt.legend(["Train", "Validation"])
        plt.title("Model" + metrics[i])
        plt.xlabel("Epoch")
        plt.ylabel(metrics[i])
        plt.grid(True, alpha = 0.2)
        plt.show()

In [9]:

import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image 
from tensorflow.keras.layers import BatchNormalization
from keras import optimizers

In [10]:
from keras.callbacks import ModelCheckpoint,EarlyStopping,LearningRateScheduler
import math
checkpointer = ModelCheckpoint('resnet50_weights.hdf5', verbose=1, save_best_only=True)
earlystopper = EarlyStopping(monitor='accuracy', patience=7, verbose=1)

def step_decay(epoch):
    initial_lrate = 0.001
    drop = 0.5
    epochs_drop = 10.0
    lrate = initial_lrate * math.pow(drop,  
        math.floor((1+epoch)/epochs_drop))
    return lrate
lrate = LearningRateScheduler(step_decay)


model = getModel()
tf.keras.utils.plot_model(model, NAME_MODEL_0_PIC, show_shapes=True)
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=valid_dataset,
    callbacks=[checkpointer, earlystopper,lrate]
)
model.save(NAME_MODEL_0)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100

Epoch 00001: val_loss improved from inf to 1.63588, saving model to resnet50_weights.hdf5




Epoch 2/100

Epoch 00002: val_loss did not improve from 1.63588
Epoch 3/100

Epoch 00003: val_loss improved from 1.63588 to 1.50509, saving model to resnet50_weights.hdf5
Epoch 4/100

Epoch 00004: val_loss improved from 1.50509 to 1.31592, saving model to resnet50_weights.hdf5
Epoch 5/100

Epoch 00005: val_loss did not improve from 1.31592
Epoch 6/100

Epoch 00006: val_loss did not improve from 1.31592
Epoch 7/100

Epoch 00007: val_loss did not improve from 1.31592
Epoch 8/100

Epoch 00008: val_loss did not improve from 1.31592
Epoch 9/100

Epoch 00009: val_loss did not improve from 1.31592
Epoch 10/100

Epoch 00010: val_loss did not improve from 1.31592
Epoch 11/100

Epoch 00011: val_loss did not improve from 1.31592
Epoch 12/100

Epoch 00012: val_loss did not improve from 1.31592
Epoch 13/100

Epoch 00013: val_loss did not improve from 1.31592
Epoch 14/100

Epoch 00014: val_loss did not improve from 1.31592
Epoch 15/100

Epoch 00015: val_loss did not improve from 1.31592
Epoch 16/100