In [1]:
# Import necessary libraries
import os
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio
from IPython import display
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras.losses import SparseCategoricalCrossentropy
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow import keras
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [3]:
# Define the bird dataset CSV file path
bird_csv_path = '.\small_birds_dataset\metadata.csv'  # Replace with the actual path to your Birds dataset CSV file

# Load the bird dataset CSV file
bird_pd_data = pd.read_csv(bird_csv_path)

# Display the first few rows of the bird dataset
bird_pd_data.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,date,filename,license,rating,time,url,duration,split
0,azaspi1,[],['song'],8.7171,-71.3304,Synallaxis azarae,Azara's Spinetail,Joe Klaiber,0000-00-00,XC219896.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,00:00,https://www.xeno-canto.org/219896,9.682,train
1,azaspi1,[],['song'],-0.44,-77.8624,Synallaxis azarae,Azara's Spinetail,GABRIEL LEITE,2014-07-11,XC186641.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,11:00,https://www.xeno-canto.org/186641,12.304,train
2,azaspi1,[],['song'],-17.1528,-65.9084,Synallaxis azarae,Azara's Spinetail,Sjoerd Mayer,1992-08-28,XC1596.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,?,https://www.xeno-canto.org/1596,12.862,train
3,azaspi1,[],['song'],-4.6476,-79.74426,Synallaxis azarae,Azara's Spinetail,Willem-Pier Vellinga,2000-07-00,XC746.wav,Creative Commons Attribution-NonCommercial-Sha...,4.5,?:?,https://www.xeno-canto.org/746,14.358,test
4,azaspi1,[],['song'],1.6564,-76.183,Synallaxis azarae,Azara's Spinetail,David Bradley,2015-12-26,XC298616.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,06:00,https://www.xeno-canto.org/298616,14.78,train


In [4]:
# Load your Birds dataset
birds_csv = 'small_birds_dataset/metadata.csv'
base_data_path = 'small_birds_dataset'

# Read CSV with the provided column names
pd_data = pd.read_csv(birds_csv)

# Define your classes and map class to ID
my_classes = ['White-breasted Wood-Wren', 'House Sparrow', 'Red Crossbill', 'Chestnut-crowned Antpitta', 'Azara\'s Spinetail']
map_class_to_id = {bird_class: idx for idx, bird_class in enumerate(my_classes)}

# Filter the dataframe based on your classes
filtered_pd = pd_data[pd_data['common_name'].isin(my_classes)]

# Map bird species to class ID
class_id = filtered_pd['common_name'].apply(lambda name: map_class_to_id[name])
filtered_pd = filtered_pd.assign(target=class_id)

# Create full paths for audio files
full_path = filtered_pd.apply(lambda row: os.path.join(base_data_path, row['split'], row['primary_label'], row['filename']), axis=1)
filtered_pd = filtered_pd.assign(filename=full_path)

# Display the modified dataframe
filtered_pd.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,date,filename,license,rating,time,url,duration,split,target
0,azaspi1,[],['song'],8.7171,-71.3304,Synallaxis azarae,Azara's Spinetail,Joe Klaiber,0000-00-00,small_birds_dataset\train\azaspi1\XC219896.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,00:00,https://www.xeno-canto.org/219896,9.682,train,4
1,azaspi1,[],['song'],-0.44,-77.8624,Synallaxis azarae,Azara's Spinetail,GABRIEL LEITE,2014-07-11,small_birds_dataset\train\azaspi1\XC186641.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,11:00,https://www.xeno-canto.org/186641,12.304,train,4
2,azaspi1,[],['song'],-17.1528,-65.9084,Synallaxis azarae,Azara's Spinetail,Sjoerd Mayer,1992-08-28,small_birds_dataset\train\azaspi1\XC1596.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,?,https://www.xeno-canto.org/1596,12.862,train,4
3,azaspi1,[],['song'],-4.6476,-79.74426,Synallaxis azarae,Azara's Spinetail,Willem-Pier Vellinga,2000-07-00,small_birds_dataset\test\azaspi1\XC746.wav,Creative Commons Attribution-NonCommercial-Sha...,4.5,?:?,https://www.xeno-canto.org/746,14.358,test,4
4,azaspi1,[],['song'],1.6564,-76.183,Synallaxis azarae,Azara's Spinetail,David Bradley,2015-12-26,small_birds_dataset\train\azaspi1\XC298616.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,06:00,https://www.xeno-canto.org/298616,14.78,train,4


In [5]:
filtered_pd

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,date,filename,license,rating,time,url,duration,split,target
0,azaspi1,[],['song'],8.7171,-71.33040,Synallaxis azarae,Azara's Spinetail,Joe Klaiber,0000-00-00,small_birds_dataset\train\azaspi1\XC219896.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,00:00,https://www.xeno-canto.org/219896,9.682,train,4
1,azaspi1,[],['song'],-0.4400,-77.86240,Synallaxis azarae,Azara's Spinetail,GABRIEL LEITE,2014-07-11,small_birds_dataset\train\azaspi1\XC186641.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,11:00,https://www.xeno-canto.org/186641,12.304,train,4
2,azaspi1,[],['song'],-17.1528,-65.90840,Synallaxis azarae,Azara's Spinetail,Sjoerd Mayer,1992-08-28,small_birds_dataset\train\azaspi1\XC1596.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,?,https://www.xeno-canto.org/1596,12.862,train,4
3,azaspi1,[],['song'],-4.6476,-79.74426,Synallaxis azarae,Azara's Spinetail,Willem-Pier Vellinga,2000-07-00,small_birds_dataset\test\azaspi1\XC746.wav,Creative Commons Attribution-NonCommercial-Sha...,4.5,?:?,https://www.xeno-canto.org/746,14.358,test,4
4,azaspi1,[],['song'],1.6564,-76.18300,Synallaxis azarae,Azara's Spinetail,David Bradley,2015-12-26,small_birds_dataset\train\azaspi1\XC298616.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,06:00,https://www.xeno-canto.org/298616,14.780,train,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,wbwwre1,[],['song'],-0.9415,-77.89890,Henicorhina leucosticta,White-breasted Wood-Wren,Leonardo Ordóñez-Delgado,2011-12-10,small_birds_dataset\train\wbwwre1\XC116238.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,17:00,https://www.xeno-canto.org/116238,99.102,train,0
290,wbwwre1,[],['song'],0.8670,-78.73300,Henicorhina leucosticta,White-breasted Wood-Wren,Niels Krabbe,1999-10-21,small_birds_dataset\train\wbwwre1\XC242770.wav,Creative Commons Attribution-ShareAlike 4.0,5.0,10:35:00 AM,https://www.xeno-canto.org/242770,113.526,train,0
291,wbwwre1,[],['song'],8.5054,-77.97590,Henicorhina leucosticta,White-breasted Wood-Wren,Kent Livezey,2017-05-25,small_birds_dataset\train\wbwwre1\XC372481.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,09:00,https://www.xeno-canto.org/372481,120.780,train,0
292,wbwwre1,[],['song'],-3.7490,-78.87060,Henicorhina leucosticta,White-breasted Wood-Wren,Leonardo Ordóñez-Delgado,2014-06-24,small_birds_dataset\train\wbwwre1\XC190204.wav,Creative Commons Attribution-NonCommercial-Sha...,5.0,17:00,https://www.xeno-canto.org/190204,127.122,train,0


In [6]:
# Create TensorFlow Dataset for training
train_filenames = filtered_pd[filtered_pd['split'] == 'train']['filename']
train_targets = filtered_pd[filtered_pd['split'] == 'train']['target']

train_ds = tf.data.Dataset.from_tensor_slices((train_filenames, train_targets))

In [7]:
train_ds

<TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [8]:
# Create TensorFlow Dataset for testing
test_filenames = filtered_pd[filtered_pd['split'] == 'test']['filename']
test_targets = filtered_pd[filtered_pd['split'] == 'test']['target']

test_ds = tf.data.Dataset.from_tensor_slices((test_filenames, test_targets))

In [27]:
test_ds

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 1024), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [33]:
# Define the load_wav_16k_mono function
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
        file_contents,
        desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)

    # Resample using tf.audio.resample
    target_sample_rate = 16000
    wav = tfio.audio.resample(wav, sample_rate, target_sample_rate)

    return wav

def load_wav_for_map_train(filename, label):
    return load_wav_16k_mono(filename), label


In [29]:
# Define YAMNet model
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(yamnet_model_handle)
yamnet_model.trainable = True

In [31]:
train_ds = tf.data.Dataset.from_tensor_slices((train_filenames, train_targets))
train_ds = train_ds.map(load_wav_for_map_train)

# Create TensorFlow Dataset for testing
test_filenames = filtered_pd[filtered_pd['split'] == 'test']['filename']
test_targets = filtered_pd[filtered_pd['split'] == 'test']['target']

test_ds = tf.data.Dataset.from_tensor_slices((test_filenames, test_targets))

# Load WAV data for mapping
def load_wav_for_map_test(filename, label):
    return load_wav_16k_mono(filename), label

test_ds = test_ds.map(load_wav_for_map_test)
test_ds.element_spec

# Extract embeddings
def extract_embedding(wav_data, label):
    scores, embeddings, spectrogram = yamnet_model(wav_data)
    num_embeddings = tf.shape(embeddings)[0]
    return (embeddings,
            tf.repeat(label, num_embeddings))


train_ds = train_ds.map(extract_embedding).unbatch()
train_ds.element_spec

# Apply embedding extraction to testing dataset
test_ds = test_ds.map(extract_embedding).unbatch()
test_ds.element_spec

# Cache the datasets
cached_train_ds = train_ds.cache()
cached_test_ds = test_ds.cache()

# Shuffle, batch, and prefetch the datasets
train_ds = cached_train_ds.shuffle(1000).batch(64).prefetch(tf.data.AUTOTUNE)
test_ds = cached_test_ds.batch(32).prefetch(tf.data.AUTOTUNE)










In [23]:
my_model = Sequential([
    Input(shape=(1024,), dtype=tf.float32, name='input_embedding'),
    Dense(1024, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(len(my_classes), activation='softmax')
], name='my_model')

my_model.summary()

# Adjust the learning rate and compile the model
initial_learning_rate = 0.0001
lr_schedule = ExponentialDecay(
    initial_learning_rate, decay_steps=10000, decay_rate=0.9, staircase=True)

my_model.compile(loss='sparse_categorical_crossentropy',
                 optimizer=Adam(learning_rate=lr_schedule),
                 metrics=['accuracy'])


batch_size = 32
total_samples = len(train_filenames)
steps_per_epoch = total_samples // batch_size

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 1024)              1049600   
                                                                 
 batch_normalization_7 (Batc  (None, 1024)             4096      
 hNormalization)                                                 
                                                                 
 dropout_7 (Dropout)         (None, 1024)              0         
                                                                 
 dense_11 (Dense)            (None, 512)               524800    
                                                                 
 batch_normalization_8 (Batc  (None, 512)              2048      
 hNormalization)                                                 
                                                                 
 dropout_8 (Dropout)         (None, 512)               0  

In [25]:
history = my_model.fit(train_ds,
                       epochs=50,
                       steps_per_epoch=steps_per_epoch,
                       )

# Evaluate the model on the test set
loss, accuracy = my_model.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Loss:  1.0692291259765625
Accuracy:  0.677021861076355


In [34]:
history = my_model.fit(train_ds,
                       epochs=100,
                       steps_per_epoch=steps_per_epoch,
                       )

# Evaluate the model on the test set
loss, accuracy = my_model.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100




Loss:  0.9845073819160461
Accuracy:  0.6905588507652283


In [10]:
my_model.save("v_bird_model.h5")


