In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
from glob import glob
import IPython.display as ipd
import tensorflow as tf
from PIL import Image

In [None]:
# Read the metadata table that accompanies the WAV clips and preview it.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/bird-song-data-set/bird_songs_metadata.csv"
)
df.head(n=10)

In [None]:
# Distinct values of the "name" column, in order of first appearance.
class_names = pd.unique(df["name"])
print(class_names)

In [None]:
# Number of metadata rows per "name" value, most frequent first.
pd.Series.value_counts(df["name"])

In [None]:
# Bar chart of how many metadata rows each species has.
# Labels and heights are both taken from the same value_counts() Series so
# every bar is paired with its own species. df["name"].unique() is ordered by
# first appearance, while value_counts() is sorted by count, so mixing the two
# attaches counts to the wrong names.
species_counts = df["name"].value_counts()
fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(species_counts.index, species_counts.values)
ax.set_xlabel("Species")
ax.set_ylabel("Count")
ax.set_title("Rows per species");

Getting all the WAV files

In [None]:
# Directory that holds the WAV clips, and a listing of every entry inside it.
path_to_wav = "/kaggle/input/bird-song-data-set/wavfiles/"
datafiles = glob(f"{path_to_wav}*")

Function to generate a spectrogram

In [None]:
def generate_spectrogram(file_audio, identifier):
    """Plot the mel spectrogram of one clip and return an audio player for it.

    Args:
        file_audio: File name of the clip, appended to ``path_to_wav``.
        identifier: Text used as the figure's super-title.

    Returns:
        An ``IPython.display.Audio`` object built from the same file.
    """
    # Imported explicitly so the ``librosa.display`` submodule is guaranteed to
    # be loaded; a bare ``import librosa`` does not do this on every release.
    import librosa.display

    clip_path = path_to_wav + file_audio
    audio_data, sample_rate = librosa.load(clip_path)
    spec_mel = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    spec_mel = librosa.power_to_db(spec_mel, ref=np.max)

    figure, axis = plt.subplots(figsize=(15, 5))
    axis.set_title("Mel Spectrogram")
    figure.suptitle(identifier)
    # The rows of spec_mel are mel bands, so the frequency axis is drawn on the
    # mel scale ('log' would label the bands as if they were linear FFT bins).
    # The sample rate is passed explicitly instead of relying on the default.
    librosa.display.specshow(
        spec_mel, sr=sample_rate, x_axis='time', y_axis='mel', ax=axis
    )
    return ipd.Audio(clip_path, rate=sample_rate)

Generating a spectrogram for a randomly chosen clip

In [None]:
# Draw one row label at random and show that clip's spectrogram and player.
i = np.random.randint(0, len(df))
generate_spectrogram(*df.loc[i, ["filename", "name"]])

Plotting waveform

In [None]:
# Load one example clip and draw its raw waveform (amplitude per sample).
filename = "/kaggle/input/bird-song-data-set/wavfiles/101308-0.wav"
audio_data, sample_rate = librosa.load(filename)

plt.plot(audio_data)
plt.gca().set(title="Waveform", xlabel="Sample", ylabel="Amplitude")
plt.show()

In [None]:
# Magnitude spectrogram (|STFT|) of the example clip.
filename = "/kaggle/input/bird-song-data-set/wavfiles/101308-0.wav"
audio_data, sample_rate = librosa.load(filename)
spectrogram = librosa.stft(audio_data)
spectrogram = np.abs(spectrogram)

# imshow plots array indices, so the axes are STFT frame and frequency-bin
# numbers, not audio samples or Hz; the labels say so.
plt.imshow(spectrogram, origin='lower', aspect='auto')
plt.title("Spectrogram")
plt.xlabel("Time (frames)")
plt.ylabel("Frequency (bins)")
plt.colorbar(label="Magnitude")
plt.show()

Preprocessing function for audio

In [None]:
def process_audio(audio_file):
    """Convert an audio file into a dB-scaled mel spectrogram.

    At most the first 10 seconds of the file are read.

    Args:
        audio_file: Path of the audio file, passed to ``librosa.load``.

    Returns:
        The mel spectrogram after ``librosa.power_to_db`` with ``ref=np.max``.
    """
    waveform, rate = librosa.load(audio_file, duration=10)
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=rate)
    return librosa.power_to_db(mel_power, ref=np.max)

In [None]:
# Print the two dimensions of one clip's spectrogram. The clip is processed
# once and the result reused, rather than loading and transforming the file
# again for each print.
filename = '/kaggle/input/bird-song-data-set/wavfiles/101308-0.wav'
example_spec = process_audio(filename)
print(len(example_spec))
print(len(example_spec[0]))


Building a pandas DataFrame that combines the CSV metadata with the spectrograms computed from the WAV files

In [None]:
# Modelling table: species name plus the full path of each clip.
df_train = pd.DataFrame(
    {
        "name": df["name"],
        "audiopath": path_to_wav + df["filename"],
    }
)

# One mel spectrogram per row; process_audio reads each file from disk.
df_train["mel_spec"] = df_train["audiopath"].apply(process_audio)

# Integer class ids; factorize numbers the names in order of first appearance.
df_train["class"] = pd.factorize(df_train["name"])[0]

Shuffle data

In [None]:
from sklearn.utils import shuffle

# Shuffle the rows before the positional train/test split. A fixed
# random_state makes a fresh Restart & Run All produce the same split (and
# therefore comparable metrics) every time.
df_train = shuffle(df_train, random_state=42)
df_train.shape

Separate train and test data

In [None]:
# Positional split of the shuffled table: the first 5000 rows are used for
# training and every remaining row for testing.
train_x = df_train["mel_spec"].iloc[:5000].to_numpy()
train_y = df_train["class"].iloc[:5000].to_numpy()
test_x = df_train["mel_spec"].iloc[5000:].to_numpy()
test_y = df_train["class"].iloc[5000:].to_numpy()

In [None]:
from keras.utils import to_categorical

# One-hot encode both label vectors, one column per entry in class_names.
train_y, test_y = (
    to_categorical(labels, num_classes=len(class_names))
    for labels in (train_y, test_y)
)


Normalize data and generate Tensorflow datasets

In [None]:
# Turn each object array of per-clip spectrograms into one dense array.
train_x, test_x = np.stack(train_x), np.stack(test_x)

# Same call as the default normalize(): L2 norm along the last axis.
train_x = tf.keras.utils.normalize(train_x, axis=-1, order=2)
test_x = tf.keras.utils.normalize(test_x, axis=-1, order=2)

# Pair features with their one-hot labels, one dataset element per clip.
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y))

Setting batch sizes

In [None]:
# Group both datasets into batches of 10 and let tf.data pick the prefetch depth.
train_dataset = train_dataset.batch(10).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(10).prefetch(tf.data.AUTOTUNE)

In [None]:
import keras
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Reshape, InputLayer, Dropout
from keras.models import Sequential

Setting up model

In [None]:
# Small CNN classifier over (128, 130) spectrogram inputs.
model = keras.models.Sequential()

# The input shape is declared once here; Reshape then adds the channel axis
# that Conv2D requires.
model.add(InputLayer(input_shape=(128,130)))
model.add(Reshape((128,130,1)))
# No input_shape on this layer: it is not the first layer, and the value given
# before was the 2-D shape from ahead of the Reshape, not what Conv2D receives.
model.add(Conv2D(64, (8, 8), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(16, (2,2), activation='relu'))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
# One output unit per class, matching the width of the one-hot labels built
# with num_classes=len(class_names), instead of a hard-coded 5.
model.add(Dense(len(class_names), activation='softmax'))

In [None]:
# model2 = keras.models.Sequential()

# model2.add(InputLayer(input_shape=(128,130)))
# model2.add(Reshape((128,130,1)))
# model2.add(Conv2D(64, (8, 8), input_shape=(128, 130), activation='relu'))
# model2.add(BatchNormalization())
# model2.add(MaxPooling2D(pool_size=(2,2)))
# model2.add(Conv2D(16, (2,2), activation='relu'))
# model2.add(Flatten())
# model2.add(Dropout(0.5))
# model2.add(Dense(128, activation='relu'))
# model2.add(Dense(5, activation='softmax'))

In [None]:
# model3 = keras.models.Sequential()

# model3.add(InputLayer(input_shape=(128,130)))
# model3.add(Reshape((128,130,1)))
# model3.add(Conv2D(64, (8, 8), input_shape=(128, 130), activation='relu'))
# model3.add(BatchNormalization())
# model3.add(MaxPooling2D(pool_size=(2,2)))
# model3.add(Conv2D(16, (2,2), activation='relu'))
# model3.add(Flatten())
# model3.add(Dropout(0.5))
# model3.add(Dense(128, activation='relu'))
# model3.add(Dense(5, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

Model compiling

In [None]:
# Explicit metric names pin the history keys ('recall', 'precision',
# 'val_recall', 'val_precision') that the plotting cells read. Unnamed metric
# objects can be given a numeric suffix (e.g. 'recall_1') when this cell is
# re-run in the same kernel, which would make those lookups raise KeyError.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
        'accuracy',
    ],
)

In [None]:
# model2.compile('adam', loss='categorical_crossentropy', metrics=[tf.keras.metrics.Recall(),tf.keras.metrics.Precision(), 'accuracy'])

In [None]:
# model3.compile('adam', loss='categorical_crossentropy', metrics=[tf.keras.metrics.Recall(),tf.keras.metrics.Precision(), 'accuracy'])

Model training

In [None]:
# Train for 50 epochs; the test batches are also passed as validation data,
# so the history holds both training and 'val_' metrics for every epoch.
hist = model.fit(
    x=train_dataset,
    epochs=50,
    validation_data=test_dataset,
)

In [None]:
# hist2 = model2.fit(train_dataset, epochs=50, validation_data=test_dataset)

In [None]:
# hist3 = model3.fit(train_dataset, epochs=20, validation_data=test_dataset)

Evaluating the model on the test dataset

In [None]:
# Report the loss and the compiled metrics over the test batches.
model.evaluate(x=test_dataset)

In [None]:
# model2.evaluate(test_dataset)

In [None]:
# model3.evaluate(test_dataset)

In [None]:
# Model outputs for every test clip, and the true class index of each clip
# recovered from its one-hot row (kept as a column vector).
pred_y = model.predict(test_x)
true_y = np.argmax(test_y, axis=1, keepdims=True)

In [None]:
# Show the predicted outputs followed by the true class indices.
print(pred_y, true_y, sep="\n")

Lots of plots

In [None]:
# Loss per epoch. 'loss' is measured on the training data and 'val_loss' on
# the validation data passed to fit(); the legend now says so.
plt.title('Loss')
plt.plot(hist.history['loss'], 'r', label='Training Loss')
plt.plot(hist.history['val_loss'], 'b', label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# plt.title('Loss')
# plt.plot(hist.history['loss'], 'r', label='Loss for 100 Epoch model')
# plt.plot(hist2.history['loss'], 'b', label='Loss for 50 Epoch model')
# plt.plot(hist3.history['loss'], 'g', label='Loss for 20 Epoch model')
# plt.xlabel('Epoch')
# plt.ylabel('Value')
# plt.legend()
# plt.show()

In [None]:
# Precision per epoch. 'precision' is measured on the training data and
# 'val_precision' on the validation data passed to fit(); the legend now says so.
plt.title('Precision')
plt.plot(hist.history['precision'], 'r', label='Training Precision')
plt.plot(hist.history['val_precision'], 'b', label='Validation Precision')
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.legend()
plt.show()

In [None]:
# Recall per epoch. 'recall' is measured on the training data and
# 'val_recall' on the validation data passed to fit(); the legend now says so.
plt.title('Recall')
plt.plot(hist.history['recall'], 'r', label='Training Recall')
plt.plot(hist.history['val_recall'], 'b', label='Validation Recall')
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.legend()
plt.show()

In [None]:
# Accuracy per epoch. 'accuracy' is measured on the training data and
# 'val_accuracy' on the validation data passed to fit(); the legend now says so.
plt.title('Accuracy')
plt.plot(hist.history['accuracy'], 'r', label='Training Accuracy')
plt.plot(hist.history['val_accuracy'], 'b', label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
# Classify a single clip with the trained model.
# The path now points into /kaggle/input like every other cell; the earlier
# '//input/...' form lacked the 'kaggle' directory.
audio_file = '/kaggle/input/bird-song-data-set/wavfiles/101308-9.wav'
audio_data, sample_rate = librosa.load(audio_file, duration=3)
mel_spec = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
mel_spec = np.expand_dims(mel_spec, axis=0)
# Apply the same normalization the training and test arrays went through;
# feeding raw dB values gives the model inputs on a scale it never saw.
mel_spec = tf.keras.utils.normalize(mel_spec)
# Class index -> species name. class_names comes from df["name"].unique(),
# which lists names in the same first-appearance order factorize() used for
# the labels, so this avoids a hand-typed list drifting out of order.
bird_list = list(class_names)

test=model.predict(mel_spec)

print(test)
print(bird_list[int(np.argmax(test, axis=1)[0])])

Generate model file for easy usage later

In [None]:
# from IPython.display import FileLink

# model.save("saved_model") 
# !zip -r saved_model.zip './saved_model' 
# FileLink(r'./saved_model.zip')