In [None]:
%config Completer.use_jedi = False

In [None]:
# Importing standard libraries
import os
import time
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint
from glob import glob
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# Importing Libraries for Audio file reading
import soundfile as sf
import librosa
import librosa.display
import IPython.display as display

# Importing Libraries to build the neural network
import tensorflow as tf
from tensorflow.keras.utils import Sequence, plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

# For data preparation and model evaluation
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split



import warnings
warnings.filterwarnings("ignore")

In [None]:
# Helper functions
def read_wav_file(path):
    """Read an audio file through ``soundfile``.

    Parameters
    ----------
    path : str
        Location of the audio file; handed unchanged to ``sf.read``.

    Returns
    -------
    tuple
        ``(samples, sample_rate)`` as produced by ``sf.read``.
    """
    samples, sample_rate = sf.read(path)
    return samples, sample_rate

def displayWaveform(data, sr):
    """Draw the waveform of an audio signal on a 14x5 inch figure.

    Parameters
    ----------
    data : audio samples to plot.
    sr : sampling rate of ``data``, forwarded to librosa.

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    # Newer librosa releases ship ``waveshow`` and no longer provide
    # ``waveplot``; use whichever the installed version offers.
    draw_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
    plt.figure(figsize = (14, 5))
    draw_wave(data, sr = sr)
    plt.grid()
    plt.show()

def plot_spectrogram(data, sr):
    """Render the log-scaled mel spectrogram of an audio signal.

    Parameters
    ----------
    data : audio samples.
    sr : sampling rate of ``data``.

    Returns
    -------
    None. The spectrogram is drawn on the current matplotlib axes.
    """
    # Keyword arguments: recent librosa versions reject positional audio/sr.
    mel_power = librosa.feature.melspectrogram(y = data, sr = sr)
    # Convert power to decibels relative to the loudest bin.
    log_spec = librosa.power_to_db(mel_power, ref = np.max)
    librosa.display.specshow(log_spec, sr = sr, x_axis = 'time', y_axis = 'mel')
    

In [None]:
# CONFIG
# Locations of every competition input used by this notebook.
# Note: 'train_csv' and 'labels' point at the same file.
params = {
    'train_csv': '../input/birdclef-2021/train_soundscape_labels.csv',
    'test_csv': '../input/birdclef-2021/test.csv',
    'train_metadata': '../input/birdclef-2021/train_metadata.csv',
    'train_short_audio': '../input/birdclef-2021/train_short_audio',
    'train_soundscapes': '../input/birdclef-2021/train_soundscapes',
    'sample_csv': '../input/birdclef-2021/sample_submission.csv',
    'labels': '../input/birdclef-2021/train_soundscape_labels.csv',
    'test_soundscapes': "../input/birdclef-2021/test_soundscapes",
}
pprint(params)

In [None]:
# Reading csv files
# The frames are loaded in the same order as before; 'train_csv' and 'labels'
# resolve to the same path, so train_csv and train_labels start out identical.
train_csv, test_csv, train_meta, sample_sub, train_labels = (
    pd.read_csv(params[key])
    for key in ('train_csv', 'test_csv', 'train_metadata', 'sample_csv', 'labels')
)

In [None]:
# First three rows of the soundscape label table (read from params['train_csv'])
train_csv.head(3)

In [None]:
# Same file as train_csv (params['labels'] equals params['train_csv']); first three rows
train_labels.head(3)

In [None]:
# Three randomly drawn rows of the sample submission (unseeded, so they change per run)
sample_sub.sample(3)

In [None]:
# Three randomly drawn rows of the training metadata (unseeded, so they change per run)
train_meta.sample(3)

In [None]:
# Row counts of every table loaded above
for caption, frame in (
    ("train data", train_csv),
    ("test data", test_csv),
    ("train meta", train_meta),
    ("train labels", train_labels),
):
    print(f"Len of {caption} : {len(frame)}")

In [None]:
# Lets hear some voices from soundscapes
# The directory comes from the config cell instead of a second hard-coded copy.
soundscapes = glob(os.path.join(params['train_soundscapes'], "*.ogg"))

# Play one recording picked at random
display.Audio(soundscapes[np.random.randint(len(soundscapes))])

In [None]:
# Lets hear some short sounds
# One sub-directory level sits between the root and the .ogg files.
short_sounds = glob(os.path.join(params['train_short_audio'], "*", "*.ogg"))

# Play one recording picked at random
display.Audio(short_sounds[np.random.randint(len(short_sounds))])

# EDA

In [None]:
# Distribution of labels
# One bar (and one colour) per distinct value of the raw 'birds' column.
fig = px.histogram(train_labels, x='birds', color='birds')
fig.update_layout(title={'text': 'Distribution of Birds calls/labels', 'x': 0.5})
fig.show()

In [None]:
# Distribution of audio_ids
# Number of non-null 'birds' entries per recording.
train_labels.groupby('audio_id')['birds'].count()

In [None]:
# Show the label table itself
train_labels

In [None]:
# Some rows list multiple birds, separated by whitespace.
# Flatten them into individual labels first and look at their counts.

uniq_labels = [
    label
    for birds in train_labels['birds'].values
    for label in birds.split()
]

# '-1' for "no call"
print(f"Num of unique birds : {len(set(uniq_labels)) - 1}")
fig = px.histogram(uniq_labels, color = uniq_labels)
fig.show();

# Finally keep one entry per label. Sorting makes the order (and therefore the
# label-column / model-output order) identical on every run; a bare set's
# iteration order for strings can differ between interpreter sessions.
uniq_labels = sorted(set(uniq_labels))

In [None]:
def _multi_hot(birds, labels):
    """Encode whitespace-separated label strings as a 0/1 indicator table.

    Parameters
    ----------
    birds : pd.Series of str
        One entry per row; each entry holds one or more labels separated
        by whitespace.
    labels : list of str
        Column vocabulary; the result has exactly these columns, in order.

    Returns
    -------
    pd.DataFrame
        Integer frame indexed like ``birds`` with a 1 wherever the row
        mentions the column's label. Labels absent from ``labels`` are
        ignored so the column count never changes.
    """
    column_of = {label: pos for pos, label in enumerate(labels)}
    matrix = np.zeros((len(birds), len(labels)), dtype = np.int64)
    for row_pos, entry in enumerate(birds):
        for bird in entry.split():
            col = column_of.get(bird)
            if col is not None:
                matrix[row_pos, col] = 1
    return pd.DataFrame(matrix, index = birds.index, columns = labels)


df_train_labels = _multi_hot(train_labels['birds'], uniq_labels)

# Every test row gets the placeholder label 'nocall' so it can be encoded
# with the same columns as the training table.
test_csv['birds'] = 'nocall'

df_test_labels = _multi_hot(test_csv['birds'], uniq_labels)

In [None]:
# Merging the table with the original data
# Any indicator columns left over from a previous run of this cell are dropped
# first, so re-running it does not append a second copy of every label column.

train_labels = pd.concat(
    [train_labels.drop(columns = uniq_labels, errors = 'ignore'), df_train_labels],
    axis = 1,
)
test_csv = pd.concat(
    [test_csv.drop(columns = uniq_labels, errors = 'ignore'), df_test_labels],
    axis = 1,
)


In [None]:
# Load one example soundscape and print basic facts about it
eg = "../input/birdclef-2021/train_soundscapes/10534_SSW_20170429.ogg"
data, sr = read_wav_file(eg)
# sample count, first 5 entries, sampling rate
for fact in (len(data), data[:5], sr):
    print(fact)

In [None]:
# The file name has the form "<audio_id>_<site>_<rest>"
file_name = eg.split("/")[-1]
audio_id, site, _ = file_name.split("_")

# Rows of the example recording in which at least one bird was labelled
same_recording = (train_labels['audio_id'] == int(audio_id)) & (train_labels['site'] == site)
has_call = train_labels['birds'] != 'nocall'
train_labels[same_recording & has_call].head(5)

In [None]:
# Lets hear this bird: the 50 s - 55 s window of the example recording
# (the original note refers to it as index 1450 of the label table).
# Offsets are derived from the file's own sampling rate rather than a
# hard-coded samples-per-window constant.
sub_data = data[50 * sr : 55 * sr]

# Reuse the waveform helper defined with the other helper functions
displayWaveform(sub_data, sr)

display.Audio(sub_data, rate=sr)

This recording sounds more like a fighter plane crash-landing 😂

In [None]:
# Settings consumed by the data loader and the model
params.update({
    'data_len': 160000,                 # samples per window
    'audio_len': 5,                     # window length in seconds
    'num_labels': len(uniq_labels),     # one output unit per label
    'for_training': {
        'bs': 16,
        'epochs': 50,
    },
})
pprint(params)

# Preparing Dataset

In [None]:
# 70/30 split of the labelled rows, with a fixed seed
labelled_rows = train_labels.index.tolist()
train_ids, val_ids = train_test_split(labelled_rows, test_size=0.3, random_state=2021)

# Test rows are addressed through the sample submission's index
test_ids = sample_sub.index.tolist()

Creating a custom data loader using the Keras [Sequence](https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence) class

In [None]:
class DataLoader(Sequence):
    """Keras ``Sequence`` that turns soundscape windows into FFT feature batches.

    For every requested row the loader looks for an audio file in ``path``
    whose name starts with ``"<audio_id>_<site>_"``, cuts out the window that
    ends at the row's ``seconds`` value, and uses the standardised magnitude
    of the first half of its FFT as features. Rows without a matching file
    get an all-zero feature vector.

    Window length, sample count and label count are read from the module-level
    ``params`` dict (``'audio_len'``, ``'data_len'``, ``'num_labels'``).

    Parameters
    ----------
    path : str
        Directory containing the audio files.
    list_ids : list
        Index labels of ``data`` that this loader serves.
    data : pd.DataFrame
        Table with ``audio_id``, ``site`` and ``seconds`` columns; every
        column from position 5 onwards is treated as a label indicator.
    batch_size : int
        Maximum number of rows per batch. The final batch holds only the
        remaining rows; it is not padded with artificial samples.
    """

    # Number of chunks each feature vector is folded into by __getitem__.
    TIME_STEPS = 100

    def __init__(self, path : str, list_ids : list, data : "pd.DataFrame", batch_size : int) -> None:
        super().__init__()
        self.path = path
        self.list_IDs = list_ids
        self.data = data
        self.batch_size = batch_size
        self.indexes = np.arange(len(self.list_IDs))

    def __len__(self):
        """Return the number of batches (the last one may be partial)."""
        # Ceiling division without going through floats.
        return -(-len(self.list_IDs) // self.batch_size)

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``.

        ``X`` has shape ``(rows, TIME_STEPS, features // TIME_STEPS)`` and
        ``y`` has shape ``(rows, params['num_labels'])``.
        """
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        X = X.reshape((len(list_IDs_temp), self.TIME_STEPS, X.shape[1] // self.TIME_STEPS))
        return X, y

    def _fft_features(self, file_path, seconds):
        """Load one window from ``file_path`` and return its scaled FFT magnitudes."""
        data_len = params['data_len']
        audio_file, _ = read_wav_file(file_path)
        # The window ends at `seconds` and spans data_len samples.
        end = int(seconds / params['audio_len']) * data_len
        start = max(end - data_len, 0)
        clip = audio_file[start:end]
        if len(clip) < data_len:
            # Recording shorter than expected: zero-pad so the feature size is fixed.
            clip = np.pad(clip, (0, data_len - len(clip)))
        spectrum = np.abs(np.fft.fft(clip)[: data_len // 2])
        centred = spectrum - spectrum.mean()
        spread = spectrum.std()
        # A constant spectrum (e.g. pure silence) has zero spread; skip the division.
        return centred / spread if spread > 0 else centred

    def __data_generation(self, list_IDs_temp):
        """Build the feature and label arrays for the given row ids."""
        X = np.zeros((len(list_IDs_temp), params['data_len']//2))
        y = np.zeros((len(list_IDs_temp), params['num_labels']))
        # List the directory once per batch rather than once per row.
        available = os.listdir(self.path)
        for i, ID in enumerate(list_IDs_temp):
            # Trailing '_' stops e.g. "534_SSW" from matching "10534_SSW_...".
            prefix = f"{self.data.loc[ID, 'audio_id']}_{self.data.loc[ID, 'site']}_"
            file_list = [s for s in available if s.startswith(prefix)]
            if file_list:
                X[i, ] = self._fft_features(os.path.join(self.path, file_list[0]),
                                            self.data.loc[ID, 'seconds'])
            # else: row stays all-zero (dummy for missing test audio files)
            y[i, ] = self.data.loc[ID, self.data.columns[5:]].values
        return X, y

In [None]:
# Now we have our Data Loader ready lets build our train_gen, val_gen and test_gen
# Train and validation read the same directory and table but disjoint row ids.
train_gen = DataLoader(
    path=params['train_soundscapes'],
    list_ids=train_ids,
    data=train_labels,
    batch_size=params['for_training']['bs'],
)

val_gen = DataLoader(
    path=params['train_soundscapes'],
    list_ids=val_ids,
    data=train_labels,
    batch_size=params['for_training']['bs'],
)

test_gen = DataLoader(
    path=params['test_soundscapes'],
    list_ids=test_ids,
    data=test_csv,
    batch_size=params['for_training']['bs'],
)

# Building Model

* Input_shape : (16, 100, 800)
* Output_shape : (16, 49)

In [None]:
def build_model():
    """Assemble the baseline network: one LSTM followed by a dense head.

    The input is a ``(100, 800)`` sequence; the output layer has
    ``params['num_labels']`` sigmoid units, one per label.

    Returns
    -------
    The un-compiled ``Sequential`` model.
    """
    layers = [
        LSTM(128,input_shape=(100, 800)),
        Dropout(0.2),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dropout(0.25),
        Dense(params['num_labels'], activation='sigmoid'),
    ]
    return Sequential(layers)

In [None]:
# Instantiate the network and print its layer-by-layer summary
clf = build_model()
clf.summary()

In [None]:
# Architecture diagram with layer names and tensor shapes
plot_model(clf, show_shapes = True, show_layer_names = True)

In [None]:
# Compiling model
# The AUC metric is given an explicit name so the history keys are always
# 'auc' / 'val_auc'. The string shortcut gets an automatically numbered name
# (e.g. 'auc_1') once a model has been built more than once in the session,
# which breaks the key lookups in the plotting cell.
clf.compile(optimizer = 'adam',
            loss = 'binary_crossentropy',
            metrics = ['binary_accuracy', 'accuracy', tf.keras.metrics.AUC(name = 'auc')]
           )

In [None]:
# `fit` accepts a Sequence directly; `fit_generator` is deprecated.
# NOTE(review): params['for_training']['epochs'] is not used here - the
# run is kept at the 2 epochs chosen for this baseline.
train_hist = clf.fit(train_gen,
                     validation_data = val_gen,
                     epochs = 2,
                     workers = 4)

In [None]:
# Names of the quantities recorded per epoch (the plotting cell reads 'loss' and 'auc')
train_hist.history.keys()

In [None]:
# Training curves: loss on the left, AUC on the right
history = train_hist.history
epochs = range(1, len(history['loss']) + 1)

fig, (ax_loss, ax_auc) = plt.subplots(1, 2, figsize=(16, 4))
fig.subplots_adjust(hspace = .2, wspace=.2)

ax_loss.plot(epochs, history['loss'], 'b', label='loss_train')
ax_loss.plot(epochs, history['val_loss'], 'r', label='loss_val')
ax_loss.set_title('Value of the loss function')
ax_loss.set_xlabel('epochs')
ax_loss.set_ylabel('value of the loss function')
ax_loss.legend()
ax_loss.grid()

# This panel shows AUC, so it is titled and labelled as such
ax_auc.plot(epochs, history['auc'], 'b', label='AUC_train')
ax_auc.plot(epochs, history['val_auc'], 'r', label='AUC_val')
ax_auc.set_title('AUC')
ax_auc.set_xlabel('Epochs')
ax_auc.set_ylabel('Value of AUC')
ax_auc.legend()
ax_auc.grid()
plt.show()

# Inference

In [None]:
# `predict` accepts a Sequence directly; `predict_generator` is deprecated
y_pred = clf.predict(test_gen, verbose=1)

In [None]:
# Threshold the sigmoid outputs, then turn every row into a space-separated
# string of the labels that fired.
y_test = np.where(y_pred > 0.5, 1, 0)

predicted_birds = []
for row in sample_sub.index:
    # 'nocall' is only meaningful on its own, so it is never mixed with
    # species codes; it is used when no species fired at all.
    hits = [
        label
        for label, flag in zip(uniq_labels, y_test[row])
        if flag == 1 and label != 'nocall'
    ]
    predicted_birds.append(' '.join(hits) if hits else 'nocall')

sample_sub['birds'] = predicted_birds

In [None]:
# Saving submission
# `res` is another name for sample_sub (no copy is made); the file is
# written to the working directory without the index column.

res = sample_sub
res.to_csv('submission.csv', index=False)

# Saving model weights

In [None]:
# Create the output directory if needed; unlike `!mkdir`, this does not
# complain when the cell is run a second time.
os.makedirs("./baseline", exist_ok=True)
clf.save("./baseline/baseline.h5")

In [None]:
# !pip install kaggle

In [None]:
# !cp ../input/kaggle-token/kaggle_token.json ./
# !mv ./kaggle_token.json ./kaggle.json

# !ls -l ../../root
# !cp ./kaggle.json ../../root/
# !ls ../../root

# !mkdir ../../root/.kaggle
# !mv ../../root/kaggle.json ../../root/.kaggle/kaggle.json

# !chmod 600 /root/.kaggle/kaggle.json
# !kaggle datasets init -p ./baseline

In [None]:
# !cat ./baseline/dataset-metadata.json

# import json
# with open("./baseline/dataset-metadata.json", 'r+') as file_:
#     meta_data = json.load(file_)
#     meta_data['title'] = 'baseline_BirdCLEF'
#     meta_data['id'] = 'hotsonhonet/BirdCLEF'
#     file_.seek(0)        
#     json.dump(meta_data, file_, indent=4)
#     file_.truncate()
    
# print(meta_data['title'], meta_data['id'])
# print("\nAfter editing\n")
# !cat ./baseline/dataset-metadata.json

In [None]:
# !kaggle datasets create -p ./baseline