In [1]:
# Install noisereduce 1.1.0 (source tree) and tensorflow-addons 0.11.1 (prebuilt wheel)
# from local files under ../input/birdpred instead of a package index.
!pip install '../input/birdpred/noisereduce-1.1.0/noisereduce-1.1.0'
!pip install '../input/birdpred/tfa.whl/tensorflow_addons-0.11.1-cp37-cp37m-manylinux2010_x86_64.whl'

Processing /kaggle/input/birdpred/noisereduce-1.1.0/noisereduce-1.1.0
Building wheels for collected packages: noisereduce
  Building wheel for noisereduce (setup.py) ... [?25l- \ done
[?25h  Created wheel for noisereduce: filename=noisereduce-1.1.0-py3-none-any.whl size=7608 sha256=fdf9b31e15e881016ff93688ff7f20858efcd3e8f433ad9341e441b05078de19
  Stored in directory: /root/.cache/pip/wheels/c1/c8/78/7f4b312133a0f006df823510f9f1b94be357cd8103da1256a0
Successfully built noisereduce
Installing collected packages: noisereduce
Successfully installed noisereduce-1.1.0
Processing /kaggle/input/birdpred/tfa.whl/tensorflow_addons-0.11.1-cp37-cp37m-manylinux2010_x86_64.whl
Installing collected packages: tensorflow-addons
  Attempting uninstall: tensorflow-addons
    Found existing installation: tensorflow-addons 0.10.0
    Uninstalling tensorflow-addons-0.10.0:
      Successfully uninstalled tensorflow-addons-0.10.0
Successfully installed tensorflow-addons-0.11.1


In [2]:
import numpy as np 
import pandas as pd

import tensorflow as tf

import librosa
import noisereduce as no
import cv2 as cv
import matplotlib.pyplot as plt

import tensorflow_addons as tfa

import functools
import operator
import math
import os

  from tqdm.autonotebook import tqdm


In [3]:
# Show which TensorFlow version the kernel is running.
tf.__version__

'2.3.0'

## Load a pretrained EfficientNetB4 model

In [4]:
bird_dir = '../input/birdsong-recognition/train_audio'

# The class labels are the names of the directories directly under `bird_dir`.
# Listing only that top level avoids walking every audio file in the tree, and a
# missing directory now fails right here with FileNotFoundError instead of a
# later NameError on `birds`.
with os.scandir(bird_dir) as entries:
    birds = [entry.name for entry in entries if entry.is_dir()]

# 'nocall' is an extra label with no directory of its own; the final list is
# sorted so the label order is deterministic.
birds.append('nocall')
birds.sort()

In [5]:
# NumPy array of the sorted labels, so predict_imgs can select labels with a boolean mask.
class_names = np.array(birds)

In [6]:
# Notebook configuration.
# NOTE(review): batch_size, img_size, seed, storage_dir and class_num are not
# referenced by any other cell visible in this notebook - presumably left over
# from training; confirm before removing.
batch_size = 16
img_size = (256, 2600)
seed = 1
storage_dir = '../input/birdmel/train_img_final/train_img_final'
# Default target size used by resize(); model_img_width is also the width that
# repeat() tiles every spectrogram chunk to.
model_img_height = 256
model_img_width = 512
class_num = 265

In [7]:
# Keras preprocessing layer that multiplies its input by 1/255; applied in augment_image_test().
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255)

def resize(image, height=model_img_height, width=model_img_width):
    """Bring `image` to `height` x `width` via tf.image.resize_with_crop_or_pad.

    Args:
        image: image tensor/array accepted by tf.image.resize_with_crop_or_pad.
        height: target height (defaults to `model_img_height`).
        width: target width (defaults to `model_img_width`).

    Returns:
        The cropped and/or padded image.
    """
    target_shape = {'target_height': height, 'target_width': width}
    return tf.image.resize_with_crop_or_pad(image, **target_shape)

def augment_image_test(image):
    """Prepare one image for inference: fit it to the model size, then rescale.

    The image is first passed through resize() with its default target size
    and then through `normalization_layer` (multiplication by 1/255).
    """
    return normalization_layer(resize(image))

In [8]:
# Load the trained Keras model from the attached dataset; predict() passes it to predict_imgs().
net = tf.keras.models.load_model('../input/birdpred/efn_b4_tf_2_3_0_final.h5')

## Prediction

In [9]:
# Audio / mel-spectrogram settings.
# sr is the sample rate requested from librosa.load, passed to the mel
# spectrogram, and used by predict() to turn seconds into sample offsets.
sr = 22050
# The remaining values are forwarded unchanged to librosa.feature.melspectrogram
# in get_melspectrogram_db().
n_fft = 2048
hop_length = 512   
n_mels = 256
fmin = 20
fmax = 16000

In [10]:
def load_audio(path):
    """Read an audio file as mono at `sr` and return its noise-reduced signal.

    The loaded waveform is handed to noisereduce as both the clip to clean
    and the noise reference.
    """
    waveform, _loaded_rate = librosa.load(
        path,
        sr=sr,
        mono=True,
        res_type='kaiser_fast'
    )
    denoised = no.reduce_noise(audio_clip=waveform, noise_clip=waveform, verbose=False)
    return denoised

def get_melspectrogram_db(signal):
    """Compute the mel spectrogram of `signal` and convert it with power_to_db.

    Args:
        signal: 1-D audio samples (as returned by load_audio).

    Returns:
        The result of librosa.power_to_db(spec, ref=np.max), where `spec` is the
        mel spectrogram built with the module-level sr / n_fft / hop_length /
        n_mels / fmin / fmax settings.
    """
    # The audio is passed by keyword: `y=` is accepted by every librosa release,
    # while a positional audio argument is rejected by releases where the
    # parameters of melspectrogram are keyword-only.
    spec = librosa.feature.melspectrogram(
        y=signal,
        sr=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax
    )

    return librosa.power_to_db(spec, ref=np.max)

In [11]:
def process_img(img):
    """Drop dark columns from a grayscale image, then denoise and equalize it.

    Args:
        img: 2-D image array (spec_to_image passes a uint8 image).

    Returns:
        The filtered image. If no column contains a pixel brighter than 24,
        the empty (height, 0) image is returned as is.
    """
    # Keep only the columns that contain at least one pixel brighter than 24.
    bright_columns = np.any(img > 24, axis=0)
    img = img[:, bright_columns]

    # Nothing left to filter: the OpenCV calls below raise on an empty image,
    # so hand the empty result back instead of crashing.
    if img.shape[1] == 0:
        return img

    img = cv.GaussianBlur(img, (5,5), 0)
    img = cv.medianBlur(img, 3)

    # Contrast-limited adaptive histogram equalization.
    clahe = cv.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    img = clahe.apply(img)

    return img

In [12]:
def spec_to_image(spec, eps=1e-6):
    """Turn a spectrogram into a uint8 image and run process_img on it.

    The spectrogram is standardized (mean / std), min-max scaled to 0..255,
    cast to uint8 and flipped along axis 0 before being passed to process_img.

    Args:
        spec: 2-D spectrogram array.
        eps: small constant added to the standard deviation to avoid dividing
            by zero.

    Returns:
        Whatever process_img returns for the scaled image.
    """
    spec_norm = (spec - spec.mean()) / (spec.std() + eps)
    spec_min, spec_max = spec_norm.min(), spec_norm.max()
    value_range = spec_max - spec_min

    if value_range > 0:
        spec_scaled = 255 * (spec_norm - spec_min) / value_range
    else:
        # A constant spectrogram has a zero range (and a non-finite one gives
        # NaN); scaling would be 0/0 and NaN would then be cast to uint8.
        # Use an all-zero image instead.
        spec_scaled = np.zeros_like(spec_norm)

    spec_scaled = spec_scaled.astype(np.uint8)
    spec_scaled = np.flip(spec_scaled, axis=0)

    return process_img(spec_scaled)

In [13]:
def repeat(imgs):
    """Stretch every image to `model_img_width` columns by tiling it.

    Each image gets 20 zero columns appended on the right, is then repeated
    side by side often enough to reach `model_img_width`, and is finally cut
    to exactly that width.

    Args:
        imgs: iterable of 2-D image arrays.

    Returns:
        A list with one tiled image per input image.
    """
    def _tile_to_model_width(img):
        padded = np.pad(img, ((0, 0), (0, 20)), 'constant')
        copies = int(np.ceil(model_img_width/padded.shape[1]))
        return np.tile(padded, (1, copies))[:, :model_img_width]

    return [_tile_to_model_width(img) for img in imgs]

In [14]:
# NOTE(review): image_x is not referenced by any other cell visible in this notebook.
image_x = None

# 216 frames for 5 seconds
def split_img(img, size=216):
    """Cut an image into `size`-column chunks and tile each chunk via repeat().

    Args:
        img: 2-D image array; columns are split along axis 1.
        size: number of columns per full chunk.

    Returns:
        A list of images produced by repeat(): one per full chunk, followed by
        one for the leftover columns when the width is not a multiple of
        `size`. An image with zero columns yields an empty list.
    """
    full_chunks, leftover = divmod(img.shape[1], size)
    cut = full_chunks * size

    imgs = []
    if full_chunks > 0:
        pieces = [img[:, start:start + size] for start in range(0, cut, size)]
        imgs = repeat(pieces)

    if leftover > 0:
        # The trailing partial chunk is tiled on its own.
        imgs.append(repeat([img[:, cut:]])[0])

    return imgs

In [15]:
def file_to_imgs(signal, size=216):
    """Convert an audio signal into a list of 3-channel spectrogram images.

    The signal goes through get_melspectrogram_db and spec_to_image, the
    resulting image is cut up by split_img(img, size), and every chunk is
    stacked three times with cv.merge to form a 3-channel image.
    """
    spectrogram_img = spec_to_image(get_melspectrogram_db(signal))
    chunks = split_img(spectrogram_img, size)

    return [cv.merge((chunk, chunk, chunk)) for chunk in chunks]

In [16]:
def predict_imgs(model, class_names, imgs, threshold = 0.9):
    """Return the sorted labels predicted for a group of images.

    Every image is prepared with augment_image_test, the images are stacked
    into one batch and scored by `model`. A label is selected when its score
    reaches `threshold` for at least one image of the batch.

    Args:
        model: object with a Keras-style ``predict(x=batch)`` method returning
            one row of per-class scores per image.
        class_names: labels in the order of the model's output columns.
        imgs: list of images to score; may be empty.
        threshold: minimum score for a label to be selected.

    Returns:
        Alphabetically sorted list of selected labels without 'nocall'.
        An empty list means no label was selected.
    """
    # np.vstack raises on an empty list; no images means nothing was detected.
    if len(imgs) == 0:
        return []

    labels = np.asarray(class_names)
    tensors = [tf.expand_dims(augment_image_test(img), 0) for img in imgs]
    batch = np.vstack(tensors)

    # Use the model that was passed in rather than a module-level global.
    probas = np.asarray(model.predict(x=batch))

    # A class counts as detected when any image in the batch reaches the threshold.
    detected = (probas >= threshold).any(axis=0)
    selected = {str(name) for name in labels[detected]}

    # 'nocall' is never reported together with, or instead of, real labels here.
    selected.discard('nocall')

    return sorted(selected)

In [17]:
def predict(test_folder='birdsong-recognition', threshold=0.9):
    """Predict the labels for every row of a test set and build the submission frame.

    Reads ``test.csv`` from ``../input/<test_folder>/``, processes the rows
    sorted by ``audio_id`` and loads each audio file only when the id changes.
    Rows with site ``site_3`` are scored on the whole recording (324-column
    chunks); all other rows are scored on the 5 seconds that end at the row's
    ``seconds`` value.

    Args:
        test_folder: dataset folder under ``../input/`` holding ``test.csv``
            and ``test_audio/``.
        threshold: score threshold forwarded to predict_imgs.

    Returns:
        DataFrame with columns ``row_id`` and ``birds``; ``birds`` holds the
        space-separated labels, or 'nocall' when none were selected.
    """
    audio_dir = '../input/' + test_folder + '/test_audio/'
    test_df = pd.read_csv('../input/' + test_folder + '/test.csv')
    test_df = test_df.sort_values(by=['audio_id'])

    row_ids, predictions = [], []
    loaded_id, loaded_signal = None, None

    for _, record in test_df.iterrows():
        row_id, audio_id, site = record['row_id'], record['audio_id'], record['site']

        # Rows are grouped by audio_id, so decode a file once per group.
        if audio_id != loaded_id:
            loaded_id = audio_id
            loaded_signal = load_audio(audio_dir + audio_id + '.mp3')

        if site == 'site_3':
            images = file_to_imgs(loaded_signal, size=324)
        else:
            start = (record['seconds'] - 5)*sr
            window = loaded_signal[int(start):int(start + 5*sr)]
            images = file_to_imgs(window)

        found = predict_imgs(net, class_names, images, threshold=threshold)
        row_ids.append(row_id)
        predictions.append(found)

    labels = [' '.join(found) if len(found) != 0 else 'nocall' for found in predictions]

    return pd.DataFrame({
        'row_id': row_ids,
        'birds': labels
    })

In [18]:
import warnings
warnings.filterwarnings('ignore')

# Try the 'birdsong-recognition' test set first and fall back to the
# 'birdcall-check' data if that run fails for any reason.
# `except Exception` keeps the best-effort fallback but no longer swallows
# KeyboardInterrupt / SystemExit, and the reason for the fallback is reported
# instead of being silently discarded.
try:
    sub_df = predict(test_folder='birdsong-recognition', threshold=0.875)
except Exception as primary_error:
    print('Falling back to birdcall-check: ' + repr(primary_error))
    sub_df = predict(test_folder='birdcall-check', threshold=0.875)

In [19]:
# Write the predictions to submission.csv in the working directory, without the index column.
sub_df.to_csv('submission.csv', index=False)

In [20]:
# sub_df.shape

In [21]:
# sub_df = sub_df.sort_values(by=['row_id']).reset_index(drop=True)
# sample_sub = pd.read_csv('../input/birdpred/mock_submission.csv').sort_values(by=['row_id']).reset_index(drop=True)
# sum(sub_df['birds'] != sample_sub['birds'])

In [22]:
# sub_df[sub_df['birds'] != sample_sub['birds']]

In [23]:
# sample_sub[sub_df['birds'] != sample_sub['birds']]