# Accessing the last layer of the BirdNET model

- colab link: https://colab.research.google.com/drive/1d9u93LJQQRTgAmzFMeCaFP0U22dUJOl-?usp=sharing

In [1]:
from pathlib import Path
import requests

# Local directory that will hold the BirdNET V2.2 SavedModel files.
checkpoint_path = Path("data/raw/birdnet/v2.2/checkpoint")

checkpoint_base_url = "https://github.com/kahst/BirdNET-Analyzer/raw/main/checkpoints/V2.2/BirdNET_GLOBAL_3K_V2.2_Model"
for part in ["saved_model.pb", "variables/variables.index", "variables/variables.data-00000-of-00001"]:
    # Download one SavedModel file and mirror its relative path under checkpoint_path.
    response = requests.get(f"{checkpoint_base_url}/{part}", timeout=60)
    # Fail loudly on HTTP errors: otherwise the body of an error page would be
    # written to disk and only surface later as a corrupt-model error.
    response.raise_for_status()
    p = (checkpoint_path / part)
    p.parent.mkdir(parents=True, exist_ok=True)
    p.write_bytes(response.content)

# Show everything that ended up on disk.
print([p.as_posix() for p in checkpoint_path.glob("**/*")])

['data/raw/birdnet/v2.2/checkpoint/saved_model.pb', 'data/raw/birdnet/v2.2/checkpoint/variables', 'data/raw/birdnet/v2.2/checkpoint/variables/variables.index', 'data/raw/birdnet/v2.2/checkpoint/variables/variables.data-00000-of-00001']


In [2]:
import tensorflow as tf

# Load the downloaded SavedModel with TensorFlow's loader, selecting the "serve" tag.
# NOTE(review): `model` is not referenced again in the visible cells; later cells use `pb_model`.
model = tf.saved_model.load(checkpoint_path, tags=["serve"])



In [3]:
from tensorflow import keras

# Load the same checkpoint through the Keras API. The recorded output shows a
# generic `_UserObject`; the following cells reach the network via `pb_model.model`.
pb_model = keras.models.load_model(checkpoint_path)
pb_model



<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject at 0x7f8a82e06a90>

In [4]:
import numpy as np


# The first layer's config stores its input spec as (batch, n_samples);
# index 1 is the number of audio samples the network expects per example.
batch_input_shape = pb_model.model.layers[0].get_config()["batch_input_shape"]
input_shape = batch_input_shape[1]

# One random waveform is enough to smoke-test the forward pass.
sample = np.random.rand(1, input_shape)

# Output of the full network for that single example.
z = pb_model.model.predict(sample)
z.shape



(1, 3337)

References on reading intermediate-layer outputs from a Keras model:

- https://stackoverflow.com/questions/41711190/keras-how-to-get-the-output-of-each-layer/41712013
- https://stackoverflow.com/questions/52444175/get-the-before-last-feature-of-network-for-embedding-is-not-working

In [5]:
# Full input spec of the first layer, as (batch, n_samples).
pb_model.model.layers[0].get_config()["batch_input_shape"]

(None, 144000)

In [7]:
# List every layer name in order, to find the layer to tap for embeddings.
[layer.name for layer in pb_model.model.layers]

['INPUT',
 'ADVANCED_SPEC1',
 'BNORM_SPEC_NOQUANT',
 'CONV_0',
 'BNORM_0',
 'ACT_0',
 'pool_0_MAX',
 'pool_0_AVG',
 'pool_0_CONCAT',
 'pool_0_ACT_QUANT',
 'pool_0_CONV',
 'BLOCK_1-1_CONV_1',
 'BLOCK_1-1_BN_1',
 'BLOCK_1-1_ACT_1',
 'BLOCK_1-1_ACT_QUANT',
 'BLOCK_1-1_CONV_3',
 'BLOCK_1-1_BN_3',
 'BLOCK_1-2_CONV_1',
 'BLOCK_1-2_BN_1',
 'BLOCK_1-2_ACT_1',
 'BLOCK_1-2_ACT_QUANT',
 'BLOCK_1-2_CONV_3',
 'BLOCK_1-2_BN_3',
 'BLOCK_1-2_ADD',
 'BLOCK_1-3_CONV_1',
 'BLOCK_1-3_BN_1',
 'BLOCK_1-3_ACT_1',
 'BLOCK_1-3_ACT_QUANT',
 'BLOCK_1-3_CONV_3',
 'BLOCK_1-3_BN_3',
 'BLOCK_1-3_ADD',
 'BLOCK_2-1_CONV_1',
 'BLOCK_2-1_BN_1',
 'BLOCK_2-1_ACT_1',
 'BLOCK_2-1_ACT_QUANT',
 'BLOCK_2-1_CONV_3',
 'BLOCK_2-1_BN_3',
 'BLOCK_2-2_CONV_1',
 'BLOCK_2-2_BN_1',
 'BLOCK_2-2_ACT_1',
 'BLOCK_2-2_ACT_QUANT',
 'BLOCK_2-2_CONV_3',
 'BLOCK_2-2_BN_3',
 'BLOCK_2-2_ADD',
 'BLOCK_2-3_CONV_1',
 'BLOCK_2-3_BN_1',
 'BLOCK_2-3_ACT_1',
 'BLOCK_2-3_ACT_QUANT',
 'BLOCK_2-3_CONV_3',
 'BLOCK_2-3_BN_3',
 'BLOCK_2-3_ADD',
 'BLOCK_2-4_CO

In [8]:
from keras.models import Model

# Build a truncated model that shares the original input but stops at the
# GLOBAL_AVG_POOL layer, so its output is an embedding rather than class scores.
embedding_layer = pb_model.model.get_layer("GLOBAL_AVG_POOL")
emb_model = Model(inputs=pb_model.model.input, outputs=[embedding_layer.output])

# Sanity check on the random sample created earlier.
z = emb_model.predict(sample)
z.shape



(1, 320)

# Load data from birdclef-2022

In [9]:
import pandas as pd
import numpy as np
import io
import librosa
import librosa.display
import soundfile as sf

In [10]:
# Mount Google Drive (Colab only). Later cells read and write under "drive/MyDrive/...".
# NOTE(review): those relative paths presumably assume the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
# Base URL under which the raw BirdCLEF 2022 files are read (metadata CSV and train_audio/).
root_url = "https://storage.googleapis.com/birdclef-2022/raw/birdclef-2022"
# Metadata table fetched over HTTP.
# NOTE(review): `metadata_df` is not referenced again in the visible cells.
metadata_df = pd.read_csv(f"{root_url}/train_metadata.csv")

def get_track_url(filename: str, root_url: str=root_url) -> str:
    """Build the download URL of a training clip.

    Args:
        filename: Clip path relative to ``train_audio``, e.g. "afrsil1/XC177993.ogg".
        root_url: Base URL of the dataset; defaults to the module-level ``root_url``.

    Returns:
        ``<root_url>/train_audio/<filename>``.
    """
    url_template = "{}/train_audio/{}"
    return url_template.format(root_url, filename)

# @functools.lru_cache(maxsize=32)
def fetch_audio_sr(filename, sample_rate=48000):
    """Fetch a training clip over HTTP and decode it to a mono waveform.

    Args:
        filename: Clip path relative to ``train_audio``, e.g. "afrsil1/XC177993.ogg".
        sample_rate: Target sampling rate passed to ``librosa.load``.

    Returns:
        Tuple ``(y, sr)``: the decoded mono waveform and its sampling rate.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    track_url = get_track_url(filename)
    response = requests.get(track_url, timeout=60)
    # Without this check the body of a 404/500 page would be handed to the
    # audio decoder and surface as an obscure format error.
    response.raise_for_status()
    fp = io.BytesIO(response.content)
    # mono=True is librosa's default, made explicit here: the loader already
    # downmixes, so no separate to_mono pass is needed. The previous
    # `y.shape[-1] > 1` guard tested the number of samples, not channels.
    y, sr = librosa.load(fp, sr=sample_rate, mono=True)
    return y, sr

In [12]:
# Smoke test: download and decode a single clip.
fetch_audio_sr("afrsil1/XC177993.ogg")

(array([ 2.48274114e-06,  2.75472539e-05,  3.04746864e-05, ...,
        -6.61349486e-06,  1.03764796e-04,  8.12338112e-05], dtype=float32),
 48000)

In [13]:
def get_details(file_name):
    """Return ``(duration_in_seconds, sample_rate)`` for a remote training clip.

    The clip is downloaded and decoded via ``fetch_audio_sr``; the duration is
    the number of decoded samples divided by the sampling rate.
    """
    waveform, rate = fetch_audio_sr(file_name)
    duration = len(waveform) / rate
    return duration, rate

In [14]:
# Pre-filtered metadata: only high-quality recordings (rating >= 4) that are
# long enough (length within [27, 51] seconds).
unused_columns = ["license", "sample_rate", "author", "details"]
meta_df_cut = (
    pd.read_parquet("drive/MyDrive/data/birdnet_clef/meta_df_cut.pq")
    .drop(columns=unused_columns)
)
meta_df_cut.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,rating,time,url,filename,length
0,afrsil1,[],['call'],21.3561,-158.0205,Euodice cantans,African Silverbill,4.0,11:00,https://www.xeno-canto.org/209513,afrsil1/XC209513.ogg,42.866937
1,afrsil1,[],['call'],17.1003,54.2844,Euodice cantans,African Silverbill,4.5,06:35,https://www.xeno-canto.org/395771,afrsil1/XC395771.ogg,27.219
2,akiapo,"['apapan', 'hawama', 'iiwi']",['song'],19.6294,-155.3615,Hemignathus wilsoni,Akiapolaau,4.5,12:31,https://www.xeno-canto.org/122399,akiapo/XC122399.ogg,29.328
3,akiapo,[],['song'],19.6294,-155.3615,Hemignathus wilsoni,Akiapolaau,5.0,12:00,https://www.xeno-canto.org/306424,akiapo/XC306424.ogg,29.910219
4,aniani,[],['call'],22.1467,-159.6134,Magumma parva,Anianiau,4.0,09:00,https://www.xeno-canto.org/210203,aniani/XC210203.ogg,42.08325


In [15]:
# Summary statistics of the numeric columns; confirms the rating and length filters.
meta_df_cut.describe()

Unnamed: 0,latitude,longitude,rating,length
count,1841.0,1841.0,1841.0,1841.0
mean,34.410503,-37.161802,4.485606,37.357469
std,23.893564,72.793099,0.447134,6.884998
min,-46.8968,-176.6322,4.0,27.01
25%,26.1375,-100.5896,4.0,31.08
50%,39.5696,-17.2303,4.5,36.571438
75%,50.5265,10.9621,5.0,43.075906
max,78.925,179.3381,5.0,50.964906


In [16]:
def get_slider(filename, n_windows=25, window_seconds=3, hop_seconds=1):
    """Cut a remote clip into overlapping fixed-length windows.

    The clip is fetched at 48 kHz and sliced into ``n_windows`` windows of
    ``window_seconds`` seconds each, with consecutive windows starting
    ``hop_seconds`` apart. The defaults reproduce the original behaviour:
    25 three-second windows with a one-second hop, covering the first 27 s.

    Args:
        filename: Clip path relative to ``train_audio``.
        n_windows: Number of windows to extract.
        window_seconds: Length of each window in seconds.
        hop_seconds: Offset between the starts of consecutive windows in seconds.

    Returns:
        Array of shape ``(n_windows, window_seconds * sr)``.

    Raises:
        ValueError: if the clip is too short to supply all windows.
    """
    y, sr = fetch_audio_sr(filename, sample_rate=48000)
    window_len = int(round(window_seconds * sr))
    hop_len = int(round(hop_seconds * sr))
    required_len = (n_windows - 1) * hop_len + window_len
    # Check up front: ragged windows would otherwise fail inside numpy with a
    # message that names neither the file nor the missing duration, and an
    # empty clip would silently yield a (n_windows, 0) array.
    if len(y) < required_len:
        raise ValueError(
            f"{filename}: clip is {len(y) / sr:.2f}s long but "
            f"{required_len / sr:.2f}s are needed for {n_windows} windows"
        )
    starts = [i * hop_len for i in range(n_windows)]
    return np.stack([y[start:start + window_len] for start in starts], axis=0)

In [17]:
# Worked example: window a single clip.
y_cube = get_slider("aniani/XC210203.ogg")

In [18]:
# Expect (windows, samples per window).
y_cube.shape

(25, 144000)

In [19]:
# Call the model directly (rather than .predict) so the result comes back as a tensor.
out = emb_model(y_cube)

In [20]:
# One embedding vector per window.
out.shape

TensorShape([25, 320])

In [21]:
# Add a leading axis so this clip's embeddings form a (1, windows, features) array.
np.expand_dims(out.numpy(),0)

array([[[1.4449097 , 0.52302724, 0.900517  , ..., 0.9261592 ,
         0.60402757, 0.19873744],
        [2.0589826 , 0.41992864, 1.2794278 , ..., 0.5984857 ,
         0.5463397 , 0.38938674],
        [1.4522858 , 0.563561  , 1.3416977 , ..., 0.43575522,
         0.329533  , 0.2389092 ],
        ...,
        [1.4354645 , 1.1026314 , 0.8678978 , ..., 0.450032  ,
         0.42613757, 0.5474387 ],
        [0.84232444, 0.35587764, 0.29460782, ..., 0.5967485 ,
         0.28373596, 0.33519045],
        [0.9819014 , 0.7643902 , 0.2677348 , ..., 0.60384965,
         0.45256677, 0.49627304]]], dtype=float32)

In [22]:
from tqdm.auto import tqdm

In [23]:
def extract_emb(filename_series):
    """Compute windowed embeddings for every clip in ``filename_series``.

    Each file is cut into windows with ``get_slider`` and passed through
    ``emb_model``; the per-file results are stacked along a new leading axis.

    Args:
        filename_series: Iterable of clip paths relative to ``train_audio``.

    Returns:
        Array of shape ``(n_files, 25, 320)`` with the default windowing.
    """
    emb_cube = []
    for file_name in tqdm(filename_series):
        y_slide = get_slider(filename=file_name)  # (25, 144000)
        y_emb = emb_model(y_slide)  # (25, 320)
        emb_cube.append(y_emb.numpy())
    # np.stack adds the file axis directly, replacing expand_dims + concatenate.
    return np.stack(emb_cube, axis=0)  # (n_files, 25, 320)

In [None]:
# Embed every selected recording; each file is fetched over HTTP inside the loop.
emb_cube = extract_emb(meta_df_cut.filename)

  0%|          | 0/1841 [00:00<?, ?it/s]

In [None]:
# Persist the embedding cube to Google Drive in NumPy's binary .npy format.
emb_cube_path = 'drive/MyDrive/data/birdnet_clef/emb_cube.npy'
with open(emb_cube_path, 'wb') as emb_file:
    np.save(emb_file, emb_cube)
