In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local source files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf
import random
import argparse
import tensorflow.keras as keras
from tensorflow.keras import layers, losses, optimizers
import sys
sys.path.append('../')
from birdsong_recognition.preprocess import *

In [3]:
from IPython.display import Audio

In [None]:
# Move the working directory up one level; the relative paths used by later
# cells ('model/model.h5', 'preprocessed_dataset/val_ds') are resolved from there.
# NOTE(review): the target is relative, so re-running this cell moves up one
# more level each time — run it exactly once per kernel session.
%cd ../

In [5]:
# Class names, ordered by class index: later cells look a name up with the
# integer label / predicted class as the list index.
# (Presumably eBird species codes — confirm against the dataset.)
ebirds = [
    'norcar',  # class 0
    'blujay',  # class 1
    'bkcchi',  # class 2
]

In [6]:
# Structure of one dataset element: (waveform, label).
# The waveform is a flat float32 vector of 132300 samples (3 s at the 44.1 kHz
# rate used for playback later in this notebook); the label is a scalar int32.
element_spec = (
    tf.TensorSpec(name='input', dtype=tf.float32, shape=(132300,)),
    tf.TensorSpec(name='label', dtype=tf.int32, shape=()),
)

In [7]:
# Build the training and validation datasets described by `element_spec`.
# Only `val_ds` is used by the cells shown in this part of the notebook.
train_ds, val_ds = create_ds(element_spec)

In [8]:
# Batch the validation set one element at a time.
# NOTE: later cells compare the number of batches with the number of
# predictions, so they rely on this batch size being exactly 1.
val_ds_ = val_ds.batch(1)

In [9]:
# Load the saved Keras model; the path is relative to the current working
# directory, so the directory change made earlier in the notebook must have run.
model = keras.models.load_model('model/model.h5')

In [10]:
# Evaluate the model on the batched validation set.
# Despite its name, `history` holds the value(s) returned by `model.evaluate`;
# a later cell indexes it as `history[1]` and compares that entry with the
# accuracy recomputed from the predictions.
history = model.evaluate(val_ds_)



In [11]:
# Per-class model outputs for every validation clip, one row per clip.
val_pred = model.predict(val_ds_)
# Predicted class for each clip = column index of the largest output in its row.
val_pred_label = val_pred.argmax(axis=1)

In [12]:
# Materialise every validation batch as NumPy data; each item is an
# (input, label) pair, so `batch[1]` is that batch's label array.
val_labels = list(val_ds_.as_numpy_iterator())

# Flatten all label batches with a single concatenate call instead of growing
# the array inside a loop, which re-copies the accumulated labels every step.
# The leading empty array keeps the result float64 (later cells cast with
# int() before indexing) and keeps an empty dataset from raising.
val_labels_flat = np.concatenate(
    [np.array([])] + [batch[1] for batch in val_labels]
)

In [13]:
# Sanity check: exactly one true label per prediction.
assert len(val_labels_flat) == len(val_pred_label)
# Sanity check: one prediction per batch (true because the validation set was
# batched with batch size 1).
assert len(val_labels) == len(val_pred_label)

In [14]:
# Non-zero entries mark clips whose predicted class differs from the true label.
errors = val_labels_flat - val_pred_label

# Fraction of correctly classified clips. The denominator is the number of
# samples (not the number of batches), so the value stays correct for any
# batch size.
accuracy = 1 - errors[errors != 0].shape[0] / errors.shape[0]

# Cross-check against the metric reported by `model.evaluate`.
# A tolerance comparison is used instead of comparing values rounded to two
# decimals: rounding makes two almost-equal numbers compare unequal whenever
# they straddle a rounding boundary. atol=1e-2 accepts every pair that the
# rounded comparison accepted.
# NOTE(review): assumes history[1] is the accuracy metric — confirm against how
# the model was compiled.
assert np.isclose(accuracy, history[1], atol=1e-2)

In [15]:
# Positions of the misclassified clips within the validation set.
error_indices = np.flatnonzero(errors != 0)

# Load the saved validation dataset and keep every element in memory so a clip
# can be fetched by position.
# NOTE(review): assumes the saved dataset is in the same order as `val_ds` —
# confirm, otherwise the indices above point at the wrong clips.
audio_val_ds_ = tf.data.experimental.load(
    'preprocessed_dataset/val_ds',
    element_spec=element_spec,
)
audio_val_ds = [element for element in audio_val_ds_]

### Oftentimes, a misclassified clip contains only noise. This is because the clips are made by splitting longer recordings into 3-second pieces, and there can be long gaps in the song.

In [16]:
# Pick one misclassified validation clip at random. The draw is unseeded, so
# each run of this cell may select a different clip.
error_index = np.random.choice(error_indices)
# Translate the predicted and the true class index into their names in `ebirds`.
error_pred = ebirds[val_pred_label[error_index]]
# The flattened labels are stored as floats, hence the int() cast before indexing.
error_label = ebirds[int(val_labels_flat[error_index])]
print('predicted label: {}, actual label: {}'.format(error_pred, error_label))
# Raw model outputs for the selected clip, one value per class.
print('logits: {}'.format(val_pred[error_index]))
# Trailing expression so the notebook renders an audio player for the clip
# (element [0] of a dataset item is the waveform), played back at 44.1 kHz.
# NOTE(review): assumes `audio_val_ds` is ordered like the predictions — confirm.
Audio(audio_val_ds[error_index][0], rate=44.1e3)

predicted label: blujay, actual label: norcar
logits: [ 43.69376   56.901382 -63.27296 ]


In [28]:
# Distinct class labels in the validation set and how many clips carry each one.
bird_index, bird_index_count = np.unique(val_labels_flat, return_counts=True)
# Report the clip count per class, pairing each label directly with its count.
for bird, count in zip(bird_index, bird_index_count):
    print('bird: {} has {} 3-sec samples.'.format(ebirds[int(bird)], count))

bird: norcar has 413 3-sec samples.
bird: blujay has 119 3-sec samples.
bird: bkcchi has 219 3-sec samples.


In [30]:
# True labels of the misclassified clips.
error_by_bird_index = val_labels_flat[error_indices]

# Count the errors for EVERY class present in the validation set, in the same
# order as `bird_index` / `bird_index_count`. Taking np.unique of the error
# labels alone would drop classes with zero errors, so the counts would no
# longer line up with `bird_index_count` (shape error, or rates attributed to
# the wrong class).
error_bird_index = bird_index
error_bird_index_count = (
    error_by_bird_index[:, None] == bird_index[None, :]
).sum(axis=0)

# Per-class error rate in percent. Every entry of `bird_index_count` is at
# least 1 because it comes from np.unique, so the division is safe.
percent_error = np.around(100 * error_bird_index_count / bird_index_count, decimals=2)

for bird, n_errors, pct in zip(error_bird_index, error_bird_index_count, percent_error):
    print('bird: {} has {} error samples, or {} percent error.'.format(ebirds[int(bird)],
                                                                       n_errors, pct))

bird: norcar has 75 error samples, or 18.16 percent error.
bird: blujay has 5 error samples, or 4.2 percent error.
bird: bkcchi has 3 error samples, or 1.37 percent error.
