In [20]:
import matplotlib
import os
import sys
matplotlib.use('TkAgg')
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import average_precision_score
from sklearn.model_selection import train_test_split, ShuffleSplit
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
from importlib import reload
sys.path.append(os.path.abspath(".."))
import src.data_processing.data_augmentation as daug
import src.models.CNN as CNN
reload(CNN)
from src.models.active_learning_kcluster import kCenterGreedy

In [12]:
# Load the annotation table, then derive each row's spectrogram path and its
# list of individual labels (combined labels are joined with '_and_').
# NOTE(review): both locations are absolute, machine-specific paths.
df = pd.read_csv(
    "/Users/jameshill/PycharmProjects/bioacoustic-classifier/src/data/annotations/spectrogram_labels.csv"
).assign(
    filepath=lambda frame: "/Users/jameshill/PycharmProjects/bioacoustic-classifier/data/processed/spectrogram_3s/" + frame['filename'] + ".png",
    split_labels=lambda frame: frame['label'].str.split('_and_'),
)

In [18]:
# File paths of the labelled spectrograms, as an array
filepaths = df['filepath'].values

# Fit the multi-label encoder on the per-row label lists; the encoded matrix
# is exposed under both names used in this notebook
mle = MultiLabelBinarizer()
multi_labels = labels = mle.fit_transform(df['split_labels'])

# Build the classifier from the encoder's classes and check that its output
# width equals the number of classes
n_classes = len(mle.classes_)
cnn = CNN.define_cnn(mle.classes_)
assert cnn.output_shape[-1] == n_classes

# Single multilabel-stratified shuffle split holding out 20% of the samples
label_msss = MultilabelStratifiedShuffleSplit(
    n_splits=1,
    test_size=0.2,
    random_state=1929,
)

In [14]:
# Display the class names learned by the fitted MultiLabelBinarizer
mle.classes_

array(['background_noise', 'blackcap', 'bluetit', 'carrion_crow',
       'chiffchaff', 'common_whitethroat', 'dunnock', 'eurasian_skylark',
       'pheasant', 'unknown_bird', 'unknown_bird_10', 'unknown_bird_11',
       'unknown_bird_12', 'unknown_bird_13', 'unknown_bird_14',
       'unknown_bird_15', 'unknown_bird_16', 'unknown_bird_17',
       'unknown_bird_18', 'unknown_bird_2', 'unknown_bird_3',
       'unknown_bird_4', 'unknown_bird_5', 'unknown_bird_9', 'woodpigeon',
       'wren', 'yellowhammer'], dtype=object)

In [21]:
# Partition the labelled samples into a working set and a held-out test set.
# The test set must stay untouched by everything else until the very end.
idx = np.arange(len(filepaths))
split_iter = label_msss.split(idx, labels)
rest_idx, test_idx = next(split_iter)

In [39]:
def make_ds_with_aug(filepaths, labels, indices, flags, batch_size=32, seed=1929):
    """Build a shuffled, batched tf.data pipeline decoded by `daug.decode_image_with_aug`.

    Args:
        filepaths: Array-like of image paths; rows are selected with `indices`.
        labels: Array-like of labels; rows are selected with `indices` and
            cast to float32.
        indices: Integer positions of the samples to include.
        flags: Array-like cast to bool and passed through per sample.
            NOTE(review): unlike `filepaths` and `labels`, `flags` is NOT
            indexed by `indices`, so it presumably must already be aligned
            with them (one flag per selected sample) - TODO confirm.
        batch_size: Number of samples per batch.
        seed: Seed for the shuffle.

    Returns:
        A `tf.data.Dataset` that reshuffles on every iteration, maps each
        (path, label, flag) triple through `daug.decode_image_with_aug`,
        then batches and prefetches.
    """
    selected = np.asarray(indices, dtype=int)
    sample_labels = np.asarray(labels)[selected].astype('float32')
    sample_paths = np.asarray(filepaths)[selected]
    sample_flags = np.asarray(flags).astype('bool')

    return (
        tf.data.Dataset.from_tensor_slices((sample_paths, sample_labels, sample_flags))
        # Buffer covers every selected sample
        .shuffle(len(selected), seed=seed, reshuffle_each_iteration=True)
        .map(daug.decode_image_with_aug, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )

def make_ds_no_aug(filepaths, labels, indices, batch_size=32):
    """Build an unshuffled, batched tf.data pipeline decoded by `daug.decode_image`.

    `filepaths` and `labels` are coerced with `np.asarray` before indexing,
    as `make_ds_with_aug` does, so plain lists are accepted and selection is
    always positional.

    Args:
        filepaths: Array-like of image paths.
        labels: Array-like of labels; the selected rows are cast to float32.
        indices: Index (integer positions or boolean mask) selecting the
            samples to include.
        batch_size: Number of samples per batch. Defaults to 32, the same
            default as `make_ds_with_aug`.

    Returns:
        A `tf.data.Dataset` that maps each (path, label) pair through
        `daug.decode_image`, then batches and prefetches. Sample order
        follows `indices` (no shuffling).
    """
    paths = np.asarray(filepaths)[indices]
    labs = np.asarray(labels)[indices].astype('float32')

    ds = tf.data.Dataset.from_tensor_slices((paths, labs))
    ds = ds.map(daug.decode_image, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

def unlabelled_decode(filename: tf.Tensor, target_size=(64, 512)):
    """Read a PNG file and return it as a resized, rescaled float32 image.

    Args:
        filename: Scalar string tensor holding the path of the PNG file.
        target_size: (height, width) the image is resized to. Defaults to
            (64, 512), the size that was previously hard-coded, so existing
            one-argument calls (including via `Dataset.map`) are unchanged.

    Returns:
        A float32 tensor of shape (height, width, 1) whose values are the
        resized pixel values divided by 255.0.
    """
    image = tf.io.read_file(filename)
    # Decode as a single-channel (grayscale) image
    image = tf.image.decode_png(image, channels=1)
    image = tf.image.resize(image, list(target_size))
    image = tf.cast(image, tf.float32) / 255.0
    return image

def make_unlabelled_ds(filepaths: np.ndarray, indices: np.ndarray, batch_size: int):
    """Build a batched, image-only tf.data pipeline for unlabelled files.

    Args:
        filepaths: Array of image paths.
        indices: Index array selecting which paths to include.
        batch_size: Number of images per batch; the final partial batch is kept.

    Returns:
        A `tf.data.Dataset` yielding batches of images decoded by
        `unlabelled_decode`, with prefetching enabled.
    """
    selected_paths = filepaths[indices]
    images = tf.data.Dataset.from_tensor_slices(selected_paths).map(
        unlabelled_decode,
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    batched = images.batch(batch_size, drop_remainder=False)
    return batched.prefetch(tf.data.AUTOTUNE)

In [40]:
# Collect the unlabelled spectrograms.
# They are only used for pretraining the autoencoder.
from pathlib import Path

folder = Path("/Users/jameshill/PycharmProjects/bioacoustic-classifier/data/processed/spectrogram_3s/unlabelled")
png_paths = [str(png) for png in folder.glob("*.png")]
png_paths.sort()  # sorted path strings give a stable file order for the seeded split
files = np.array(png_paths, dtype=np.str_)

# Hold back 10% of the unlabelled files as validation data for the autoencoder
ss = ShuffleSplit(n_splits=1, test_size=0.1, random_state=1929)
unlabelled_train_idx, unlabelled_val_idx = next(ss.split(files))
cae_train_ds, cae_val_ds = [
    make_unlabelled_ds(files, split_idx, batch_size=64)
    for split_idx in (unlabelled_train_idx, unlabelled_val_idx)
]

In [43]:
def conv_encoder(input_shape=(64,512,1)):
    """Build the convolutional encoder model.

    Three Conv2D(3x3, ReLU) + 2x2 max-pool stages (32, 64, 128 filters) are
    followed by a final 256-filter Conv2D with L2 kernel regularisation.
    Each pooling stage halves both spatial dimensions, so the default
    64x512x1 input yields an 8x64x256 feature map.

    Args:
        input_shape: Shape of one input image, (height, width, channels).

    Returns:
        A `keras.Model` named "conv_encoder" whose output is the bottleneck
        feature map (no global pooling is applied).
    """
    inputs = keras.Input(shape=input_shape)

    features = inputs
    for n_filters in (32, 64, 128):
        features = layers.Conv2D(n_filters, 3, padding="same", activation="relu")(features)
        features = layers.MaxPooling2D(2)(features)   # halves height and width

    # The bottleneck stays a feature map rather than a pooled vector
    bottleneck = layers.Conv2D(
        256, 3, padding="same", activation="relu",
        kernel_regularizer=keras.regularizers.l2(1e-4),
    )(features)
    return keras.Model(inputs, bottleneck, name="conv_encoder")

In [44]:
def build_cae(input_shape=(64,512,1)):
    """Build the convolutional autoencoder on top of `conv_encoder`.

    The decoder applies three 2x upsampling + Conv2D(3x3, ReLU) stages
    (128, 64, 32 filters), then a single-filter sigmoid Conv2D that produces
    the reconstruction.

    Args:
        input_shape: Shape of one input image, (height, width, channels).

    Returns:
        A `(cae, enc)` tuple: the full autoencoder model named "cae" and the
        encoder model it is built from.
    """
    enc = conv_encoder(input_shape)

    decoded = enc.output                 # encoder bottleneck feature map
    for n_filters in (128, 64, 32):
        decoded = layers.UpSampling2D((2,2))(decoded)   # doubles height and width
        decoded = layers.Conv2D(n_filters, 3, padding="same", activation="relu")(decoded)
    reconstruction = layers.Conv2D(1, 3, padding="same", activation="sigmoid")(decoded)

    cae = keras.Model(enc.input, reconstruction, name="cae")
    return cae, enc


In [52]:
def _as_reconstruction_pair(image):
    # Use each image batch as both the model input and the target
    return image, image

cae_train_images = cae_train_ds.map(_as_reconstruction_pair)
cae_val_images = cae_val_ds.map(_as_reconstruction_pair)

In [54]:
# Build the autoencoder (and the encoder it is built from), then compile it
# with Adam at a learning rate of 1e-3 and a binary cross-entropy loss.
cae, enc = build_cae()
cae.compile(optimizer=keras.optimizers.Adam(1e-3), loss="binary_crossentropy")

# Save the full model (not just weights) whenever val_loss improves
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath='conv_ae.keras',
        save_best_only=True,
        save_weights_only=False,
        monitor='val_loss'),
]
# Early stopping protects against overfitting: training stops once val_loss has
# not improved for 10 consecutive epochs, and the best weights are restored.
early_cb = keras.callbacks.EarlyStopping(monitor="val_loss", mode="min",
                                         patience=10, restore_best_weights=True, verbose=1)
# If val_loss plateaus for 4 epochs, halve the learning rate (never below 1e-5)
plateau_cb = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5,
                               patience=4, min_lr=1e-5, verbose=1)
# Fit the model. `callbacks` is already a list, so it is unpacked to give
# `fit` a flat list of Callback instances rather than a nested list.
cae_history = cae.fit(
    cae_train_images,
    epochs=100,
    validation_data=cae_val_images,
    callbacks=[*callbacks, early_cb, plateau_cb],
)

Epoch 1/100
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4662s[0m 9s/step - loss: 0.6013 - val_loss: 0.5817 - learning_rate: 0.0010
Epoch 2/100
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5396s[0m 10s/step - loss: 0.5801 - val_loss: 0.5798 - learning_rate: 0.0010
Epoch 3/100
[1m  5/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:28:00[0m 10s/step - loss: 0.5917

KeyboardInterrupt: 

In [53]:
# Display the dataset repr to check its element_spec (an (input, target) pair of image batches)
cae_train_images

<_MapDataset element_spec=(TensorSpec(shape=(None, 64, 512, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 64, 512, 1), dtype=tf.float32, name=None))>